* [PATCH 00/15] add virtio_blk device support to vdpa/ifc
@ 2022-01-25  6:47 Andy Pei
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                   ` (14 more replies)
  0 siblings, 15 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the two device types have a lot in common, part of the vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device id, and each implements
its own specific features and ops.
An example is added to the vdpa example application to support the virtio_blk device.
To support blk device live migration, some modifications are made to the vhost lib.
The dev_conf op is performed only under the VHOST_USER_SET_VRING_CALL msg.
Andy Pei (15):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add blk dev sw live migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA lauch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
    same when blk device pause
  vhost: make sure each queue callfd is configured
 drivers/vdpa/ifc/base/ifcvf.c    |  42 ++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 534 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 +
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 152 +++++++++++
 examples/vdpa/vdpa_blk_compact.h | 118 +++++++++
 examples/vdpa/vhost_user.h       | 190 ++++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  15 ++
 usertools/dpdk-devbind.py        |   8 +
 12 files changed, 1053 insertions(+), 54 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 01/15] vdpa/ifc: add support for virtio blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (8 more replies)
  2022-01-25  6:47 ` [PATCH 02/15] vhost: add vdpa ops for blk device Andy Pei
                   ` (13 subsequent siblings)
  14 siblings, 9 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are implemented with their proper features and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 96 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 102 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3853c4c..48056d1 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,14 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/**
+** vdpa device info includes device features and device operations.
+**/
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1170,6 +1179,50 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/**
+		** Transitional devices: use the PCI subsystem device id as
+		** virtio device id, same as legacy driver always did.
+		**/
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/**
+		** Modern devices: simply use PCI device id,
+		** but start from 0x1040.
+		**/
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1181,6 +1234,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1230,13 +1284,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1248,7 +1313,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1316,6 +1382,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 02/15] vhost: add vdpa ops for blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vdpa ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index fc2d6ac..9a23db9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 03/15] vdpa/ifc: add blk ops for ifc device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-25  6:47 ` [PATCH 02/15] vhost: add vdpa ops for blk device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 88 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 48056d1..965baa2 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1093,6 +1093,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1207,6 +1211,88 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	__u64 capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %lu",
+			len, sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/**
+	** cannot read 64-bit register in one attempt,
+	** so read byte by byte.
+	**/
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (__u64)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1219,7 +1305,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (2 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For the blk device we need to relay all the commands of each queue.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 48 +++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 11 deletions(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 965baa2..9729490 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -374,24 +374,50 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/**
+				** For the net we only need to relay rx queue,
+				** which will change the mem of VM.
+				**/
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/**
+				** For the blk we need to relay all the read cmd
+				** of each queue
+				**/
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 05/15] vdpa/ifc: add blk dev sw live migration
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (3 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 06/15] example/vdpa:add vdpa blk support in example Andy Pei
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, Jin Yu
Enable virtio blk sw live migration: relay the callfd and log the dirty pages.
In this version we ignore the write cmd and still mark it dirty. Maybe we can improve this later.
Signed-off-by: Jin Yu <jin.yu@intel.com>
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 130 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 118 insertions(+), 22 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9729490..1f832a3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -334,10 +334,68 @@ struct rte_vdpa_dev_info {
 
 	rte_vhost_get_negotiated_features(vid, &features);
 	if (RTE_VHOST_NEED_LOG(features)) {
-		ifcvf_disable_logging(hw);
-		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-				log_base, IFCVF_LOG_BASE, log_size);
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
+		/**
+		** IFCVF marks dirty memory pages for only packet buffer,
+		** SW helps to mark the used ring as dirty after device stops.
+		**/
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
 		/*
 		 * IFCVF marks dirty memory pages for only packet buffer,
 		 * SW helps to mark the used ring as dirty after device stops.
@@ -665,15 +723,18 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/**
+		** NETWORK: Direct I/O for Tx queue, relay for Rx queue
+		** BLK: relay every queue
+		**/
+		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
 				return -1;
 			}
 			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -692,7 +753,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -717,8 +781,10 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
+		     internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -730,6 +796,8 @@ struct rte_vdpa_dev_info {
 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
 			m_vring_iova, size);
 
+		hw->vring[i].last_avail_idx = vq.used->idx;
+		hw->vring[i].last_used_idx = vq.used->idx;
 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
 				hw->vring[i].last_used_idx);
 		rte_free(internal->m_vring[i].desc);
@@ -780,17 +848,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -878,7 +965,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 06/15] example/vdpa:add vdpa blk support in example
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (4 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 07/15] usertools: add support for virtio blk device Andy Pei
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 ++
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 152 +++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 118 ++++++++++++++++++++++++
 examples/vdpa/vhost_user.h       | 190 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 470 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
index d974db4..9d0479b 100644
--- a/examples/vdpa/Makefile
+++ b/examples/vdpa/Makefile
@@ -5,7 +5,7 @@
 APP = vdpa
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c vdpa_blk_compact.c
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 PKGCONF ?= pkg-config
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..3fa3d3a 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -156,6 +157,7 @@ struct vdpa_port {
 static const struct rte_vhost_device_ops vdpa_sample_devops = {
 	.new_device = new_device,
 	.destroy_device = destroy_device,
+	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
 };
 
 static int
@@ -192,6 +194,12 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
+		< 0)
+		rte_exit(EXIT_FAILURE,
+			"set vhost blk driver features and protocal features failed: %s\n",
+			socket_path);
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
index bd08605..f0d111c 100644
--- a/examples/vdpa/meson.build
+++ b/examples/vdpa/meson.build
@@ -15,4 +15,5 @@ deps += 'vhost'
 allow_experimental_apis = true
 sources = files(
         'main.c',
+	'vdpa_blk_compact.c',
 )
diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
new file mode 100644
index 0000000..7310ebb
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.c
@@ -0,0 +1,152 @@
+/*
+**    INTEL CONFIDENTIAL
+**
+**    Copyright (c) Intel Corporation.
+**    All rights reserved.
+**
+**    The source code contained or described herein and all documents related
+**    to the source code ("Material") are owned by Intel Corporation or its
+**    suppliers or licensors.  Title to the Material remains with Intel
+**    Corporation or its suppliers and licensors.  The Material contains trade
+**    secrets and proprietary and confidential information of Intel or its
+**    suppliers and licensors.  The Material is protected by worldwide
+**    copyright and trade secret laws and treaty provisions.  No part of the
+**    Material may be used, copied, reproduced, modified, published, uploaded,
+**    posted, transmitted, distributed, or disclosed in any way without Intel's
+**    prior express written permission.
+**
+**    No license under any patent, copyright, trade secret or other
+**    intellectual property right is granted to or conferred upon you by
+**    disclosure or delivery of the Materials, either expressly, by
+**    implication, inducement, estoppel or otherwise.  Any license under such
+**    intellectual property rights must be express and approved by Intel in
+**    writing.
+*/
+
+/**
+** @file
+**
+** Block device specific vhost lib
+**/
+
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include <vdpa_driver.h>
+#include <rte_vhost.h>
+#include "vdpa_blk_compact.h"
+#include "vhost_user.h"
+
+#define VHOST_USER_GET_CONFIG	24
+#define VHOST_USER_SET_CONFIG	25
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
+/*
+ * Pre-msg hook: serve GET_CONFIG/SET_CONFIG through the vDPA device ops.
+ */
+static enum rte_vhost_msg_result
+rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
+{
+	struct VhostUserMsg *msg = _msg;
+	struct rte_vdpa_device *vdev = NULL;
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	if (vdev == NULL)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	fprintf(stderr, "msg is %u\n", msg->request.master);
+	switch (msg->request.master) {
+	case VHOST_USER_GET_CONFIG: {
+		int rc = -1;
+
+		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
+
+		/* size is supplied by the master: keep it within region[] */
+		if (msg->payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE)
+			fprintf(stdout, "get_config() size is too large!\n");
+		else if (vdev->ops->get_config)
+			rc = vdev->ops->get_config(vid,
+						   msg->payload.cfg.region,
+						   msg->payload.cfg.size);
+		else
+			fprintf(stdout, "get_config() function is invalid!\n");
+		if (rc != 0) {
+			msg->size = 0;	/* reply with an empty payload on failure */
+			fprintf(stdout, "get_config() return error!\n");
+		}
+		return RTE_VHOST_MSG_RESULT_REPLY;
+	}
+	case VHOST_USER_SET_CONFIG: {
+		int rc = 0;
+
+		fprintf(stdout, "read message VHOST_USER_SET_CONFIG\n");
+		/* reject a master-supplied size larger than region[] */
+		if (msg->payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE)
+			rc = -1;
+		else if (vdev->ops->set_config)
+			rc = vdev->ops->set_config(vid,
+				msg->payload.cfg.region,
+				msg->payload.cfg.offset,
+				msg->payload.cfg.size,
+				msg->payload.cfg.flags);
+
+		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+	}
+	default:
+		break;
+	}
+
+	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
+}
+
+struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {	/* passed to rte_vhost_extern_callback_register() */
+	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
+	.post_msg_handle = NULL,	/* no post-message hook is installed */
+};
+/* Register g_blk_extern_vhost_ops for @vid; used as the new_connection callback. */
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid)
+{
+	int rc;
+
+	rc = rte_vhost_extern_callback_register(vid,
+						&g_blk_extern_vhost_ops,
+						NULL);
+	if (rc != 0) {
+		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
+		return -1;	/* the register call's own code is not propagated */
+	}
+	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
+	return 0;
+}
+
+/* Set virtio-blk features on @path; returns 0 untouched if vdev lacks get_config. */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev)
+{
+	uint64_t protocol_features = 0;
+
+	if (!vdev) {
+		fprintf(stdout, "vdev is NULL.\n");
+		return -EINVAL;
+	}
+
+	/* vdpa net does not have the get_config */
+	if (!vdev->ops->get_config)
+		return 0;
+
+	/* propagate failures: the caller checks for a negative return */
+	if (rte_vhost_driver_set_features(path, SPDK_VHOST_BLK_FEATURES_BASE) ||
+	    rte_vhost_driver_disable_features(path,
+		SPDK_VHOST_BLK_DISABLED_FEATURES) ||
+	    rte_vhost_driver_get_protocol_features(path, &protocol_features))
+		return -1;
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+
+	return rte_vhost_driver_set_protocol_features(path, protocol_features);
+}
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..94bd9c1
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,118 @@
+/*
+**    INTEL CONFIDENTIAL
+**
+**    Copyright (c) Intel Corporation.
+**    All rights reserved.
+**
+**    The source code contained or described herein and all documents related
+**    to the source code ("Material") are owned by Intel Corporation or its
+**    suppliers or licensors.  Title to the Material remains with Intel
+**    Corporation or its suppliers and licensors.  The Material contains trade
+**    secrets and proprietary and confidential information of Intel or its
+**    suppliers and licensors.  The Material is protected by worldwide
+**    copyright and trade secret laws and treaty provisions.  No part of the
+**    Material may be used, copied, reproduced, modified, published, uploaded,
+**    posted, transmitted, distributed, or disclosed in any way without Intel's
+**    prior express written permission.
+**
+**    No license under any patent, copyright, trade secret or other
+**    intellectual property right is granted to or conferred upon you by
+**    disclosure or delivery of the Materials, either expressly, by
+**    implication, inducement, estoppel or otherwise.  Any license under such
+**    intellectual property rights must be express and approved by Intel in
+**    writing.
+*/
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+/**
+ * @warning @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set the virtio-blk feature bits and vhost-user protocol features on a socket.
+ *
+ * @param path vhost-user socket path
+ * @param vdev vDPA device attached to the socket
+ * @return 0 on success, a negative value on failure
+ */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Install external hook to handle vhost user block message
+ *
+ * @param vid vhost device id
+ * @return 0 on success, -1 if the hook could not be registered
+ */
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid);
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..b9e1be1
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,190 @@
+/*
+**    INTEL CONFIDENTIAL
+**
+**    Copyright (c) Intel Corporation.
+**    All rights reserved.
+**
+**    The source code contained or described herein and all documents related
+**    to the source code ("Material") are owned by Intel Corporation or its
+**    suppliers or licensors.  Title to the Material remains with Intel
+**    Corporation or its suppliers and licensors.  The Material contains trade
+**    secrets and proprietary and confidential information of Intel or its
+**    suppliers and licensors.  The Material is protected by worldwide
+**    copyright and trade secret laws and treaty provisions.  No part of the
+**    Material may be used, copied, reproduced, modified, published, uploaded,
+**    posted, transmitted, distributed, or disclosed in any way without Intel's
+**    prior express written permission.
+**
+**    No license under any patent, copyright, trade secret or other
+**    intellectual property right is granted to or conferred upon you by
+**    disclosure or delivery of the Materials, either expressly, by
+**    implication, inducement, estoppel or otherwise.  Any license under such
+**    intellectual property rights must be express and approved by Intel in
+**    writing.
+*/
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;	/* presumably byte offset into the config space - TODO confirm */
+	uint32_t size;	/* presumably number of bytes of region[] in use - TODO confirm */
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];	/* config data buffer */
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 07/15] usertools: add support for virtio blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (5 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 06/15] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (6 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 07/15] usertools: add support for virtio blk device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 09/15] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 1f832a3..eff6ff3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1384,6 +1384,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+	/* stub: ignores all of its arguments and always reports success */
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1401,7 +1411,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 09/15] vdpa/ifc: add some log at VDPA lauch before qemu connect
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (7 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index eff6ff3..0b4b77f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1447,6 +1447,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	__u64 capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1513,6 +1516,32 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/**
+		** cannot read 64-bit register in one attempt,
+		** so read byte by byte.
+		**/
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (__u64)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 10/15] vdpa/ifc: read virtio max_queues from hardware
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (8 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 09/15] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
The original code always sets max_queues to IFCVF_MAX_QUEUES.
With this change, max_queues is the minimum of IFCVF_MAX_QUEUES and the hardware num_queues.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 0b4b77f..f092aca 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1542,6 +1542,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 11/15] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (9 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Create a thread to poll and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 113 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index f092aca..2552375 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -622,6 +624,108 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void	/* called by intr_relay() for each interrupt event it reads */
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);	/* 1: presumably "need reply" - TODO confirm */
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+
+	return;
+}
+
+static void *	/* thread body: relay device interrupts as config-change messages */
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		close(csc_fd);	/* not yet stored in internal->csc_fd: close it here */
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;	/* from here on unset_intr_relay() closes it */
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, sizeof(buf));
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+	return NULL;
+}
+
+static int	/* 0 on success, -1 if the relay thread could not be created */
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create intr relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int	/* stops the intr relay thread and closes its epoll fd; always returns 0 */
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+	/* NOTE(review): csc_fd is only assigned inside intr_relay(); confirm it starts at -1, else fd 0 could be closed here */
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -648,10 +752,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -969,6 +1079,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (10 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 2552375..546f9bd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1621,11 +1621,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (11 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
  2022-01-25  6:47 ` [PATCH 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
1. last_avail_idx is the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 3a69e53..a8a4728 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -216,10 +216,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -252,9 +260,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (12 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  2022-01-25  6:47 ` [PATCH 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index a8a4728..7018048 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -116,7 +116,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 546f9bd..ff233bc 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -359,23 +359,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -766,7 +775,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH 15/15] vhost: make sure each queue callfd is configured
  2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                   ` (13 preceding siblings ...)
  2022-01-25  6:47 ` [PATCH 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
@ 2022-01-25  6:47 ` Andy Pei
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  6:47 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
While building the vhost data path, qemu first creates one call fd
and creates another call fd at the end. The final call fd is the one used to relay notifications.
In the original code, dev_conf runs after the kick fd is set and configures the first
call fd. Even though the actual call fd is set later, the data path does not work correctly.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 5eb1dd6..0be879a 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3137,12 +3137,27 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR,
 					 "Failed to configure vDPA device\n");
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/**
+		** when VIRTIO_DEV_VDPA_CONFIGURED is already set,
+		** close the device and configure the device again to
+		** make sure the call fd of each queue is configured correctly.
+		**/
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-01-25  9:37   ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (14 more replies)
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (7 subsequent siblings)
  8 siblings, 15 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the devices have a lot of similarities, part of the vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device id, and each implements
its own specific features and ops.
An example is added to examples/vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to the vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL message.
v2:
 Fix some coding style issue.
Andy Pei (15):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add blk dev sw live migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
    same when blk device pause
  vhost: make sure each queue callfd is configured
 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 520 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 +
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++
 examples/vdpa/vdpa_blk_compact.h | 117 +++++++++
 examples/vdpa/vhost_user.h       | 189 ++++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  14 ++
 usertools/dpdk-devbind.py        |   8 +
 12 files changed, 1034 insertions(+), 54 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 01/15] vdpa/ifc: add support for virtio blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 02/15] vhost: add vdpa ops for " Andy Pei
                       ` (13 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are each implemented with their proper features and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3853c4c..96b67dd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operation. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1170,6 +1177,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1181,6 +1230,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1230,13 +1280,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1248,7 +1309,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1316,6 +1378,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 02/15] vhost: add vdpa ops for blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-25  9:37     ` [PATCH v2 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vdpa ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index fc2d6ac..9a23db9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 03/15] vdpa/ifc: add blk ops for ifc device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-25  9:37     ` [PATCH v2 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-25  9:37     ` [PATCH v2 02/15] vhost: add vdpa ops for " Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 96b67dd..57fdd2c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1091,6 +1091,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1203,6 +1207,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	__u64 capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %lu",
+			len, sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (__u64)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1215,7 +1298,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
                       ` (10 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For the blk device we need to relay all the cmds of each queue.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 57fdd2c..ef5b36c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 05/15] vdpa/ifc: add blk dev sw live migration
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 06/15] example/vdpa:add vdpa blk support in example Andy Pei
                       ` (9 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Enable virtio blk sw live migration: relay the callfd and log the dirty pages.
In this version write cmds are not special-cased; they are still marked dirty.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 116 insertions(+), 22 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index ef5b36c..14bc5c8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
 
 	rte_vhost_get_negotiated_features(vid, &features);
 	if (RTE_VHOST_NEED_LOG(features)) {
-		ifcvf_disable_logging(hw);
-		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-				log_base, IFCVF_LOG_BASE, log_size);
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
+		/* IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
 		/*
 		 * IFCVF marks dirty memory pages for only packet buffer,
 		 * SW helps to mark the used ring as dirty after device stops.
@@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
 				return -1;
 			}
 			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -688,7 +747,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
+		     internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -726,6 +790,8 @@ struct rte_vdpa_dev_info {
 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
 			m_vring_iova, size);
 
+		hw->vring[i].last_avail_idx = vq.used->idx;
+		hw->vring[i].last_used_idx = vq.used->idx;
 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
 				hw->vring[i].last_used_idx);
 		rte_free(internal->m_vring[i].desc);
@@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -874,7 +959,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 06/15] example/vdpa:add vdpa blk support in example
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 07/15] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to the vdpa example.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 ++
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 117 ++++++++++++++++++++++++
 examples/vdpa/vhost_user.h       | 189 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 466 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
index d974db4..9d0479b 100644
--- a/examples/vdpa/Makefile
+++ b/examples/vdpa/Makefile
@@ -5,7 +5,7 @@
 APP = vdpa
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c vdpa_blk_compact.c
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 PKGCONF ?= pkg-config
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..924ad7b 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -156,6 +157,7 @@ struct vdpa_port {
 static const struct rte_vhost_device_ops vdpa_sample_devops = {
 	.new_device = new_device,
 	.destroy_device = destroy_device,
+	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
 };
 
 static int
@@ -192,6 +194,12 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
+		< 0)
+		rte_exit(EXIT_FAILURE,
+			"set vhost blk driver features and protocol features failed: %s\n",
+			socket_path);
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
index bd08605..f0d111c 100644
--- a/examples/vdpa/meson.build
+++ b/examples/vdpa/meson.build
@@ -15,4 +15,5 @@ deps += 'vhost'
 allow_experimental_apis = true
 sources = files(
         'main.c',
+	'vdpa_blk_compact.c',
 )
diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
new file mode 100644
index 0000000..0c4d3ee
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.c
@@ -0,0 +1,150 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+/* @file
+ *
+ * Block device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include <vdpa_driver.h>
+#include <rte_vhost.h>
+#include "vdpa_blk_compact.h"
+#include "vhost_user.h"
+
+#define VHOST_USER_GET_CONFIG	24
+#define VHOST_USER_SET_CONFIG	25
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
+/*
+ * Function to handle vhost user blk message
+ */
+static enum rte_vhost_msg_result
+rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
+{
+	struct VhostUserMsg *msg = _msg;
+	struct rte_vdpa_device *vdev = NULL;
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	if (vdev == NULL)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	fprintf(stderr, "msg is %d\n", msg->request.master);
+	switch (msg->request.master) {
+	case VHOST_USER_GET_CONFIG: {
+		int rc = 0;
+
+		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
+
+		if (vdev->ops->get_config) {
+			fprintf(stdout, "get_config() function is valid!\n");
+			rc = vdev->ops->get_config(vid,
+						   msg->payload.cfg.region,
+						   msg->payload.cfg.size);
+			if (rc != 0) {
+				msg->size = 0;
+				fprintf(stdout, "get_config() return error!\n");
+			}
+		} else {
+			fprintf(stdout, "get_config() function is invalid!\n");
+		}
+
+		return RTE_VHOST_MSG_RESULT_REPLY;
+	}
+	case VHOST_USER_SET_CONFIG: {
+		int rc = 0;
+
+		fprintf(stdout,
+			"read message VHOST_USER_SET_CONFIG\n");
+
+		if (vdev->ops->set_config) {
+			rc = vdev->ops->set_config(vid,
+				msg->payload.cfg.region,
+				msg->payload.cfg.offset,
+				msg->payload.cfg.size,
+				msg->payload.cfg.flags);
+		}
+
+		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+	}
+	default:
+		break;
+	}
+
+	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
+}
+
+struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {
+	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
+	.post_msg_handle = NULL,
+};
+
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid)
+{
+	int rc;
+
+	rc = rte_vhost_extern_callback_register(vid,
+						&g_blk_extern_vhost_ops,
+						NULL);
+	if (rc != 0) {
+		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
+		return -1;
+	}
+	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
+	return 0;
+}
+
+
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev)
+{
+	uint64_t protocol_features = 0;
+
+	if (!vdev) {
+		fprintf(stdout, "vdev is NULL.\n");
+		return -EINVAL;
+	}
+
+	/* vdpa net does not have the get_config */
+	if (!vdev->ops->get_config)
+		return 0;
+
+	rte_vhost_driver_set_features(path, SPDK_VHOST_BLK_FEATURES_BASE);
+	rte_vhost_driver_disable_features(path,
+		SPDK_VHOST_BLK_DISABLED_FEATURES);
+
+	rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+	rte_vhost_driver_set_protocol_features(path, protocol_features);
+
+	return 0;
+}
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..420d48e
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,117 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set the vhost-user blk features and protocol features on a socket.
+ *
+ * @param path
+ *  socket path
+ */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Install external hook to handle vhost user block message
+ *
+ * @param vid
+ *  vhost device id
+ */
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid);
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..8b747d0
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,189 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 07/15] usertools: add support for virtio blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 06/15] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                       ` (7 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to devbind.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 07/15] usertools: add support for virtio blk device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
The set_vring_state op is mandatory, so add a set_vring_state op for the blk device.
For now the blk implementation is a stub that only returns 0.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 14bc5c8..00e7274 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1373,6 +1373,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1390,7 +1400,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Log the virtio blk device config space information
at vDPA launch, before QEMU connects.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 00e7274..ff91e80 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1436,6 +1436,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	__u64 capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1502,6 +1505,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (__u64)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 10/15] vdpa/ifc: read virtio max_queues from hardware
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                       ` (4 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Previously, max_queues was set to IFCVF_MAX_QUEUES.
For the blk device, it is now the minimum of IFCVF_MAX_QUEUES and the hardware num_queues.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index ff91e80..d30c3fd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1530,6 +1530,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 11/15] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                       ` (3 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 111 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index d30c3fd..981cb26 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -617,6 +619,106 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -643,10 +745,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -963,6 +1071,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                       ` (2 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add is_blk flag to ifcvf_hw, and init is_blk during probe.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 981cb26..4eb8f98 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1608,11 +1608,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
  2022-01-25  9:37     ` [PATCH v2 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
1. For the blk device, last_avail_idx is the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 3a69e53..a8a4728 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -216,10 +216,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -252,9 +260,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-25  9:37     ` [PATCH v2 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
When the virtio blk device is paused, make sure the hardware last_avail_idx
and last_used_idx are the same.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index a8a4728..7018048 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -116,7 +116,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4eb8f98..b0b2859 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -356,23 +356,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -759,7 +768,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v2 15/15] vhost: make sure each queue callfd is configured
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-01-25  9:37     ` [PATCH v2 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
@ 2022-01-25  9:37     ` Andy Pei
  2022-01-27  7:13       ` Xia, Chenbo
  14 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-01-25  9:37 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
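During the vhost data path building process, QEMU creates a call fd at
first and creates another call fd at the end.
The final call fd is the one used to relay notifications.
In the original code, dev_conf is performed right after the kick fd is
set, so it configures the first call fd. Even though the actual call fd
is set later, the data path does not work correctly.
To fix this, perform dev_conf only on VHOST_USER_SET_VRING_CALL, and if the
device is already configured, close it and configure it again.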
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 5eb1dd6..b25b25f 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3137,12 +3137,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR,
 					 "Failed to configure vDPA device\n");
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* when VIRTIO_DEV_VDPA_CONFIGURED already configured
+		 * close the device and config the device again,
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v2 15/15] vhost: make sure each queue callfd is configured
  2022-01-25  9:37     ` [PATCH v2 15/15] vhost: make sure each queue callfd is configured Andy Pei
@ 2022-01-27  7:13       ` Xia, Chenbo
  2022-01-29  3:11         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-01-27  7:13 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Andy,
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Tuesday, January 25, 2022 5:37 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao, Gang
> <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v2 15/15] vhost: make sure each queue callfd is configured
> 
> During the vhost data path building process, qemu will create
> a call fd at first, and create another call fd in the end.
> The final call fd will be used to relay notify.
> In the original code, after kick fd is set, dev_conf will
> set the first call fd. Even though the actual call fd will set,
> the data path will not work correctly.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 1.8.3.1
Please fix all the errors reported on patchwork first.
http://patchwork.dpdk.org/project/dpdk/patch/1643103437-118618-16-git-send-email-andy.pei@intel.com/
Thanks,
Chenbo
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-01-29  3:03   ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (14 more replies)
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (6 subsequent siblings)
  8 siblings, 15 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Since the two device types have a lot in common, part of the existing
vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device id, and
device-specific features and ops are implemented for each.
virtio_blk device support is added to the vdpa example.
To support blk device live migration, some modifications are made to the vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL message.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.
Andy Pei (15):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add blk dev sw live migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the
    same when blk device pause
  vhost: make sure each queue callfd is configured
 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 520 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 +
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++
 examples/vdpa/vdpa_blk_compact.h | 117 +++++++++
 examples/vdpa/vhost_user.h       | 189 ++++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  14 ++
 usertools/dpdk-devbind.py        |   8 +
 12 files changed, 1034 insertions(+), 54 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22  8:57       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 02/15] vhost: add vdpa ops for " Andy Pei
                       ` (13 subsequent siblings)
  14 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Re-use the vdpa/ifc code and distinguish blk and net devices by PCI device id.
Blk and net devices are each implemented with their own features and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3853c4c..96b67dd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1170,6 +1177,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1181,6 +1230,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1230,13 +1280,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1248,7 +1309,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1316,6 +1378,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 02/15] vhost: add vdpa ops for blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-29  3:03     ` [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22  9:12       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  14 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vdpa ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index fc2d6ac..9a23db9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-29  3:03     ` [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-29  3:03     ` [PATCH v3 02/15] vhost: add vdpa ops for " Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22  9:58       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  14 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 96b67dd..778e1fd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1091,6 +1091,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1203,6 +1207,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1215,7 +1298,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22 10:04       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
                       ` (10 subsequent siblings)
  14 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
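For the blk device, the commands of every queue need to be relayed, so set up a relay eventfd for each vring (for the net device, only the rx queues are relayed).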
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 778e1fd..4f99bb3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for config space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22 11:10       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 06/15] example/vdpa:add vdpa blk support in example Andy Pei
                       ` (9 subsequent siblings)
  14 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Enable software live migration for the virtio blk device: relay the callfd and log the dirty pages.
In this version, write commands are not treated specially; they are still marked dirty.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 116 insertions(+), 22 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4f99bb3..a930825 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
 
 	rte_vhost_get_negotiated_features(vid, &features);
 	if (RTE_VHOST_NEED_LOG(features)) {
-		ifcvf_disable_logging(hw);
-		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-				log_base, IFCVF_LOG_BASE, log_size);
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
+		/* IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
 		/*
 		 * IFCVF marks dirty memory pages for only packet buffer,
 		 * SW helps to mark the used ring as dirty after device stops.
@@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
 				return -1;
 			}
 			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -688,7 +747,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
+		     internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -726,6 +790,8 @@ struct rte_vdpa_dev_info {
 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
 			m_vring_iova, size);
 
+		hw->vring[i].last_avail_idx = vq.used->idx;
+		hw->vring[i].last_used_idx = vq.used->idx;
 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
 				hw->vring[i].last_used_idx);
 		rte_free(internal->m_vring[i].desc);
@@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -874,7 +959,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 06/15] example/vdpa:add vdpa blk support in example
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-03-22 11:29       ` Maxime Coquelin
  2022-01-29  3:03     ` [PATCH v3 07/15] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  14 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to vdpa example.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/Makefile           |   2 +-
 examples/vdpa/main.c             |   8 ++
 examples/vdpa/meson.build        |   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 117 ++++++++++++++++++++++++
 examples/vdpa/vhost_user.h       | 189 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 466 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
index d974db4..9d0479b 100644
--- a/examples/vdpa/Makefile
+++ b/examples/vdpa/Makefile
@@ -5,7 +5,7 @@
 APP = vdpa
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c vdpa_blk_compact.c
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 PKGCONF ?= pkg-config
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..924ad7b 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -156,6 +157,7 @@ struct vdpa_port {
 static const struct rte_vhost_device_ops vdpa_sample_devops = {
 	.new_device = new_device,
 	.destroy_device = destroy_device,
+	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
 };
 
 static int
@@ -192,6 +194,12 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
+		< 0)
+		rte_exit(EXIT_FAILURE,
+			"set vhost blk driver features and protocol features failed: %s\n",
+			socket_path);
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
index bd08605..f0d111c 100644
--- a/examples/vdpa/meson.build
+++ b/examples/vdpa/meson.build
@@ -15,4 +15,5 @@ deps += 'vhost'
 allow_experimental_apis = true
 sources = files(
         'main.c',
+	'vdpa_blk_compact.c',
 )
diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
new file mode 100644
index 0000000..0c4d3ee
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.c
@@ -0,0 +1,150 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+/* @file
+ *
+ * Block device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include <vdpa_driver.h>
+#include <rte_vhost.h>
+#include "vdpa_blk_compact.h"
+#include "vhost_user.h"
+
+#define VHOST_USER_GET_CONFIG	24
+#define VHOST_USER_SET_CONFIG	25
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
+/*
+ * Function to handle vhost user blk message
+ */
+static enum rte_vhost_msg_result
+rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
+{
+	struct VhostUserMsg *msg = _msg;
+	struct rte_vdpa_device *vdev = NULL;
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	if (vdev == NULL)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	fprintf(stderr, "msg is %d\n", msg->request.master);
+	switch (msg->request.master) {
+	case VHOST_USER_GET_CONFIG: {
+		int rc = 0;
+
+		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
+
+		if (vdev->ops->get_config) {
+			fprintf(stdout, "get_config() function is valid!\n");
+			rc = vdev->ops->get_config(vid,
+						   msg->payload.cfg.region,
+						   msg->payload.cfg.size);
+			if (rc != 0) {
+				msg->size = 0;
+				fprintf(stdout, "get_config() return error!\n");
+			}
+		} else {
+			fprintf(stdout, "get_config() function is invalid!\n");
+		}
+
+		return RTE_VHOST_MSG_RESULT_REPLY;
+	}
+	case VHOST_USER_SET_CONFIG: {
+		int rc = 0;
+
+		fprintf(stdout,
+			"read message VHOST_USER_SET_CONFIG\n");
+
+		if (vdev->ops->set_config) {
+			rc = vdev->ops->set_config(vid,
+				msg->payload.cfg.region,
+				msg->payload.cfg.offset,
+				msg->payload.cfg.size,
+				msg->payload.cfg.flags);
+		}
+
+		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+	}
+	default:
+		break;
+	}
+
+	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
+}
+
+struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {
+	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
+	.post_msg_handle = NULL,
+};
+
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid)
+{
+	int rc;
+
+	rc = rte_vhost_extern_callback_register(vid,
+						&g_blk_extern_vhost_ops,
+						NULL);
+	if (rc != 0) {
+		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
+		return -1;
+	}
+	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
+	return 0;
+}
+
+
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev)
+{
+	uint64_t protocol_features = 0;
+
+	if (!vdev) {
+		fprintf(stdout, "vdev is NULL.\n");
+		return -EINVAL;
+	}
+
+	/* vdpa net does not have the get_config */
+	if (!vdev->ops->get_config)
+		return 0;
+
+	rte_vhost_driver_set_features(path, SPDK_VHOST_BLK_FEATURES_BASE);
+	rte_vhost_driver_disable_features(path,
+		SPDK_VHOST_BLK_DISABLED_FEATURES);
+
+	rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+	rte_vhost_driver_set_protocol_features(path, protocol_features);
+
+	return 0;
+}
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..420d48e
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,117 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set the vhost-user blk features and protocol features for a socket.
+ *
+ * @param path
+ *  socket path
+ */
+int
+vdpa_blk_device_set_features_and_protocol(const char *path,
+	struct rte_vdpa_device *vdev);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Install external hook to handle vhost user block message
+ *
+ * @param vid
+ *  vhost device id
+ */
+int
+rte_vhost_blk_session_install_rte_compat_hooks(int vid);
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..8b747d0
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,189 @@
+/*    INTEL CONFIDENTIAL
+ *
+ *    Copyright (c) Intel Corporation.
+ *    All rights reserved.
+ *
+ *    The source code contained or described herein and all documents related
+ *    to the source code ("Material") are owned by Intel Corporation or its
+ *    suppliers or licensors.  Title to the Material remains with Intel
+ *    Corporation or its suppliers and licensors.  The Material contains trade
+ *    secrets and proprietary and confidential information of Intel or its
+ *    suppliers and licensors.  The Material is protected by worldwide
+ *    copyright and trade secret laws and treaty provisions.  No part of the
+ *    Material may be used, copied, reproduced, modified, published, uploaded,
+ *    posted, transmitted, distributed, or disclosed in any way without Intel's
+ *    prior express written permission.
+ *
+ *    No license under any patent, copyright, trade secret or other
+ *    intellectual property right is granted to or conferred upon you by
+ *    disclosure or delivery of the Materials, either expressly, by
+ *    implication, inducement, estoppel or otherwise.  Any license under such
+ *    intellectual property rights must be express and approved by Intel in
+ *    writing.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 07/15] usertools: add support for virtio blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 06/15] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                       ` (7 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to devbind.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 07/15] usertools: add support for virtio blk device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
The set_vring_state op is mandatory, so add a set_vring_state callback
for the blk device. For now the callback is only a stub: it ignores its
arguments and returns 0.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index a930825..24ae27b 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1373,6 +1373,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1390,7 +1400,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Log the virtio blk device config space information
at vDPA launch, before QEMU connects.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 24ae27b..3c4e5f6 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1436,6 +1436,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1502,6 +1505,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 10/15] vdpa/ifc: read virtio max_queues from hardware
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                       ` (4 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Previously, max_queues was always set to IFCVF_MAX_QUEUES.
For the blk device, max_queues is now the minimum of IFCVF_MAX_QUEUES and
the num_queues value read from the hardware config space.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3c4e5f6..86dd1c6 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1530,6 +1530,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 11/15] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                       ` (3 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 111 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 86dd1c6..37fa45e 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -617,6 +619,106 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -643,10 +745,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -963,6 +1071,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                       ` (2 subsequent siblings)
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add is_blk flag to ifcvf_hw, and init is_blk during probe.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 37fa45e..b65e3a3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1608,11 +1608,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pause Andy Pei
  2022-01-29  3:03     ` [PATCH v3 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
1. last_avail_idx is the lower 16 bits of the ring_state register.
2. The address of the ring_state register differs between net and blk devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 3a69e53..a8a4728 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -216,10 +216,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -252,9 +260,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pause
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  2022-01-29  3:03     ` [PATCH v3 15/15] vhost: make sure each queue callfd is configured Andy Pei
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
When a virtio blk device is paused, make sure the hardware last_avail_idx
and last_used_idx are the same.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index a8a4728..7018048 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -116,7 +116,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index b65e3a3..75dbe63 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -356,23 +356,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -759,7 +768,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v3 15/15] vhost: make sure each queue callfd is configured
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-01-29  3:03     ` [PATCH v3 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pause Andy Pei
@ 2022-01-29  3:03     ` Andy Pei
  14 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-01-29  3:03 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
While the vhost data path is being set up, QEMU first creates one
call fd and, at the end, creates another call fd.
The final call fd is the one used to relay notifications.
In the original code, dev_conf runs as soon as the kick fd is set, so it
configures the first call fd. Even though the actual call fd is set
afterwards, the data path does not work correctly.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 5eb1dd6..b25b25f 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3137,12 +3137,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR,
 					 "Failed to configure vDPA device\n");
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* when VIRTIO_DEV_VDPA_CONFIGURED already configured
+		 * close the device and config the device again,
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v2 15/15] vhost: make sure each queue callfd is configured
  2022-01-27  7:13       ` Xia, Chenbo
@ 2022-01-29  3:11         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-01-29  3:11 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
I will send out a V3 patch set to address that.
-----Original Message-----
From: Xia, Chenbo <chenbo.xia@intel.com> 
Sent: Thursday, January 27, 2022 3:13 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: RE: [PATCH v2 15/15] vhost: make sure each queue callfd is configured
Hi Andy,
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Tuesday, January 25, 2022 5:37 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; 
> Cao, Gang <gang.cao@intel.com>; Liu, Changpeng 
> <changpeng.liu@intel.com>
> Subject: [PATCH v2 15/15] vhost: make sure each queue callfd is 
> configured
> 
> During the vhost data path building process, qemu will create a call 
> fd at first, and create another call fd in the end.
> The final call fd will be used to relay notify.
> In the original code, after kick fd is set, dev_conf will set the 
> first call fd. Even though the actual call fd will set, the data path 
> will not work correctly.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 1.8.3.1
Please fix all errors reported on patchwork first.
http://patchwork.dpdk.org/project/dpdk/patch/1643103437-118618-16-git-send-email-andy.pei@intel.com/
Thanks,
Chenbo
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device
  2022-01-29  3:03     ` [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-03-22  8:57       ` Maxime Coquelin
  0 siblings, 0 replies; 263+ messages in thread
From: Maxime Coquelin @ 2022-03-22  8:57 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu
On 1/29/22 04:03, Andy Pei wrote:
> Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id.
> Blk and net device are implemented with proper feature and ops.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
>   2 files changed, 98 insertions(+), 10 deletions(-)
> 
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v3 02/15] vhost: add vdpa ops for blk device
  2022-01-29  3:03     ` [PATCH v3 02/15] vhost: add vdpa ops for " Andy Pei
@ 2022-03-22  9:12       ` Maxime Coquelin
  0 siblings, 0 replies; 263+ messages in thread
From: Maxime Coquelin @ 2022-03-22  9:12 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu
On 1/29/22 04:03, Andy Pei wrote:
> Get_config and set_config are necessary ops for blk device.
> Add get_config and set_config ops to vdpa ops.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   lib/vhost/vdpa_driver.h | 8 ++++++--
>   1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
> index fc2d6ac..9a23db9 100644
> --- a/lib/vhost/vdpa_driver.h
> +++ b/lib/vhost/vdpa_driver.h
> @@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
>   	/** Reset statistics of the queue */
>   	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
>   
> -	/** Reserved for future extension */
> -	void *reserved[2];
> +	/** Get the device configuration space */
> +	int (*get_config)(int vid, uint8_t *config, uint32_t len);
> +
> +	/** Set the device configuration space */
> +	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
> +		      uint32_t size, uint32_t flags);
>   };
>   
>   /**
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device
  2022-01-29  3:03     ` [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-03-22  9:58       ` Maxime Coquelin
  0 siblings, 0 replies; 263+ messages in thread
From: Maxime Coquelin @ 2022-03-22  9:58 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu
On 1/29/22 04:03, Andy Pei wrote:
> For virtio blk device, re-use part of ifc driver ops.
> Implement ifcvf_blk_get_config for virtio blk device.
> Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
> blk device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/base/ifcvf.h |  4 ++
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
>   2 files changed, 88 insertions(+), 1 deletion(-)
> 
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-01-29  3:03     ` [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-03-22 10:04       ` Maxime Coquelin
  2022-03-23  7:07         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Maxime Coquelin @ 2022-03-22 10:04 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu
On 1/29/22 04:03, Andy Pei wrote:
> For the blk we need to relay all the cmd of each queue.
The message is not clear to me, do you mean "For the block device type,
we have to relay the commands on all queues."?
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
>   1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 778e1fd..4f99bb3 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
>   	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>   	irq_set->start = 0;
>   	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change notification */
>   	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>   		rte_intr_fd_get(internal->pdev->intr_handle);
>   
>   	for (i = 0; i < nr_vring; i++)
>   		internal->intr_fd[i] = -1;
>   
> -	for (i = 0; i < nr_vring; i++) {
> -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -			if (fd < 0) {
> -				DRV_LOG(ERR, "can't setup eventfd: %s",
> -					strerror(errno));
> -				return -1;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if ((i & 1) == 0 && m_rx == true) {
> +				/* For the net we only need to relay rx queue,
> +				 * which will change the mem of VM.
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> +			}
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if (m_rx == true) {
> +				/* For the blk we need to relay all the read cmd
> +				 * of each queue
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>   			}
> -			internal->intr_fd[i] = fd;
> -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>   		}
>   	}
>   
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration
  2022-01-29  3:03     ` [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
@ 2022-03-22 11:10       ` Maxime Coquelin
  2022-03-23  9:08         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Maxime Coquelin @ 2022-03-22 11:10 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu
Hi Andy,
"vdpa/ifc: add block device SW live-migration"
On 1/29/22 04:03, Andy Pei wrote:
> Enable virtio blk sw live migration relay callfd and log the dirty page.
Please try to make the above sentence simpler. Also, it seems that the
patch below changes behaviour for net devices, so the commit message
should explain that.
> In this version we ignore the write cmd and still mark it dirty.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/base/ifcvf.c |   4 +-
>   drivers/vdpa/ifc/base/ifcvf.h |   6 ++
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
>   3 files changed, 116 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
> index 721cb1d..3a69e53 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -189,7 +189,7 @@
>   	IFCVF_WRITE_REG32(val >> 32, hi);
>   }
>   
> -STATIC int
> +int
>   ifcvf_hw_enable(struct ifcvf_hw *hw)
>   {
>   	struct ifcvf_pci_common_cfg *cfg;
> @@ -238,7 +238,7 @@
>   	return 0;
>   }
>   
> -STATIC void
> +void
>   ifcvf_hw_disable(struct ifcvf_hw *hw)
>   {
>   	u32 i;
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 769c603..6dd7925 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -179,4 +179,10 @@ struct ifcvf_hw {
>   u64
>   ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
>   
> +int
> +ifcvf_hw_enable(struct ifcvf_hw *hw);
> +
> +void
> +ifcvf_hw_disable(struct ifcvf_hw *hw);
> +
>   #endif /* _IFCVF_H_ */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 4f99bb3..a930825 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
>   
>   	rte_vhost_get_negotiated_features(vid, &features);
>   	if (RTE_VHOST_NEED_LOG(features)) {
> -		ifcvf_disable_logging(hw);
> -		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> -		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> -				log_base, IFCVF_LOG_BASE, log_size);
> +		if (internal->device_type == IFCVF_NET) {
> +			ifcvf_disable_logging(hw);
> +			rte_vhost_get_log_base(internal->vid, &log_base,
> +				&log_size);
> +			rte_vfio_container_dma_unmap(
> +				internal->vfio_container_fd, log_base,
> +				IFCVF_LOG_BASE, log_size);
> +		}
> +		/* IFCVF marks dirty memory pages for only packet buffer,
> +		 * SW helps to mark the used ring as dirty after device stops.
> +		 */
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> +			rte_vhost_log_used_vring(vid, i, 0, len);
> +		}
> +	}
> +}
> +
> +static void
> +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
> +{
> +	struct ifcvf_hw *hw = &internal->hw;
> +	struct rte_vhost_vring vq;
> +	int i, vid;
> +	uint64_t features = 0;
> +	uint64_t log_base = 0, log_size = 0;
> +	uint64_t len;
> +
> +	vid = internal->vid;
> +
> +	if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> +			while (vq.avail->idx != vq.used->idx) {
> +				ifcvf_notify_queue(hw, i);
> +				usleep(10);
> +			}
> +			hw->vring[i].last_avail_idx = vq.avail->idx;
> +			hw->vring[i].last_used_idx = vq.used->idx;
> +		}
> +	}
> +
> +	ifcvf_hw_disable(hw);
> +
> +	for (i = 0; i < hw->nr_vring; i++)
> +		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> +				hw->vring[i].last_used_idx);
> +
> +	if (internal->sw_lm)
> +		return;
> +
> +	rte_vhost_get_negotiated_features(vid, &features);
> +	if (RTE_VHOST_NEED_LOG(features)) {
> +		if (internal->device_type == IFCVF_NET) {
> +			ifcvf_disable_logging(hw);
> +			rte_vhost_get_log_base(internal->vid, &log_base,
> +				&log_size);
> +			rte_vfio_container_dma_unmap(
> +				internal->vfio_container_fd, log_base,
> +				IFCVF_LOG_BASE, log_size);
> +		}
>   		/*
>   		 * IFCVF marks dirty memory pages for only packet buffer,
>   		 * SW helps to mark the used ring as dirty after device stops.
> @@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
>   		}
>   		hw->vring[i].avail = gpa;
>   
> -		/* Direct I/O for Tx queue, relay for Rx queue */
> -		if (i & 1) {
> +		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
> +		 * BLK: relay every queue
> +		 */
> +		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
>   			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
>   			if (gpa == 0) {
>   				DRV_LOG(ERR, "Fail to get GPA for used ring.");
>   				return -1;
>   			}
>   			hw->vring[i].used = gpa;
> -		} else {
> +		} else if (internal->device_type == IFCVF_BLK) {
>   			hw->vring[i].used = m_vring_iova +
>   				(char *)internal->m_vring[i].used -
>   				(char *)internal->m_vring[i].desc;
> @@ -688,7 +747,10 @@ struct rte_vdpa_dev_info {
>   	}
>   	hw->nr_vring = nr_vring;
>   
> -	return ifcvf_start_hw(&internal->hw);
> +	if (internal->device_type == IFCVF_NET)
> +		return ifcvf_start_hw(&internal->hw);
> +	else if (internal->device_type == IFCVF_BLK)
> +		return ifcvf_hw_enable(&internal->hw);
>   
>   error:
>   	for (i = 0; i < nr_vring; i++)
> @@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
>   
>   	for (i = 0; i < hw->nr_vring; i++) {
>   		/* synchronize remaining new used entries if any */
> -		if ((i & 1) == 0)
> +		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
> +		     internal->device_type == IFCVF_BLK) {
>   			update_used_ring(internal, i);
> +		}
>   
>   		rte_vhost_get_vhost_vring(vid, i, &vq);
>   		len = IFCVF_USED_RING_LEN(vq.size);
> @@ -726,6 +790,8 @@ struct rte_vdpa_dev_info {
>   			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
>   			m_vring_iova, size);
>   
> +		hw->vring[i].last_avail_idx = vq.used->idx;
> +		hw->vring[i].last_used_idx = vq.used->idx;
>   		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
>   				hw->vring[i].last_used_idx);
>   		rte_free(internal->m_vring[i].desc);
> @@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
>   		}
>   	}
>   
> -	for (qid = 0; qid < q_num; qid += 2) {
> -		ev.events = EPOLLIN | EPOLLPRI;
> -		/* leave a flag to mark it's for interrupt */
> -		ev.data.u64 = 1 | qid << 1 |
> -			(uint64_t)internal->intr_fd[qid] << 32;
> -		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
> -				< 0) {
> -			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> -			return NULL;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (qid = 0; qid < q_num; qid += 2) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (qid = 0; qid < q_num; qid += 1) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
>   		}
> -		update_used_ring(internal, qid);
>   	}
>   
>   	/* start relay with a first kick */
> @@ -874,7 +959,10 @@ struct rte_vdpa_dev_info {
>   
>   	/* stop the direct IO data path */
>   	unset_notify_relay(internal);
> -	vdpa_ifcvf_stop(internal);
> +	if (internal->device_type == IFCVF_NET)
> +		vdpa_ifcvf_stop(internal);
> +	else if (internal->device_type == IFCVF_BLK)
> +		vdpa_ifcvf_blk_pause(internal);
>   	vdpa_disable_vfio_intr(internal);
>   
>   	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v3 06/15] example/vdpa:add vdpa blk support in example
  2022-01-29  3:03     ` [PATCH v3 06/15] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-03-22 11:29       ` Maxime Coquelin
  2022-03-23  9:31         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Maxime Coquelin @ 2022-03-22 11:29 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu
On 1/29/22 04:03, Andy Pei wrote:
> Add virtio blk device support to vdpa example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   examples/vdpa/Makefile           |   2 +-
>   examples/vdpa/main.c             |   8 ++
>   examples/vdpa/meson.build        |   1 +
>   examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++++++++++++++++++++++
>   examples/vdpa/vdpa_blk_compact.h | 117 ++++++++++++++++++++++++
>   examples/vdpa/vhost_user.h       | 189 +++++++++++++++++++++++++++++++++++++++
>   6 files changed, 466 insertions(+), 1 deletion(-)
>   create mode 100644 examples/vdpa/vdpa_blk_compact.c
>   create mode 100644 examples/vdpa/vdpa_blk_compact.h
>   create mode 100644 examples/vdpa/vhost_user.h
> 
> diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
> index d974db4..9d0479b 100644
> --- a/examples/vdpa/Makefile
> +++ b/examples/vdpa/Makefile
> @@ -5,7 +5,7 @@
>   APP = vdpa
>   
>   # all source are stored in SRCS-y
> -SRCS-y := main.c
> +SRCS-y := main.c vdpa_blk_compact.c
>   CFLAGS += -DALLOW_EXPERIMENTAL_API
>   
>   PKGCONF ?= pkg-config
> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
> index 5ab0765..924ad7b 100644
> --- a/examples/vdpa/main.c
> +++ b/examples/vdpa/main.c
> @@ -20,6 +20,7 @@
>   #include <cmdline_parse_string.h>
>   #include <cmdline_parse_num.h>
>   #include <cmdline.h>
> +#include "vdpa_blk_compact.h"
>   
>   #define MAX_PATH_LEN 128
>   #define MAX_VDPA_SAMPLE_PORTS 1024
> @@ -156,6 +157,7 @@ struct vdpa_port {
>   static const struct rte_vhost_device_ops vdpa_sample_devops = {
>   	.new_device = new_device,
>   	.destroy_device = destroy_device,
> +	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
>   };
>   
>   static int
> @@ -192,6 +194,12 @@ struct vdpa_port {
>   			"attach vdpa device failed: %s\n",
>   			socket_path);
>   
> +	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
> +		< 0)
> +		rte_exit(EXIT_FAILURE,
> +			"set vhost blk driver features and protocol features failed: %s\n",
> +			socket_path);
> +
That does not look right: blk device-specific functions should be
called only for block devices.
>   	if (rte_vhost_driver_start(socket_path) < 0)
>   		rte_exit(EXIT_FAILURE,
>   			"start vhost driver failed: %s\n",
> diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
> index bd08605..f0d111c 100644
> --- a/examples/vdpa/meson.build
> +++ b/examples/vdpa/meson.build
> @@ -15,4 +15,5 @@ deps += 'vhost'
>   allow_experimental_apis = true
>   sources = files(
>           'main.c',
> +	'vdpa_blk_compact.c',
>   )
> diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
> new file mode 100644
> index 0000000..0c4d3ee
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.c
> @@ -0,0 +1,150 @@
> +/*    INTEL CONFIDENTIAL
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +/* @file
> + *
> + * Block device specific vhost lib
> + */
> +
> +#include <stdbool.h>
> +
> +#include <rte_malloc.h>
> +#include <vdpa_driver.h>
That's wrong, the application is not supposed to include the driver
APIs.
> +#include <rte_vhost.h>
> +#include "vdpa_blk_compact.h"
> +#include "vhost_user.h"
> +
> +#define VHOST_USER_GET_CONFIG	24
> +#define VHOST_USER_SET_CONFIG	25
> +
> +#ifndef VHOST_USER_PROTOCOL_F_CONFIG
> +#define VHOST_USER_PROTOCOL_F_CONFIG   9
> +#endif
> +
> +/*
> + * Function to handle vhost user blk message
> + */
> +static enum rte_vhost_msg_result
> +rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
> +{
> +	struct VhostUserMsg *msg = _msg;
> +	struct rte_vdpa_device *vdev = NULL;
> +
> +	vdev = rte_vhost_get_vdpa_device(vid);
> +	if (vdev == NULL)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	fprintf(stderr, "msg is %d\n", msg->request.master);
> +	switch (msg->request.master) {
> +	case VHOST_USER_GET_CONFIG: {
> +		int rc = 0;
> +
> +		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
> +
> +		if (vdev->ops->get_config) {
> +			fprintf(stdout, "get_config() function is valid!\n");
> +			rc = vdev->ops->get_config(vid,
> +						   msg->payload.cfg.region,
> +						   msg->payload.cfg.size);
> +			if (rc != 0) {
> +				msg->size = 0;
> +				fprintf(stdout, "get_config() return error!\n");
> +			}
> +		} else {
> +			fprintf(stdout, "get_config() function is invalid!\n");
> +		}
> +
> +		return RTE_VHOST_MSG_RESULT_REPLY;
> +	}
> +	case VHOST_USER_SET_CONFIG: {
> +		int rc = 0;
> +
> +		fprintf(stdout,
> +			"read message VHOST_USER_SET_CONFIG\n");
> +
> +		if (vdev->ops->set_config) {
> +			rc = vdev->ops->set_config(vid,
> +				msg->payload.cfg.region,
> +				msg->payload.cfg.offset,
> +				msg->payload.cfg.size,
> +				msg->payload.cfg.flags);
> +		}
> +
> +		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
> +	}
> +	default:
> +		break;
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
> +}
I think above message handling should be done in the Vhost library
directly. VHOST_USER_SET_CONFIG and VHOST_USER_GET_CONFIG are not
specific to blk backends, these are generic messages.
> +
> +struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {
> +	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
> +	.post_msg_handle = NULL,
> +};
> +
> +int
> +rte_vhost_blk_session_install_rte_compat_hooks(int vid)
> +{
> +	int rc;
> +
> +	rc = rte_vhost_extern_callback_register(vid,
> +						&g_blk_extern_vhost_ops,
> +						NULL);
> +	if (rc != 0) {
> +		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
> +		return -1;
> +	}
> +	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
> +	return 0;
> +}
> +
> +
> +int
> +vdpa_blk_device_set_features_and_protocol(const char *path,
> +	struct rte_vdpa_device *vdev)
> +{
> +	uint64_t protocol_features = 0;
> +
> +	if (!vdev) {
> +		fprintf(stdout, "vdev is NULL.\n");
> +		return -EINVAL;
> +	}
> +
> +	/* vdpa net does not have the get_config */
> +	if (!vdev->ops->get_config)
> +		return 0;
That's not good. As I said earlier, the driver's callbacks should not be
visible to the application. Maybe the VDPA API should be extended to
return the device type, I'm not sure, but accessing the driver's ops is
prohibited.
> +	rte_vhost_driver_set_features(path, SPDK_VHOST_BLK_FEATURES_BASE);
> +	rte_vhost_driver_disable_features(path,
> +		SPDK_VHOST_BLK_DISABLED_FEATURES);
> +
> +	rte_vhost_driver_get_protocol_features(path, &protocol_features);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
> +	rte_vhost_driver_set_protocol_features(path, protocol_features);
> +
> +	return 0;
> +}
> diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
> new file mode 100644
> index 0000000..420d48e
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.h
> @@ -0,0 +1,117 @@
> +/*    INTEL CONFIDENTIAL
I hope it was not supposed to be confidential :)
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +#ifndef _VDPA_BLK_COMPACT_H_
> +#define _VDPA_BLK_COMPACT_H_
> +
> +/**
> + * @file
> + *
> + * Device specific vhost lib
> + */
> +/vdpa
> +#include <stdbool.h>
> +
> +#include <rte_pci.h>
> +#include <rte_vhost.h>
> +
> +/* Feature bits */
> +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
> +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
> +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> +#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
> +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
> +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
> +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
> +
> +/* Legacy feature bits */
> +#ifndef VIRTIO_BLK_NO_LEGACY
> +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
> +#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
> +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
> +
> +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
> +#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
> +#endif /* !VIRTIO_BLK_NO_LEGACY */
> +
> +#ifndef VHOST_USER_F_PROTOCOL_FEATURES
> +#define VHOST_USER_F_PROTOCOL_FEATURES 30
> +#endif
> +
> +#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
Why these references to SPDK?
> +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> +	(1ULL << VIRTIO_F_VERSION_1) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
> +
> +#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
> +
> +#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
> +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
> +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
> +	(1ULL << VIRTIO_BLK_F_MQ))
> +
> +/* Not supported features */
> +#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
> +	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
> +
> +/* Vhost-blk support protocol features */
> +#define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
> +	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
> +	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * This function will set vhost user block
> + *
> + * @param path
> + *  socket path
> + */
> +int
> +vdpa_blk_device_set_features_and_protocol(const char *path,
> +	struct rte_vdpa_device *vdev);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Install external hook to handle vhost user block message
> + *
> + * @param vid
> + *  vhost device id
> + */
> +int
> +rte_vhost_blk_session_install_rte_compat_hooks(int vid);
> +
> +#endif /* _VDPA_BLK_COMPACT_H_ */
> diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
> new file mode 100644
> index 0000000..8b747d0
> --- /dev/null
> +++ b/examples/vdpa/vhost_user.h
> @@ -0,0 +1,189 @@
> +/*    INTEL CONFIDENTIAL
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "rte_vhost.h"
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +
> +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +#endif
> +
> +#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
> +			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
> +
> +typedef enum VhostUserRequest {
> +	VHOST_USER_NONE = 0,
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SET_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> +	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> +	VHOST_USER_POSTCOPY_ADVISE = 28,
> +	VHOST_USER_POSTCOPY_LISTEN = 29,
> +	VHOST_USER_POSTCOPY_END = 30,
> +	VHOST_USER_GET_INFLIGHT_FD = 31,
> +	VHOST_USER_SET_INFLIGHT_FD = 32,
> +	VHOST_USER_MAX = 33
> +} VhostUserRequest;
> +
> +typedef enum VhostUserSlaveRequest {
> +	VHOST_USER_SLAVE_NONE = 0,
> +	VHOST_USER_SLAVE_IOTLB_MSG = 1,
> +	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
> +	VHOST_USER_SLAVE_MAX
> +} VhostUserSlaveRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +	uint64_t guest_phys_addr;
> +	uint64_t memory_size;
> +	uint64_t userspace_addr;
> +	uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +	uint32_t nregions;
> +	uint32_t padding;
> +	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserLog {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +} VhostUserLog;
> +
> +/* Comply with Cryptodev-Linux */
> +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
> +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
> +
> +/* Same structure as vhost-user backend session info */
> +typedef struct VhostUserCryptoSessionParam {
> +	int64_t session_id;
> +	uint32_t op_code;
> +	uint32_t cipher_algo;
> +	uint32_t cipher_key_len;
> +	uint32_t hash_algo;
> +	uint32_t digest_len;
> +	uint32_t auth_key_len;
> +	uint32_t aad_len;
> +	uint8_t op_type;
> +	uint8_t dir;
> +	uint8_t hash_mode;
> +	uint8_t chaining_dir;
> +	uint8_t *ciphe_key;
> +	uint8_t *auth_key;
> +	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
> +	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
> +} VhostUserCryptoSessionParam;
> +
> +typedef struct VhostUserVringArea {
> +	uint64_t u64;
> +	uint64_t size;
> +	uint64_t offset;
> +} VhostUserVringArea;
> +
> +typedef struct VhostUserInflight {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +	uint16_t num_queues;
> +	uint16_t queue_size;
> +} VhostUserInflight;
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
> +typedef struct VhostUserMsg {
> +	union {
> +		uint32_t master; /* a VhostUserRequest value */
> +		uint32_t slave;  /* a VhostUserSlaveRequest value*/
> +	} request;
> +
> +#define VHOST_USER_VERSION_MASK     0x3
> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> +#define VHOST_USER_NEED_REPLY		(0x1 << 3)
> +	uint32_t flags;
> +	uint32_t size; /* the following payload size */
> +	union {
> +#define VHOST_USER_VRING_IDX_MASK   0xff
> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> +		uint64_t u64;
> +		struct vhost_vring_state state;
> +		struct vhost_vring_addr addr;
> +		VhostUserMemory memory;
> +		VhostUserLog    log;
> +		struct vhost_iotlb_msg iotlb;
> +		VhostUserCryptoSessionParam crypto_session;
> +		VhostUserVringArea area;
> +		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
> +	} payload;
> +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> +	int fd_num;
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    0x1
> +#endif
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-03-22 10:04       ` Maxime Coquelin
@ 2022-03-23  7:07         ` Pei, Andy
  2022-03-23  7:42           ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Pei, Andy @ 2022-03-23  7:07 UTC (permalink / raw)
  To: Maxime Coquelin, dev; +Cc: Xia, Chenbo, Cao, Gang, Liu, Changpeng
Hi Maxime,
Thanks for your reply; my responses are inline.
-----Original Message-----
From: Maxime Coquelin <maxime.coquelin@redhat.com> 
Sent: Tuesday, March 22, 2022 6:05 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: Re: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
On 1/29/22 04:03, Andy Pei wrote:
> For the blk we need to relay all the cmd of each queue.
The message is not clear to me, do you mean "For the block device type, we have to relay the commands on all queues."?
Andy: Yes. A BLK device can work with a single queue, whereas a NET device uses queue pairs.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
>   1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c 
> b/drivers/vdpa/ifc/ifcvf_vdpa.c index 778e1fd..4f99bb3 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
>   	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>   	irq_set->start = 0;
>   	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change 
> +notification */
>   	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>   		rte_intr_fd_get(internal->pdev->intr_handle);
>   
>   	for (i = 0; i < nr_vring; i++)
>   		internal->intr_fd[i] = -1;
>   
> -	for (i = 0; i < nr_vring; i++) {
> -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -			if (fd < 0) {
> -				DRV_LOG(ERR, "can't setup eventfd: %s",
> -					strerror(errno));
> -				return -1;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if ((i & 1) == 0 && m_rx == true) {
> +				/* For the net we only need to relay rx queue,
> +				 * which will change the mem of VM.
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> +			}
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if (m_rx == true) {
> +				/* For the blk we need to relay all the read cmd
> +				 * of each queue
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>   			}
> -			internal->intr_fd[i] = fd;
> -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>   		}
>   	}
>   
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
  2022-03-23  7:07         ` Pei, Andy
@ 2022-03-23  7:42           ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-03-23  7:42 UTC (permalink / raw)
  To: Pei, Andy, Maxime Coquelin, dev; +Cc: Xia, Chenbo, Cao, Gang, Liu, Changpeng
Hi Maxime,
I think it is better to change the commit log to your description:
"For the block device type, we have to relay the commands on all queues."
I will do this in the next version of the patch set.
-----Original Message-----
From: Pei, Andy <andy.pei@intel.com> 
Sent: Wednesday, March 23, 2022 3:08 PM
To: Maxime Coquelin <maxime.coquelin@redhat.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: RE: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
Hi Maxime,
Thanks for your reply and my reply is inline.
-----Original Message-----
From: Maxime Coquelin <maxime.coquelin@redhat.com> 
Sent: Tuesday, March 22, 2022 6:05 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: Re: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
On 1/29/22 04:03, Andy Pei wrote:
> For the blk we need to relay all the cmd of each queue.
The message is not clear to me, do you mean "For the block device type, we have to relay the commands on all queues."?
Andy: Yes. A BLK device can work with a single queue, whereas a NET device uses queue pairs.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
>   1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c 
> b/drivers/vdpa/ifc/ifcvf_vdpa.c index 778e1fd..4f99bb3 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
>   	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>   	irq_set->start = 0;
>   	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change 
> +notification */
>   	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>   		rte_intr_fd_get(internal->pdev->intr_handle);
>   
>   	for (i = 0; i < nr_vring; i++)
>   		internal->intr_fd[i] = -1;
>   
> -	for (i = 0; i < nr_vring; i++) {
> -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -			if (fd < 0) {
> -				DRV_LOG(ERR, "can't setup eventfd: %s",
> -					strerror(errno));
> -				return -1;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if ((i & 1) == 0 && m_rx == true) {
> +				/* For the net we only need to relay rx queue,
> +				 * which will change the mem of VM.
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> +			}
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if (m_rx == true) {
> +				/* For the blk we need to relay all the read cmd
> +				 * of each queue
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>   			}
> -			internal->intr_fd[i] = fd;
> -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>   		}
>   	}
>   
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration
  2022-03-22 11:10       ` Maxime Coquelin
@ 2022-03-23  9:08         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-03-23  9:08 UTC (permalink / raw)
  To: Maxime Coquelin, dev; +Cc: Xia, Chenbo, Cao, Gang, Liu, Changpeng
Hi Maxime,
Thanks for your reply; my reply is inline.
-----Original Message-----
From: Maxime Coquelin <maxime.coquelin@redhat.com> 
Sent: Tuesday, March 22, 2022 7:10 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: Re: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration
Hi Andy,
"vdpa/ifc: add block device SW live-migration"
On 1/29/22 04:03, Andy Pei wrote:
> Enable virtio blk sw live migration relay callfd and log the dirty page.
Please try to make the above sentence simpler. Also, it seems that below patch changes behaviour for net devices, so the commit message should explain that.
Andy: Sure, I think it is better to send out a new patch set,
using a simpler commit log and reworked to make sure the code does not change the behavior of net devices.
> In this version we ignore the write cmd and still mark it dirty.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   drivers/vdpa/ifc/base/ifcvf.c |   4 +-
>   drivers/vdpa/ifc/base/ifcvf.h |   6 ++
>   drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
>   3 files changed, 116 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c 
> b/drivers/vdpa/ifc/base/ifcvf.c index 721cb1d..3a69e53 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -189,7 +189,7 @@
>   	IFCVF_WRITE_REG32(val >> 32, hi);
>   }
>   
> -STATIC int
> +int
>   ifcvf_hw_enable(struct ifcvf_hw *hw)
>   {
>   	struct ifcvf_pci_common_cfg *cfg;
> @@ -238,7 +238,7 @@
>   	return 0;
>   }
>   
> -STATIC void
> +void
>   ifcvf_hw_disable(struct ifcvf_hw *hw)
>   {
>   	u32 i;
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h 
> b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..6dd7925 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -179,4 +179,10 @@ struct ifcvf_hw {
>   u64
>   ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
>   
> +int
> +ifcvf_hw_enable(struct ifcvf_hw *hw);
> +
> +void
> +ifcvf_hw_disable(struct ifcvf_hw *hw);
> +
>   #endif /* _IFCVF_H_ */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c 
> b/drivers/vdpa/ifc/ifcvf_vdpa.c index 4f99bb3..a930825 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
>   
>   	rte_vhost_get_negotiated_features(vid, &features);
>   	if (RTE_VHOST_NEED_LOG(features)) {
> -		ifcvf_disable_logging(hw);
> -		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> -		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> -				log_base, IFCVF_LOG_BASE, log_size);
> +		if (internal->device_type == IFCVF_NET) {
> +			ifcvf_disable_logging(hw);
> +			rte_vhost_get_log_base(internal->vid, &log_base,
> +				&log_size);
> +			rte_vfio_container_dma_unmap(
> +				internal->vfio_container_fd, log_base,
> +				IFCVF_LOG_BASE, log_size);
> +		}
> +		/* IFCVF marks dirty memory pages for only packet buffer,
> +		 * SW helps to mark the used ring as dirty after device stops.
> +		 */
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> +			rte_vhost_log_used_vring(vid, i, 0, len);
> +		}
> +	}
> +}
> +
> +static void
> +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) {
> +	struct ifcvf_hw *hw = &internal->hw;
> +	struct rte_vhost_vring vq;
> +	int i, vid;
> +	uint64_t features = 0;
> +	uint64_t log_base = 0, log_size = 0;
> +	uint64_t len;
> +
> +	vid = internal->vid;
> +
> +	if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> +			while (vq.avail->idx != vq.used->idx) {
> +				ifcvf_notify_queue(hw, i);
> +				usleep(10);
> +			}
> +			hw->vring[i].last_avail_idx = vq.avail->idx;
> +			hw->vring[i].last_used_idx = vq.used->idx;
> +		}
> +	}
> +
> +	ifcvf_hw_disable(hw);
> +
> +	for (i = 0; i < hw->nr_vring; i++)
> +		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> +				hw->vring[i].last_used_idx);
> +
> +	if (internal->sw_lm)
> +		return;
> +
> +	rte_vhost_get_negotiated_features(vid, &features);
> +	if (RTE_VHOST_NEED_LOG(features)) {
> +		if (internal->device_type == IFCVF_NET) {
> +			ifcvf_disable_logging(hw);
> +			rte_vhost_get_log_base(internal->vid, &log_base,
> +				&log_size);
> +			rte_vfio_container_dma_unmap(
> +				internal->vfio_container_fd, log_base,
> +				IFCVF_LOG_BASE, log_size);
> +		}
>   		/*
>   		 * IFCVF marks dirty memory pages for only packet buffer,
>   		 * SW helps to mark the used ring as dirty after device stops.
> @@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
>   		}
>   		hw->vring[i].avail = gpa;
>   
> -		/* Direct I/O for Tx queue, relay for Rx queue */
> -		if (i & 1) {
> +		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
> +		 * BLK: relay every queue
> +		 */
> +		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
>   			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
>   			if (gpa == 0) {
>   				DRV_LOG(ERR, "Fail to get GPA for used ring.");
>   				return -1;
>   			}
>   			hw->vring[i].used = gpa;
> -		} else {
> +		} else if (internal->device_type == IFCVF_BLK) {
>   			hw->vring[i].used = m_vring_iova +
>   				(char *)internal->m_vring[i].used -
>   				(char *)internal->m_vring[i].desc; @@ -688,7 +747,10 @@ struct 
> rte_vdpa_dev_info {
>   	}
>   	hw->nr_vring = nr_vring;
>   
> -	return ifcvf_start_hw(&internal->hw);
> +	if (internal->device_type == IFCVF_NET)
> +		return ifcvf_start_hw(&internal->hw);
> +	else if (internal->device_type == IFCVF_BLK)
> +		return ifcvf_hw_enable(&internal->hw);
>   
>   error:
>   	for (i = 0; i < nr_vring; i++)
> @@ -713,8 +775,10 @@ struct rte_vdpa_dev_info {
>   
>   	for (i = 0; i < hw->nr_vring; i++) {
>   		/* synchronize remaining new used entries if any */
> -		if ((i & 1) == 0)
> +		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
> +		     internal->device_type == IFCVF_BLK) {
>   			update_used_ring(internal, i);
> +		}
>   
>   		rte_vhost_get_vhost_vring(vid, i, &vq);
>   		len = IFCVF_USED_RING_LEN(vq.size); @@ -726,6 +790,8 @@ struct 
> rte_vdpa_dev_info {
>   			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
>   			m_vring_iova, size);
>   
> +		hw->vring[i].last_avail_idx = vq.used->idx;
> +		hw->vring[i].last_used_idx = vq.used->idx;
>   		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
>   				hw->vring[i].last_used_idx);
>   		rte_free(internal->m_vring[i].desc);
> @@ -776,17 +842,36 @@ struct rte_vdpa_dev_info {
>   		}
>   	}
>   
> -	for (qid = 0; qid < q_num; qid += 2) {
> -		ev.events = EPOLLIN | EPOLLPRI;
> -		/* leave a flag to mark it's for interrupt */
> -		ev.data.u64 = 1 | qid << 1 |
> -			(uint64_t)internal->intr_fd[qid] << 32;
> -		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
> -				< 0) {
> -			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> -			return NULL;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (qid = 0; qid < q_num; qid += 2) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (qid = 0; qid < q_num; qid += 1) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
>   		}
> -		update_used_ring(internal, qid);
>   	}
>   
>   	/* start relay with a first kick */ @@ -874,7 +959,10 @@ struct 
> rte_vdpa_dev_info {
>   
>   	/* stop the direct IO data path */
>   	unset_notify_relay(internal);
> -	vdpa_ifcvf_stop(internal);
> +	if (internal->device_type == IFCVF_NET)
> +		vdpa_ifcvf_stop(internal);
> +	else if (internal->device_type == IFCVF_BLK)
> +		vdpa_ifcvf_blk_pause(internal);
>   	vdpa_disable_vfio_intr(internal);
>   
>   	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, 
> false);
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v3 06/15] example/vdpa:add vdpa blk support in example
  2022-03-22 11:29       ` Maxime Coquelin
@ 2022-03-23  9:31         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-03-23  9:31 UTC (permalink / raw)
  To: Maxime Coquelin, dev; +Cc: Xia, Chenbo, Cao, Gang, Liu, Changpeng
Hi Maxime,
It seems there are a lot of problems with the example.
I think I will re-work the example according to your comments.
Thanks for your comments.
-----Original Message-----
From: Maxime Coquelin <maxime.coquelin@redhat.com> 
Sent: Tuesday, March 22, 2022 7:30 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: Re: [PATCH v3 06/15] example/vdpa:add vdpa blk support in example
On 1/29/22 04:03, Andy Pei wrote:
> Add virtio blk device support to vdpa example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>   examples/vdpa/Makefile           |   2 +-
>   examples/vdpa/main.c             |   8 ++
>   examples/vdpa/meson.build        |   1 +
>   examples/vdpa/vdpa_blk_compact.c | 150 +++++++++++++++++++++++++++++++
>   examples/vdpa/vdpa_blk_compact.h | 117 ++++++++++++++++++++++++
>   examples/vdpa/vhost_user.h       | 189 +++++++++++++++++++++++++++++++++++++++
>   6 files changed, 466 insertions(+), 1 deletion(-)
>   create mode 100644 examples/vdpa/vdpa_blk_compact.c
>   create mode 100644 examples/vdpa/vdpa_blk_compact.h
>   create mode 100644 examples/vdpa/vhost_user.h
> 
> diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile index 
> d974db4..9d0479b 100644
> --- a/examples/vdpa/Makefile
> +++ b/examples/vdpa/Makefile
> @@ -5,7 +5,7 @@
>   APP = vdpa
>   
>   # all source are stored in SRCS-y
> -SRCS-y := main.c
> +SRCS-y := main.c vdpa_blk_compact.c
>   CFLAGS += -DALLOW_EXPERIMENTAL_API
>   
>   PKGCONF ?= pkg-config
> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index 
> 5ab0765..924ad7b 100644
> --- a/examples/vdpa/main.c
> +++ b/examples/vdpa/main.c
> @@ -20,6 +20,7 @@
>   #include <cmdline_parse_string.h>
>   #include <cmdline_parse_num.h>
>   #include <cmdline.h>
> +#include "vdpa_blk_compact.h"
>   
>   #define MAX_PATH_LEN 128
>   #define MAX_VDPA_SAMPLE_PORTS 1024
> @@ -156,6 +157,7 @@ struct vdpa_port {
>   static const struct rte_vhost_device_ops vdpa_sample_devops = {
>   	.new_device = new_device,
>   	.destroy_device = destroy_device,
> +	.new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
>   };
>   
>   static int
> @@ -192,6 +194,12 @@ struct vdpa_port {
>   			"attach vdpa device failed: %s\n",
>   			socket_path);
>   
> +	if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
> +		< 0)
> +		rte_exit(EXIT_FAILURE,
> +			"set vhost blk driver features and protocol features failed: %s\n",
> +			socket_path);
> +
That does not look right; blk device specific functions should be called only for block devices.
>   	if (rte_vhost_driver_start(socket_path) < 0)
>   		rte_exit(EXIT_FAILURE,
>   			"start vhost driver failed: %s\n", diff --git 
> a/examples/vdpa/meson.build b/examples/vdpa/meson.build index 
> bd08605..f0d111c 100644
> --- a/examples/vdpa/meson.build
> +++ b/examples/vdpa/meson.build
> @@ -15,4 +15,5 @@ deps += 'vhost'
>   allow_experimental_apis = true
>   sources = files(
>           'main.c',
> +	'vdpa_blk_compact.c',
>   )
> diff --git a/examples/vdpa/vdpa_blk_compact.c 
> b/examples/vdpa/vdpa_blk_compact.c
> new file mode 100644
> index 0000000..0c4d3ee
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.c
> @@ -0,0 +1,150 @@
> +/*    INTEL CONFIDENTIAL
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +/* @file
> + *
> + * Block device specific vhost lib
> + */
> +
> +#include <stdbool.h>
> +
> +#include <rte_malloc.h>
> +#include <vdpa_driver.h>
That's wrong, the application is not supposed to include the driver APIs.
> +#include <rte_vhost.h>
> +#include "vdpa_blk_compact.h"
> +#include "vhost_user.h"
> +
> +#define VHOST_USER_GET_CONFIG	24
> +#define VHOST_USER_SET_CONFIG	25
> +
> +#ifndef VHOST_USER_PROTOCOL_F_CONFIG
> +#define VHOST_USER_PROTOCOL_F_CONFIG   9
> +#endif
> +
> +/*
> + * Function to handle vhost user blk message  */ static enum 
> +rte_vhost_msg_result rte_vhost_blk_extern_vhost_pre_msg_handler(int 
> +vid, void *_msg) {
> +	struct VhostUserMsg *msg = _msg;
> +	struct rte_vdpa_device *vdev = NULL;
> +
> +	vdev = rte_vhost_get_vdpa_device(vid);
> +	if (vdev == NULL)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	fprintf(stderr, "msg is %d\n", msg->request.master);
> +	switch (msg->request.master) {
> +	case VHOST_USER_GET_CONFIG: {
> +		int rc = 0;
> +
> +		fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
> +
> +		if (vdev->ops->get_config) {
> +			fprintf(stdout, "get_config() function is valid!\n");
> +			rc = vdev->ops->get_config(vid,
> +						   msg->payload.cfg.region,
> +						   msg->payload.cfg.size);
> +			if (rc != 0) {
> +				msg->size = 0;
> +				fprintf(stdout, "get_config() return error!\n");
> +			}
> +		} else {
> +			fprintf(stdout, "get_config() function is invalid!\n");
> +		}
> +
> +		return RTE_VHOST_MSG_RESULT_REPLY;
> +	}
> +	case VHOST_USER_SET_CONFIG: {
> +		int rc = 0;
> +
> +		fprintf(stdout,
> +			"read message VHOST_USER_SET_CONFIG\n");
> +
> +		if (vdev->ops->set_config) {
> +			rc = vdev->ops->set_config(vid,
> +				msg->payload.cfg.region,
> +				msg->payload.cfg.offset,
> +				msg->payload.cfg.size,
> +				msg->payload.cfg.flags);
> +		}
> +
> +		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
> +	}
> +	default:
> +		break;
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_NOT_HANDLED; }
I think above message handling should be done in the Vhost library directly. VHOST_USER_SET_CONFIG and VHOST_USER_GET_CONFIG are not specific to blk backends, these are generic messages.
> +
> +struct rte_vhost_user_extern_ops g_blk_extern_vhost_ops = {
> +	.pre_msg_handle = rte_vhost_blk_extern_vhost_pre_msg_handler,
> +	.post_msg_handle = NULL,
> +};
> +
> +int
> +rte_vhost_blk_session_install_rte_compat_hooks(int vid) {
> +	int rc;
> +
> +	rc = rte_vhost_extern_callback_register(vid,
> +						&g_blk_extern_vhost_ops,
> +						NULL);
> +	if (rc != 0) {
> +		fprintf(stderr, "%s() failed for vid = %d\n",  __func__, vid);
> +		return -1;
> +	}
> +	fprintf(stdout, "register extern vhost ops on vid = %d\n", vid);
> +	return 0;
> +}
> +
> +
> +int
> +vdpa_blk_device_set_features_and_protocol(const char *path,
> +	struct rte_vdpa_device *vdev)
> +{
> +	uint64_t protocol_features = 0;
> +
> +	if (!vdev) {
> +		fprintf(stdout, "vdev is NULL.\n");
> +		return -EINVAL;
> +	}
> +
> +	/* vdpa net does not have the get_config */
> +	if (!vdev->ops->get_config)
> +		return 0;
That's not good, as I said earlier, the drivers callback should not be visible to the application. Maybe the VDPA API should be extended to return the device type, I'm not sure, but accessing the drivers ops is prohibited.
> +	rte_vhost_driver_set_features(path, SPDK_VHOST_BLK_FEATURES_BASE);
> +	rte_vhost_driver_disable_features(path,
> +		SPDK_VHOST_BLK_DISABLED_FEATURES);
> +
> +	rte_vhost_driver_get_protocol_features(path, &protocol_features);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
> +	rte_vhost_driver_set_protocol_features(path, protocol_features);
> +
> +	return 0;
> +}
> diff --git a/examples/vdpa/vdpa_blk_compact.h 
> b/examples/vdpa/vdpa_blk_compact.h
> new file mode 100644
> index 0000000..420d48e
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.h
> @@ -0,0 +1,117 @@
> +/*    INTEL CONFIDENTIAL
I hope it was not supposed to be confidential :)
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +#ifndef _VDPA_BLK_COMPACT_H_
> +#define _VDPA_BLK_COMPACT_H_
> +
> +/**
> + * @file
> + *
> + * Device specific vhost lib
> + */
> +/vdpa
> +#include <stdbool.h>
> +
> +#include <rte_pci.h>
> +#include <rte_vhost.h>
> +
> +/* Feature bits */
> +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
> +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
> +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> +#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
> +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
> +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
> +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
> +
> +/* Legacy feature bits */
> +#ifndef VIRTIO_BLK_NO_LEGACY
> +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
> +#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
> +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
> +
> +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */ #define 
> +VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH #endif /* !VIRTIO_BLK_NO_LEGACY 
> +*/
> +
> +#ifndef VHOST_USER_F_PROTOCOL_FEATURES #define 
> +VHOST_USER_F_PROTOCOL_FEATURES 30 #endif
> +
> +#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
Why these references to SPDK?
> +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> +	(1ULL << VIRTIO_F_VERSION_1) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
> +
> +#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
> +
> +#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
> +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
> +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
> +	(1ULL << VIRTIO_BLK_F_MQ))
> +
> +/* Not supported features */
> +#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
> +	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
> +
> +/* Vhost-blk support protocol features */ #define 
> +SPDK_VHOST_BLK_PROTOCOL_FEATURES \
> +	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
> +	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * This function will set vhost user block
> + *
> + * @param path
> + *  socket path
> + */
> +int
> +vdpa_blk_device_set_features_and_protocol(const char *path,
> +	struct rte_vdpa_device *vdev);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Install external hook to handle vhost user block message
> + *
> + * @param vid
> + *  vhost device id
> + */
> +int
> +rte_vhost_blk_session_install_rte_compat_hooks(int vid);
> +
> +#endif /* _VDPA_BLK_COMPACT_H_ */
> diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h 
> new file mode 100644 index 0000000..8b747d0
> --- /dev/null
> +++ b/examples/vdpa/vhost_user.h
> @@ -0,0 +1,189 @@
> +/*    INTEL CONFIDENTIAL
> + *
> + *    Copyright (c) Intel Corporation.
> + *    All rights reserved.
> + *
> + *    The source code contained or described herein and all documents related
> + *    to the source code ("Material") are owned by Intel Corporation or its
> + *    suppliers or licensors.  Title to the Material remains with Intel
> + *    Corporation or its suppliers and licensors.  The Material contains trade
> + *    secrets and proprietary and confidential information of Intel or its
> + *    suppliers and licensors.  The Material is protected by worldwide
> + *    copyright and trade secret laws and treaty provisions.  No part of the
> + *    Material may be used, copied, reproduced, modified, published, uploaded,
> + *    posted, transmitted, distributed, or disclosed in any way without Intel's
> + *    prior express written permission.
> + *
> + *    No license under any patent, copyright, trade secret or other
> + *    intellectual property right is granted to or conferred upon you by
> + *    disclosure or delivery of the Materials, either expressly, by
> + *    implication, inducement, estoppel or otherwise.  Any license under such
> + *    intellectual property rights must be express and approved by Intel in
> + *    writing.
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "rte_vhost.h"
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +
> +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +#endif
> +
> +#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
> +			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
> +
> +typedef enum VhostUserRequest {
> +	VHOST_USER_NONE = 0,
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SET_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> +	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> +	VHOST_USER_POSTCOPY_ADVISE = 28,
> +	VHOST_USER_POSTCOPY_LISTEN = 29,
> +	VHOST_USER_POSTCOPY_END = 30,
> +	VHOST_USER_GET_INFLIGHT_FD = 31,
> +	VHOST_USER_SET_INFLIGHT_FD = 32,
> +	VHOST_USER_MAX = 33
> +} VhostUserRequest;
> +
> +typedef enum VhostUserSlaveRequest {
> +	VHOST_USER_SLAVE_NONE = 0,
> +	VHOST_USER_SLAVE_IOTLB_MSG = 1,
> +	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
> +	VHOST_USER_SLAVE_MAX
> +} VhostUserSlaveRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +	uint64_t guest_phys_addr;
> +	uint64_t memory_size;
> +	uint64_t userspace_addr;
> +	uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +	uint32_t nregions;
> +	uint32_t padding;
> +	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserLog {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +} VhostUserLog;
> +
> +/* Comply with Cryptodev-Linux */
> +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
> +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
> +
> +/* Same structure as vhost-user backend session info */ typedef 
> +struct VhostUserCryptoSessionParam {
> +	int64_t session_id;
> +	uint32_t op_code;
> +	uint32_t cipher_algo;
> +	uint32_t cipher_key_len;
> +	uint32_t hash_algo;
> +	uint32_t digest_len;
> +	uint32_t auth_key_len;
> +	uint32_t aad_len;
> +	uint8_t op_type;
> +	uint8_t dir;
> +	uint8_t hash_mode;
> +	uint8_t chaining_dir;
> +	uint8_t *ciphe_key;
> +	uint8_t *auth_key;
> +	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
> +	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
> +} VhostUserCryptoSessionParam;
> +
> +typedef struct VhostUserVringArea {
> +	uint64_t u64;
> +	uint64_t size;
> +	uint64_t offset;
> +} VhostUserVringArea;
> +
> +typedef struct VhostUserInflight {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +	uint16_t num_queues;
> +	uint16_t queue_size;
> +} VhostUserInflight;
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
> +typedef struct VhostUserMsg {
> +	union {
> +		uint32_t master; /* a VhostUserRequest value */
> +		uint32_t slave;  /* a VhostUserSlaveRequest value*/
> +	} request;
> +
> +#define VHOST_USER_VERSION_MASK     0x3
> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> +#define VHOST_USER_NEED_REPLY		(0x1 << 3)
> +	uint32_t flags;
> +	uint32_t size; /* the following payload size */
> +	union {
> +#define VHOST_USER_VRING_IDX_MASK   0xff
> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> +		uint64_t u64;
> +		struct vhost_vring_state state;
> +		struct vhost_vring_addr addr;
> +		VhostUserMemory memory;
> +		VhostUserLog    log;
> +		struct vhost_iotlb_msg iotlb;
> +		VhostUserCryptoSessionParam crypto_session;
> +		VhostUserVringArea area;
> +		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
> +	} payload;
> +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> +	int fd_num;
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    0x1
> +#endif
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-03-27 14:51   ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (15 more replies)
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (5 subsequent siblings)
  8 siblings, 16 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the net and blk devices have a lot in common, part of the
existing vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device id, and
device-specific features and ops are implemented for each.
An example is added to vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to the
vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL msg.
v4:
 Add the "isblk" argument to the vdpa example to specify a block device,
 and fix some issues in the example.
 Make sure the code specific to the block device does not affect the net
 device.
v3:
 Fix some compile issues.
v2:
  Fix some coding style issues.
Andy Pei (16):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA lauch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
    same when blk device pause
  vhost: make sure each queue callfd is configured
 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 523 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/main.c             |  61 ++++-
 examples/vdpa/vdpa_blk_compact.h |  72 ++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  65 +++++
 lib/vhost/vhost_user.h           |  15 ++
 usertools/dpdk-devbind.py        |   8 +
 10 files changed, 937 insertions(+), 55 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 01/16] vdpa/ifc: add support for virtio blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 02/16] vhost: add vdpa ops for " Andy Pei
                       ` (14 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Re-use the vdpa/ifc code and distinguish blk and net devices by
pci_device_id.
Blk and net devices are each implemented with their proper features and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1306,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1313,6 +1375,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 02/16] vhost: add vdpa ops for blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-03-27 14:51     ` [PATCH v4 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
                       ` (13 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
get_config and set_config are necessary ops for a blk device.
Add the get_config and set_config ops to the vdpa ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-03-27 14:51     ` [PATCH v4 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-03-27 14:51     ` [PATCH v4 02/16] vhost: add vdpa ops for " Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk vDPA devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 15 +++++++++++++++
 2 files changed, 65 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..b11fafd 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
 	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
 	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
+	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
 	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,52 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR, "get_config() return error!\n");
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "get_config() not supportted!\n");
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "set_config() not supportted!\n");
+	}
+
+	return ret == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2782,6 +2830,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
 	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
 	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
 	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
 	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
 	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..d3f014e 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -125,6 +127,18 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -148,6 +162,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 04/16] vdpa/ifc: add blk ops for ifc device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For the virtio blk device, re-use part of the ifc driver ops.
Implement ifcvf_blk_get_config for the virtio blk device.
Support the VHOST_USER_PROTOCOL_F_CONFIG feature for the virtio
blk device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 05/16] vdpa/ifc: add vdpa interrupt for blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (10 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For the block device type, we have to relay
the commands on all queues.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..8d104b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configuration space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 06/16] vdpa/ifc: add block device SW live-migration
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 07/16] example/vdpa:add vdpa blk support in example Andy Pei
                       ` (9 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add SW live-migration support to block device.
Add dirty page logging to block device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 115 insertions(+), 23 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..e417c50 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -191,7 +191,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -240,7 +240,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8d104b7..a23dc2d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
 	}
 }
 
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		ifcvf_disable_logging(hw);
+		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+				log_base, IFCVF_LOG_BASE, log_size);
+		/*
+		 * IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
 		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
 static int
@@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
-			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
-			if (gpa == 0) {
-				DRV_LOG(ERR, "Fail to get GPA for used ring.");
-				return -1;
+		if (internal->device_type == IFCVF_NET) {
+			/* Direct I/O for Tx queue, relay for Rx queue */
+			if (i & 1) {
+				gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
+				if (gpa == 0) {
+					DRV_LOG(ERR, "Fail to get GPA for used ring.");
+					return -1;
+				}
+				hw->vring[i].used = gpa;
+			} else {
+				hw->vring[i].used = m_vring_iova +
+					(char *)internal->m_vring[i].used -
+					(char *)internal->m_vring[i].desc;
 			}
-			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
+			/* BLK: relay every queue */
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 07/16] example/vdpa:add vdpa blk support in example
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 08/16] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to vdpa example.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             |  61 +++++++++++++-
 examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..1c809ab 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -41,6 +42,7 @@ struct vdpa_port {
 static int devcnt;
 static int interactive;
 static int client_mode;
+static int isblk;
 
 /* display usage */
 static void
@@ -49,7 +51,8 @@ struct vdpa_port {
 	printf("Usage: %s [EAL options] -- "
 				 "	--interactive|-i: run in interactive mode.\n"
 				 "	--iface <path>: specify the path prefix of the socket files, e.g. /tmp/vhost-user-.\n"
-				 "	--client: register a vhost-user socket as client mode.\n",
+				 "	--client: register a vhost-user socket as client mode.\n"
+				 "	--isblk: device is a block device, e.g. virtio_blk device.\n",
 				 prgname);
 }
 
@@ -61,6 +64,7 @@ struct vdpa_port {
 		{"iface", required_argument, NULL, 0},
 		{"interactive", no_argument, &interactive, 1},
 		{"client", no_argument, &client_mode, 1},
+		{"isblk", no_argument, &isblk, 1},
 		{NULL, 0, 0, 0},
 	};
 	int opt, idx;
@@ -159,6 +163,52 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES_BASE);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_VDPA_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
 	int ret;
@@ -192,6 +242,15 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (isblk) {
+		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..e7c0f22
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define VHOST_BLK_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_VDPA_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..137bbc2
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 08/16] usertools: add support for virtio blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 07/16] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 20:01       ` Stephen Hemminger
  2022-03-27 14:51     ` [PATCH v4 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                       ` (7 subsequent siblings)
  15 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 10/16] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index a23dc2d..28191e4 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1368,6 +1368,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1385,7 +1395,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 10/16] vdpa/ifc: add some log at VDPA lauch before qemu connect
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 28191e4..9bc2f47 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1431,6 +1431,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	__u64 capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1497,6 +1500,32 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/**
+		** cannot read 64-bit register in one attempt,
+		** so read byte by byte.
+		**/
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (__u64)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 11/16] vdpa/ifc: read virtio max_queues from hardware
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 10/16] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                       ` (4 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Previously, max_queues was always set to IFCVF_MAX_QUEUES.
Now max_queues is the minimum of IFCVF_MAX_QUEUES and the num_queues value read from the hardware.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9bc2f47..20a0b01 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1526,6 +1526,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 12/16] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                       ` (3 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Create a thread to poll for and relay config space change interrupts.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 113 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 20a0b01..826b408 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -608,6 +610,108 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+
+	return;
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -634,10 +738,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -958,6 +1068,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                       ` (2 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 826b408..95538c1 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1605,11 +1605,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
  2022-03-27 14:51     ` [PATCH v4 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
1. For a blk device, last_avail_idx is the lower 16 bits of the ring_state register.
2. The address of the ring_state register differs between net and blk devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index e417c50..d923266 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +262,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  2022-03-27 14:51     ` [PATCH v4 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d923266..d89cb73 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -118,7 +118,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 95538c1..36fd850 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -351,23 +351,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -752,7 +761,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v4 16/16] vhost: make sure each queue callfd is configured
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (14 preceding siblings ...)
  2022-03-27 14:51     ` [PATCH v4 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
@ 2022-03-27 14:51     ` Andy Pei
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-27 14:51 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
During the vhost data path building process, QEMU first creates
a call fd, and then creates another call fd at the end.
The final call fd is the one used to relay notifications.
In the original code, once the kick fd is set, dev_conf
configures the first call fd. Even though the actual call fd is
set later, the data path does not work correctly.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index b11fafd..8c5904f 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3197,12 +3197,27 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n",
 					dev->ifname);
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/**
+		** VIRTIO_DEV_VDPA_CONFIGURED already configured
+		** close the device and open the device again,
+		** make sure the call fd of each queue is configed to haedware.
+		**/
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v4 08/16] usertools: add support for virtio blk device
  2022-03-27 14:51     ` [PATCH v4 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-03-27 20:01       ` Stephen Hemminger
  0 siblings, 0 replies; 263+ messages in thread
From: Stephen Hemminger @ 2022-03-27 20:01 UTC (permalink / raw)
  To: Andy Pei; +Cc: dev, chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
On Sun, 27 Mar 2022 22:51:31 +0800
Andy Pei <andy.pei@intel.com> wrote:
> Signed-off-by: Andy Pei <andy.pei@intel.com>
Shouldn't we just recommend driverctl instead?
I had patches for devbind to use vmbus rejected because of that.
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                     ` (2 preceding siblings ...)
  2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-03-28  7:17   ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (15 more replies)
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (4 subsequent siblings)
  8 siblings, 16 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the net and blk devices have a lot in common, part of the
vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device id, and each
implements its own specific features and ops.
An example is added to vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to the
vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL msg.
v5:
 fix some coding style issues.
v4:
 add args "isblk" to vdpa example to specify a block device, fix some
 issue in example.
 Make sure code specific to the block device does not affect the net device.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.
Andy Pei (16):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
    device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
    virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
    from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
    same when blk device pause
  vhost: make sure each queue callfd is configured
 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 521 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/main.c             |  61 ++++-
 examples/vdpa/vdpa_blk_compact.h |  72 ++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  64 +++++
 lib/vhost/vhost_user.h           |  15 ++
 usertools/dpdk-devbind.py        |   8 +
 10 files changed, 934 insertions(+), 55 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 01/16] vdpa/ifc: add support for virtio blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 02/16] vhost: add vdpa ops for " Andy Pei
                       ` (14 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id.
Blk and net device are implemented with proper feature and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1306,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1313,6 +1375,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 02/16] vhost: add vdpa ops for blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-03-28  7:17     ` [PATCH v5 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
                       ` (13 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vdpa ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-03-28  7:17     ` [PATCH v5 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-03-28  7:17     ` [PATCH v5 02/16] vhost: add vdpa ops for " Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-04-20 13:53       ` Xia, Chenbo
  2022-03-28  7:17     ` [PATCH v5 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  15 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by the virtio blk vDPA device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 15 +++++++++++++++
 2 files changed, 65 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..55e8bd0 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
 	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
 	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
+	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
 	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,52 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR, "get_config() return error!\n");
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "get_config() not supported!\n");
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "set_config() not supported!\n");
+	}
+
+	return ret == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2782,6 +2830,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
 	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
 	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
 	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
 	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
 	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..d3f014e 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -125,6 +127,18 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -148,6 +162,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 04/16] vdpa/ifc: add blk ops for ifc device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 05/16] vdpa/ifc: add vdpa interrupt for blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (10 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For the block device type, we have to relay
the commands on all queues.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..8d104b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 06/16] vdpa/ifc: add block device SW live-migration
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 07/16] example/vdpa:add vdpa blk support in example Andy Pei
                       ` (9 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add SW live-migration support to block device.
Add dirty page logging to block device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 115 insertions(+), 23 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..e417c50 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -191,7 +191,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -240,7 +240,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8d104b7..a23dc2d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
 	}
 }
 
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		ifcvf_disable_logging(hw);
+		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+				log_base, IFCVF_LOG_BASE, log_size);
+		/*
+		 * IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
 		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
 static int
@@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
-			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
-			if (gpa == 0) {
-				DRV_LOG(ERR, "Fail to get GPA for used ring.");
-				return -1;
+		if (internal->device_type == IFCVF_NET) {
+			/* Direct I/O for Tx queue, relay for Rx queue */
+			if (i & 1) {
+				gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
+				if (gpa == 0) {
+					DRV_LOG(ERR, "Fail to get GPA for used ring.");
+					return -1;
+				}
+				hw->vring[i].used = gpa;
+			} else {
+				hw->vring[i].used = m_vring_iova +
+					(char *)internal->m_vring[i].used -
+					(char *)internal->m_vring[i].desc;
 			}
-			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
+			/* BLK: relay every queue */
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 07/16] example/vdpa:add vdpa blk support in example
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 08/16] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to vdpa example.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             |  61 +++++++++++++-
 examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..1c809ab 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -41,6 +42,7 @@ struct vdpa_port {
 static int devcnt;
 static int interactive;
 static int client_mode;
+static int isblk;
 
 /* display usage */
 static void
@@ -49,7 +51,8 @@ struct vdpa_port {
 	printf("Usage: %s [EAL options] -- "
 				 "	--interactive|-i: run in interactive mode.\n"
 				 "	--iface <path>: specify the path prefix of the socket files, e.g. /tmp/vhost-user-.\n"
-				 "	--client: register a vhost-user socket as client mode.\n",
+				 "	--client: register a vhost-user socket as client mode.\n"
+				 "	--isblk: device is a block device, e.g. virtio_blk device.\n",
 				 prgname);
 }
 
@@ -61,6 +64,7 @@ struct vdpa_port {
 		{"iface", required_argument, NULL, 0},
 		{"interactive", no_argument, &interactive, 1},
 		{"client", no_argument, &client_mode, 1},
+		{"isblk", no_argument, &isblk, 1},
 		{NULL, 0, 0, 0},
 	};
 	int opt, idx;
@@ -159,6 +163,52 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES_BASE);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_VDPA_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
 	int ret;
@@ -192,6 +242,15 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (isblk) {
+		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..e7c0f22
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define VHOST_BLK_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_VDPA_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..137bbc2
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 08/16] usertools: add support for virtio blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 07/16] example/vdpa:add vdpa blk support in example Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
                       ` (7 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to devbind.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 10/16] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
The set_vring_state op is mandatory, so add a set_vring_state callback for
the blk device. The callback is currently a stub: per-vring state handling
for the blk device is not implemented yet, and the callback simply returns 0.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index a23dc2d..28191e4 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1368,6 +1368,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1385,7 +1395,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 10/16] vdpa/ifc: add some log at VDPA launch before qemu connect
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Log the virtio blk device config space information
at vDPA launch, before QEMU connects.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 28191e4..045623b 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1431,6 +1431,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1497,6 +1500,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 11/16] vdpa/ifc: read virtio max_queues from hardware
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 10/16] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
                       ` (4 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Previously, max_queues was always set to IFCVF_MAX_QUEUES.
Now, for the blk device, max_queues is the minimum of IFCVF_MAX_QUEUES and
the num_queues value read from the hardware.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 045623b..e8e7d61 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1525,6 +1525,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 12/16] vdpa: add config space change interrupt register and handle for virtio_blk
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
                       ` (3 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 112 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e8e7d61..c02ae4d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -608,6 +610,107 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -634,10 +737,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -958,6 +1067,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
                       ` (2 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add is_blk flag to ifcvf_hw, and init is_blk during probe.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index c02ae4d..f54beaf 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1604,11 +1604,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
  2022-03-28  7:17     ` [PATCH v5 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
1. last_avail_idx is the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index e417c50..d923266 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +262,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  2022-03-28  7:17     ` [PATCH v5 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
When a virtio blk device is paused, make sure the hardware last_avail_idx
and last_used_idx are the same.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d923266..d89cb73 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -118,7 +118,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index f54beaf..578bf6c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -351,23 +351,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -751,7 +760,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v5 16/16] vhost: make sure each queue callfd is configured
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (14 preceding siblings ...)
  2022-03-28  7:17     ` [PATCH v5 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
@ 2022-03-28  7:17     ` Andy Pei
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-03-28  7:17 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
While building the vhost data path, QEMU first creates one call fd
and creates another call fd at the end.
The final call fd is the one used to relay notifications.
In the original code, dev_conf runs after the kick fd is set and
configures the first call fd. Even though the actual call fd is set later,
the data path does not work correctly.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 55e8bd0..43154c0 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3197,12 +3197,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n",
 					dev->ifname);
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* When the device is already VIRTIO_DEV_VDPA_CONFIGURED,
+		 * close the device and configure it again to
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  2022-03-28  7:17     ` [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
@ 2022-04-20 13:53       ` Xia, Chenbo
  2022-04-21  8:05         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-04-20 13:53 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Andy,
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, March 28, 2022 3:17 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG
Let's make the title a bit shorter...
./devtools/check-git-log.sh will help you find other similar errors for other
patches.
> 
> Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> supported by virtio blk VDPA device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 50
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vhost_user.h | 15 +++++++++++++++
>  2 files changed, 65 insertions(+)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 1d39067..55e8bd0 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -80,6 +80,8 @@
>  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
>  	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
>  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
>  	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>  	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
>  	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
> @@ -2542,6 +2544,52 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
> 
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (vdpa_dev->ops->get_config) {
> +		ret = vdpa_dev->ops->get_config(dev->vid,
> +					   ctx->msg.payload.cfg.region,
> +					   ctx->msg.payload.cfg.size);
> +		if (ret != 0) {
> +			ctx->msg.size = 0;
> +			VHOST_LOG_CONFIG(ERR, "get_config() return error!\n");
> +		}
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "get_config() not supported!\n");
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (vdpa_dev->ops->set_config) {
> +		ret = vdpa_dev->ops->set_config(dev->vid,
> +			ctx->msg.payload.cfg.region,
> +			ctx->msg.payload.cfg.offset,
> +			ctx->msg.payload.cfg.size,
> +			ctx->msg.payload.cfg.flags);
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "set_config() not supported!\n");
> +	}
> +
> +	return ret == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
I think that when set_config fails in the vDPA driver, it should not break the message handler by
returning RESULT_ERR here.
For all the error logs above, please print dev->ifname too, which is more user-friendly.
> +}
> +
> +static int
>  vhost_user_iotlb_msg(struct virtio_net **pdev,
>  			struct vhu_msg_context *ctx,
>  			int main_fd __rte_unused)
> @@ -2782,6 +2830,8 @@ typedef int (*vhost_message_handler_t)(struct
> virtio_net **pdev,
>  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
>  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
>  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
>  	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
>  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
>  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
> diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
> index c946cc2..d3f014e 100644
> --- a/lib/vhost/vhost_user.h
> +++ b/lib/vhost/vhost_user.h
> @@ -50,6 +50,8 @@
>  	VHOST_USER_NET_SET_MTU = 20,
>  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
>  	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
>  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
>  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
>  	VHOST_USER_POSTCOPY_ADVISE = 28,
> @@ -125,6 +127,18 @@
>  	uint16_t queue_size;
>  } VhostUserInflight;
> 
> +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +#endif
For this config size, maybe '+#define VHOST_USER_MAX_CONFIG_SIZE 256' is enough?
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
Since the max size is defined, we should check the size in the msg
handler.
Thanks,
Chenbo
> +
>  typedef struct VhostUserMsg {
>  	union {
>  		uint32_t master; /* a VhostUserRequest value */
> @@ -148,6 +162,7 @@
>  		VhostUserCryptoSessionParam crypto_session;
>  		VhostUserVringArea area;
>  		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
>  	} payload;
>  	/* Nothing should be added after the payload */
>  } __rte_packed VhostUserMsg;
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
  2022-04-20 13:53       ` Xia, Chenbo
@ 2022-04-21  8:05         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-04-21  8:05 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Wednesday, April 20, 2022 9:53 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v5 03/16] vhost: add support for
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG
>
> Hi Andy,
>
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Monday, March 28, 2022 3:17 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v5 03/16] vhost: add support for
> VHOST_USER_GET_CONFIG
> > and VHOST_USER_SET_CONFIG
>
> Let's make the title a bit short...
>
> ./devtools/check-git-log.sh will help you find other similar errors for other
> patches.
>
OK, I will send out v6 to fix the commit log title.
Thanks for your suggestion.
> >
> > Add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG.
> > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > supported by virtio blk VDPA device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  lib/vhost/vhost_user.c | 50
> > ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/vhost/vhost_user.h | 15 +++++++++++++++
> >  2 files changed, 65 insertions(+)
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 1d39067..55e8bd0 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -80,6 +80,8 @@
> >     [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> >     [VHOST_USER_SET_SLAVE_REQ_FD]  =
> "VHOST_USER_SET_SLAVE_REQ_FD",
> >     [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > +   [VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > +   [VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> >     [VHOST_USER_CRYPTO_CREATE_SESS] =
> "VHOST_USER_CRYPTO_CREATE_SESS",
> >     [VHOST_USER_CRYPTO_CLOSE_SESS] =
> "VHOST_USER_CRYPTO_CLOSE_SESS",
> >     [VHOST_USER_POSTCOPY_ADVISE]  =
> "VHOST_USER_POSTCOPY_ADVISE", @@
> > -2542,6 +2544,52 @@ static int is_vring_iotlb(struct virtio_net *dev,
> > }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +                   struct vhu_msg_context *ctx,
> > +                   int main_fd __rte_unused)
> > +{
> > +   struct virtio_net *dev = *pdev;
> > +   struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +   int ret = 0;
> > +
> > +   if (vdpa_dev->ops->get_config) {
> > +           ret = vdpa_dev->ops->get_config(dev->vid,
> > +                                      ctx->msg.payload.cfg.region,
> > +                                      ctx->msg.payload.cfg.size);
> > +           if (ret != 0) {
> > +                   ctx->msg.size = 0;
> > +                   VHOST_LOG_CONFIG(ERR, "get_config() return
> error!\n");
> > +           }
> > +   } else {
> > +           VHOST_LOG_CONFIG(ERR, "get_config() not supported!\n");
> > +   }
> > +
> > +   return RTE_VHOST_MSG_RESULT_REPLY;
> > +}
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +                   struct vhu_msg_context *ctx,
> > +                   int main_fd __rte_unused)
> > +{
> > +   struct virtio_net *dev = *pdev;
> > +   struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +   int ret = 0;
> > +
> > +   if (vdpa_dev->ops->set_config) {
> > +           ret = vdpa_dev->ops->set_config(dev->vid,
> > +                   ctx->msg.payload.cfg.region,
> > +                   ctx->msg.payload.cfg.offset,
> > +                   ctx->msg.payload.cfg.size,
> > +                   ctx->msg.payload.cfg.flags);
> > +   } else {
> > +           VHOST_LOG_CONFIG(ERR, "set_config() not supported!\n");
> > +   }
> > +
> > +   return ret == 0 ? RTE_VHOST_MSG_RESULT_OK :
> > +RTE_VHOST_MSG_RESULT_ERR;
>
> I think when set_config fails in vdpa driver, it should not break message
> handler by returning RESULT_ERR here.
>
I will return RTE_VHOST_MSG_RESULT_OK and output a log message.
> All error log above, please print dev->ifname too, which will be user-friendly.
>
Sure. Thanks.
> > +}
> > +
> > +static int
> >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> >                     struct vhu_msg_context *ctx,
> >                     int main_fd __rte_unused)
> > @@ -2782,6 +2830,8 @@ typedef int (*vhost_message_handler_t)(struct
> > virtio_net **pdev,
> >     [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
> >     [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
> >     [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> > +   [VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > +   [VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> >     [VHOST_USER_POSTCOPY_ADVISE] =
> vhost_user_set_postcopy_advise,
> >     [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> >     [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> git
> > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > c946cc2..d3f014e 100644
> > --- a/lib/vhost/vhost_user.h
> > +++ b/lib/vhost/vhost_user.h
> > @@ -50,6 +50,8 @@
> >     VHOST_USER_NET_SET_MTU = 20,
> >     VHOST_USER_SET_SLAVE_REQ_FD = 21,
> >     VHOST_USER_IOTLB_MSG = 22,
> > +   VHOST_USER_GET_CONFIG = 24,
> > +   VHOST_USER_SET_CONFIG = 25,
> >     VHOST_USER_CRYPTO_CREATE_SESS = 26,
> >     VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> >     VHOST_USER_POSTCOPY_ADVISE = 28,
> > @@ -125,6 +127,18 @@
> >     uint16_t queue_size;
> >  } VhostUserInflight;
> >
> > +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> > +#define VHOST_USER_MAX_CONFIG_SIZE         256
> > +#endif
>
> For this config size, maybe '+#define VHOST_USER_MAX_CONFIG_SIZE 256' is
> enough?
>
Sure.
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +   uint32_t offset;
> > +   uint32_t size;
> > +   uint32_t flags;
> > +   uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
>
> Since the max size is defined, in the handler, we should check the size in the
> msg handler.
Sure.
>
> Thanks,
> Chenbo
>
> > +
> >  typedef struct VhostUserMsg {
> >     union {
> >             uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> +162,7 @@
> >             VhostUserCryptoSessionParam crypto_session;
> >             VhostUserVringArea area;
> >             VhostUserInflight inflight;
> > +           struct vhost_user_config cfg;
> >     } payload;
> >     /* Nothing should be added after the payload */  } __rte_packed
> > VhostUserMsg;
> > --
> > 1.8.3.1
>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                     ` (3 preceding siblings ...)
  2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-04-21  8:33   ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (15 more replies)
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (3 subsequent siblings)
  8 siblings, 16 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the devices have a lot of similarities, part of the vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device id, and
device-specific features and ops are implemented.
An example is added to vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to the vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL msg.
v6:
 fix some commit log.
 add vhost socket in log output to make it more user-friendly.
 when driver ops fail, just output some log, do not break message handler.
 check vhost msg size in msg handler.
v5:
 fix some coding style issues.
v4:
 add args "isblk" to vdpa example to specify a block device, fix some
 issues in example.
 Make sure code specific to block devices does not affect net devices.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.
Andy Pei (16):
  vdpa/ifc: add support for virtio blk device
  vhost: add vDPA ops for blk device
  vhost: add vhost msg support
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vDPA interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  examples/vdpa: add vDPA blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: add set vring state for blk device
  vdpa/ifc: add some log at vDPA launch before qemu connect
  vdpa/ifc: read virtio max queues from hardware
  vdpa/ifc: add interrupt and handle for virtio blk
  vdpa/ifc: add is blk flag to ifcvf HW struct
  vdpa/ifc/base: access correct register for blk device
  vdpa/ifc: blk device pause without no inflight IO
  vhost: make sure each queue callfd is configured
 drivers/vdpa/ifc/base/ifcvf.c    |  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 521 ++++++++++++++++++++++++++++++++++++---
 examples/vdpa/main.c             |  61 ++++-
 examples/vdpa/vdpa_blk_compact.h |  72 ++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++
 lib/vhost/vdpa_driver.h          |   8 +-
 lib/vhost/vhost_user.c           |  83 +++++++
 lib/vhost/vhost_user.h           |  13 +
 usertools/dpdk-devbind.py        |   8 +
 10 files changed, 951 insertions(+), 55 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 01/16] vdpa/ifc: add support for virtio blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 02/16] vhost: add vDPA ops for " Andy Pei
                       ` (14 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are implemented with the appropriate features and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1306,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1313,6 +1375,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 02/16] vhost: add vDPA ops for blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-04-21  8:33     ` [PATCH v6 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 03/16] vhost: add vhost msg support Andy Pei
                       ` (13 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
get_config and set_config are necessary ops for blk devices.
Add get_config and set_config ops to the vDPA ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-04-21  8:33     ` [PATCH v6 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-04-21  8:33     ` [PATCH v6 02/16] vhost: add vDPA ops for " Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 12:42       ` Xia, Chenbo
  2022-04-25 13:04       ` David Marchand
  2022-04-21  8:33     ` [PATCH v6 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (12 subsequent siblings)
  15 siblings, 2 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk vDPA devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 13 ++++++++++
 2 files changed, 82 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..3780804 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
 	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
 	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
+	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
 	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) get_config() return error!\n",
+					 dev->ifname);
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supportted!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) invalid set config msg size: %"PRId32" != %d\n",
+			dev->ifname, ctx->msg.size,
+			(int)sizeof(struct vhost_user_config));
+		goto OUT;
+	}
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+		if (ret)
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) set_config() return error!\n",
+					 dev->ifname);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supportted!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+
+OUT:
+	return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2782,6 +2849,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
 	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
 	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
 	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
 	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
 	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..97cfb2f 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -125,6 +127,16 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -148,6 +160,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 04/16] vdpa/ifc: add blk ops for ifc device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 03/16] vhost: add vhost msg support Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
                       ` (11 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For virtio blk devices, re-use part of the ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk devices.
Support the VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 12:58       ` Xia, Chenbo
  2022-04-21  8:33     ` [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (10 subsequent siblings)
  15 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For the block device type, we have to relay
the commands on all queues.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..8d104b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
 	for (i = 0; i < nr_vring; i++)
 		internal->intr_fd[i] = -1;
 
-	for (i = 0; i < nr_vring; i++) {
-		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
-			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-			if (fd < 0) {
-				DRV_LOG(ERR, "can't setup eventfd: %s",
-					strerror(errno));
-				return -1;
+	if (internal->device_type == IFCVF_NET) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if ((i & 1) == 0 && m_rx == true) {
+				/* For the net we only need to relay rx queue,
+				 * which will change the mem of VM.
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+			}
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+			if (m_rx == true) {
+				/* For the blk we need to relay all the read cmd
+				 * of each queue
+				 */
+				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+				if (fd < 0) {
+					DRV_LOG(ERR, "can't setup eventfd: %s",
+						strerror(errno));
+					return -1;
+				}
+				internal->intr_fd[i] = fd;
+				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 			}
-			internal->intr_fd[i] = fd;
-			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
 		}
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 13:10       ` Xia, Chenbo
  2022-04-21  8:33     ` [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example Andy Pei
                       ` (9 subsequent siblings)
  15 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add SW live-migration support for block devices.
Add dirty page logging for block devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 115 insertions(+), 23 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..e417c50 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -191,7 +191,7 @@
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -240,7 +240,7 @@
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8d104b7..a23dc2d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
 	}
 }
 
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		ifcvf_disable_logging(hw);
+		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+				log_base, IFCVF_LOG_BASE, log_size);
+		/*
+		 * IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
 		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
 static int
@@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
-			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
-			if (gpa == 0) {
-				DRV_LOG(ERR, "Fail to get GPA for used ring.");
-				return -1;
+		if (internal->device_type == IFCVF_NET) {
+			/* Direct I/O for Tx queue, relay for Rx queue */
+			if (i & 1) {
+				gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
+				if (gpa == 0) {
+					DRV_LOG(ERR, "Fail to get GPA for used ring.");
+					return -1;
+				}
+				hw->vring[i].used = gpa;
+			} else {
+				hw->vring[i].used = m_vring_iova +
+					(char *)internal->m_vring[i].used -
+					(char *)internal->m_vring[i].desc;
 			}
-			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
+			/* BLK: relay every queue */
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 13:38       ` Xia, Chenbo
  2022-04-21  8:33     ` [PATCH v6 08/16] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  15 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to the vDPA example.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             |  61 +++++++++++++-
 examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
 examples/vdpa/vhost_user.h       | 169 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..1c809ab 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -41,6 +42,7 @@ struct vdpa_port {
 static int devcnt;
 static int interactive;
 static int client_mode;
+static int isblk;
 
 /* display usage */
 static void
@@ -49,7 +51,8 @@ struct vdpa_port {
 	printf("Usage: %s [EAL options] -- "
 				 "	--interactive|-i: run in interactive mode.\n"
 				 "	--iface <path>: specify the path prefix of the socket files, e.g. /tmp/vhost-user-.\n"
-				 "	--client: register a vhost-user socket as client mode.\n",
+				 "	--client: register a vhost-user socket as client mode.\n"
+				 "	--isblk: device is a block device, e.g. virtio_blk device.\n",
 				 prgname);
 }
 
@@ -61,6 +64,7 @@ struct vdpa_port {
 		{"iface", required_argument, NULL, 0},
 		{"interactive", no_argument, &interactive, 1},
 		{"client", no_argument, &client_mode, 1},
+		{"isblk", no_argument, &isblk, 1},
 		{NULL, 0, 0, 0},
 	};
 	int opt, idx;
@@ -159,6 +163,52 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES_BASE);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_VDPA_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
 	int ret;
@@ -192,6 +242,15 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	if (isblk) {
+		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..e7c0f22
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+#define VHOST_BLK_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
+
+#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_VDPA_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
new file mode 100644
index 0000000..137bbc2
--- /dev/null
+++ b/examples/vdpa/vhost_user.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_vhost.h"
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+#ifndef VHOST_USER_MAX_CONFIG_SIZE
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+#endif
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SET_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_CRYPTO_CREATE_SESS = 26,
+	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_POSTCOPY_LISTEN = 29,
+	VHOST_USER_POSTCOPY_END = 30,
+	VHOST_USER_GET_INFLIGHT_FD = 31,
+	VHOST_USER_SET_INFLIGHT_FD = 32,
+	VHOST_USER_MAX = 33
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+	VHOST_USER_SLAVE_NONE = 0,
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+/* Comply with Cryptodev-Linux */
+#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
+#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
+
+/* Same structure as vhost-user backend session info */
+typedef struct VhostUserCryptoSessionParam {
+	int64_t session_id;
+	uint32_t op_code;
+	uint32_t cipher_algo;
+	uint32_t cipher_key_len;
+	uint32_t hash_algo;
+	uint32_t digest_len;
+	uint32_t auth_key_len;
+	uint32_t aad_len;
+	uint8_t op_type;
+	uint8_t dir;
+	uint8_t hash_mode;
+	uint8_t chaining_dir;
+	uint8_t *ciphe_key;
+	uint8_t *auth_key;
+	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
+	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
+} VhostUserCryptoSessionParam;
+
+typedef struct VhostUserVringArea {
+	uint64_t u64;
+	uint64_t size;
+	uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint16_t num_queues;
+	uint16_t queue_size;
+} VhostUserInflight;
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
+typedef struct VhostUserMsg {
+	union {
+		uint32_t master; /* a VhostUserRequest value */
+		uint32_t slave;  /* a VhostUserSlaveRequest value*/
+	} request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY		(0x1 << 3)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+		struct vhost_iotlb_msg iotlb;
+		VhostUserCryptoSessionParam crypto_session;
+		VhostUserVringArea area;
+		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+	int fd_num;
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+#endif
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 08/16] usertools: add support for virtio blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-25 13:53       ` Xia, Chenbo
  2022-04-21  8:33     ` [PATCH v6 09/16] vdpa/ifc: add set vring state for " Andy Pei
                       ` (7 subsequent siblings)
  15 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to devbind.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
                  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
     Displays to the user what devices are bound to the igb_uio driver, the
     kernel driver or to no driver'''
 
+    if status_dev in ["virtio_blk", "all"]:
+        show_device_status(virtio_blk_devices, "virtio_blk")
+
     if status_dev in ["net", "all"]:
         show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
         if b_flag is not None:
             clear_data()
             # refresh if we have changed anything
+            get_device_details(virtio_blk_devices)
             get_device_details(network_devices)
             get_device_details(baseband_devices)
             get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
     parse_args()
     check_modules()
     clear_data()
+    get_device_details(virtio_blk_devices)
     get_device_details(network_devices)
     get_device_details(baseband_devices)
     get_device_details(crypto_devices)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 09/16] vdpa/ifc: add set vring state for blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 10/16] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
The set_vring_state op is mandatory, so add a set_vring_state op for the
blk device. For now it is only a stub: it takes no action and returns 0.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index a23dc2d..28191e4 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1368,6 +1368,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1385,7 +1395,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 10/16] vdpa/ifc: add some log at vDPA launch before qemu connect
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 09/16] vdpa/ifc: add set vring state for " Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 11/16] vdpa/ifc: read virtio max queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Log the virtio blk device config space information
at vDPA launch, before QEMU connects.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 28191e4..045623b 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1431,6 +1431,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1497,6 +1500,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 11/16] vdpa/ifc: read virtio max queues from hardware
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 10/16] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 12/16] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
                       ` (4 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
In the original code, max_queues is always set to IFCVF_MAX_QUEUES.
With this patch, max_queues for a blk device is the minimum of
IFCVF_MAX_QUEUES and the num_queues value read from the hardware.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 045623b..e8e7d61 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1525,6 +1525,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 12/16] vdpa/ifc: add interrupt and handle for virtio blk
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 11/16] vdpa/ifc: read virtio max queues from hardware Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 13/16] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
                       ` (3 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Create a thread to poll and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 112 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e8e7d61..c02ae4d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -608,6 +610,107 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -634,10 +737,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -958,6 +1067,9 @@ struct rte_vdpa_dev_info {
 		vdpa_ifcvf_stop(internal);
 	else if (internal->device_type == IFCVF_BLK)
 		vdpa_ifcvf_blk_pause(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 13/16] vdpa/ifc: add is blk flag to ifcvf HW struct
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 12/16] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 14/16] vdpa/ifc/base: access correct register for blk device Andy Pei
                       ` (2 subsequent siblings)
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add is_blk flag to ifcvf_hw, and init is_blk during probe.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index c02ae4d..f54beaf 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1604,11 +1604,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 14/16] vdpa/ifc/base: access correct register for blk device
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 13/16] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 15/16] vdpa/ifc: blk device pause without no inflight IO Andy Pei
  2022-04-21  8:33     ` [PATCH v6 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
1. last_avail_idx is the lower 16 bits of the ring_state register.
2. The address of the ring_state register differs between net and blk devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index e417c50..d923266 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +262,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 15/16] vdpa/ifc: blk device pause without no inflight IO
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 14/16] vdpa/ifc/base: access correct register for blk device Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  2022-04-21  8:33     ` [PATCH v6 16/16] vhost: make sure each queue callfd is configured Andy Pei
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
When a virtio blk device is paused, make sure the hardware last_avail_idx
and last_used_idx are the same.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d923266..d89cb73 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -118,7 +118,7 @@
 	IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
 	ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index f54beaf..578bf6c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -351,23 +351,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	int i, vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
@@ -751,7 +760,12 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
-		vdpa_ifcvf_stop(internal);
+		if (internal->device_type == IFCVF_BLK) {
+			vdpa_ifcvf_blk_pause(internal);
+			ifcvf_reset(&internal->hw);
+		} else {
+			vdpa_ifcvf_stop(internal);
+		}
 
 		ret = vdpa_disable_vfio_intr(internal);
 		if (ret)
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v6 16/16] vhost: make sure each queue callfd is configured
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (14 preceding siblings ...)
  2022-04-21  8:33     ` [PATCH v6 15/16] vdpa/ifc: blk device pause without no inflight IO Andy Pei
@ 2022-04-21  8:33     ` Andy Pei
  15 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-21  8:33 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
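While the vhost data path is being built, QEMU first creates
a call fd, and creates another call fd at the end.
Only the final call fd is used to relay notifications.
In the original code, dev_conf is performed after the kick fd is set,
so it configures the first call fd. Even though the actual call fd is
set later, the data path does not work correctly.
Perform dev_conf only on VHOST_USER_SET_VRING_CALL, and if the device
is already configured, close it and configure it again so that the
call fd of each queue is configured correctly.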
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 3780804..e6f4113 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3216,12 +3216,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n",
 					dev->ifname);
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* when VIRTIO_DEV_VDPA_CONFIGURED already configured
+		 * close the device and config the device again,
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-21  8:33     ` [PATCH v6 03/16] vhost: add vhost msg support Andy Pei
@ 2022-04-25 12:42       ` Xia, Chenbo
  2022-04-26  8:55         ` Pei, Andy
  2022-04-25 13:04       ` David Marchand
  1 sibling, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-04-25 12:42 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Andy,
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 03/16] vhost: add vhost msg support
> 
> Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> supported by virtio blk VDPA device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 69
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vhost_user.h | 13 ++++++++++
>  2 files changed, 82 insertions(+)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 1d39067..3780804 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -80,6 +80,8 @@
>  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
>  	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
>  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
>  	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>  	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
>  	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
> @@ -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
> 
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (vdpa_dev->ops->get_config) {
> +		ret = vdpa_dev->ops->get_config(dev->vid,
> +					   ctx->msg.payload.cfg.region,
> +					   ctx->msg.payload.cfg.size);
> +		if (ret != 0) {
> +			ctx->msg.size = 0;
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) get_config() return error!\n",
> +					 dev->ifname);
> +		}
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supportted!\n",
Typo: "supportted" -> "supported"
> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
I think you should do a sanity check on payload.cfg.size and make sure it is
smaller than VHOST_USER_MAX_CONFIG_SIZE, and do the same check for offset.
> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) invalid set config msg size: %"PRId32" != %d\n",
> +			dev->ifname, ctx->msg.size,
Assuming you will change the log as well: payload.cfg.size is uint32_t,
so PRId32 -> PRIu32
> +			(int)sizeof(struct vhost_user_config));
So this can be %u
> +		goto OUT;
> +	}
> +
> +	if (vdpa_dev->ops->set_config) {
> +		ret = vdpa_dev->ops->set_config(dev->vid,
> +			ctx->msg.payload.cfg.region,
> +			ctx->msg.payload.cfg.offset,
> +			ctx->msg.payload.cfg.size,
> +			ctx->msg.payload.cfg.flags);
> +		if (ret)
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) set_config() return error!\n",
> +					 dev->ifname);
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supportted!\n",
Typo: "supportted" -> "supported"
> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_OK;
> +
> +OUT:
Lower case looks better
> +	return RTE_VHOST_MSG_RESULT_ERR;
> +}
Almost all handlers need to check the expected fd num (0 in this case), so the new
handlers above should do that as well. Please refer to validate_msg_fds in other handlers.
BTW, you can wait for the review of the other patches and send a new version later.
Thanks,
Chenbo
> +
> +static int
>  vhost_user_iotlb_msg(struct virtio_net **pdev,
>  			struct vhu_msg_context *ctx,
>  			int main_fd __rte_unused)
> @@ -2782,6 +2849,8 @@ typedef int (*vhost_message_handler_t)(struct
> virtio_net **pdev,
>  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
>  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
>  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
>  	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
>  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
>  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
> diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
> index c946cc2..97cfb2f 100644
> --- a/lib/vhost/vhost_user.h
> +++ b/lib/vhost/vhost_user.h
> @@ -50,6 +50,8 @@
>  	VHOST_USER_NET_SET_MTU = 20,
>  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
>  	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
>  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
>  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
>  	VHOST_USER_POSTCOPY_ADVISE = 28,
> @@ -125,6 +127,16 @@
>  	uint16_t queue_size;
>  } VhostUserInflight;
> 
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
>  typedef struct VhostUserMsg {
>  	union {
>  		uint32_t master; /* a VhostUserRequest value */
> @@ -148,6 +160,7 @@
>  		VhostUserCryptoSessionParam crypto_session;
>  		VhostUserVringArea area;
>  		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
>  	} payload;
>  	/* Nothing should be added after the payload */
>  } __rte_packed VhostUserMsg;
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-21  8:33     ` [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
@ 2022-04-25 12:58       ` Xia, Chenbo
  2022-04-26  9:56         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-04-25 12:58 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Andy,
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
> 
> For the block device type, we have to relay
> the commands on all queues.
The commit message is a bit short. Although I can understand it, please add some
background on the current implementation so that others can easily understand it.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ++++++++++++++++++++++++++++++++------
> -----
>  1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8ee041f..8d104b7 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
>  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>  	irq_set->start = 0;
>  	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change
> notification */
>  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>  		rte_intr_fd_get(internal->pdev->intr_handle);
> 
>  	for (i = 0; i < nr_vring; i++)
>  		internal->intr_fd[i] = -1;
> 
> -	for (i = 0; i < nr_vring; i++) {
> -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -			if (fd < 0) {
> -				DRV_LOG(ERR, "can't setup eventfd: %s",
> -					strerror(errno));
> -				return -1;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if ((i & 1) == 0 && m_rx == true) {
> +				/* For the net we only need to relay rx queue,
> +				 * which will change the mem of VM.
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> +			}
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +			if (m_rx == true) {
> +				/* For the blk we need to relay all the read cmd
> +				 * of each queue
> +				 */
> +				fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +				if (fd < 0) {
> +					DRV_LOG(ERR, "can't setup eventfd: %s",
> +						strerror(errno));
> +					return -1;
> +				}
> +				internal->intr_fd[i] = fd;
> +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
There is a lot of duplicated code here for blk and net. What if we use this condition to
decide whether to create the eventfd or not:
if (m_rx == true && (is_blk_dev || (i & 1) == 0)) {
	/* create eventfd and save now */
}
Thanks,
Chenbo
>  			}
> -			internal->intr_fd[i] = fd;
> -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
>  		}
>  	}
> 
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-21  8:33     ` [PATCH v6 03/16] vhost: add vhost msg support Andy Pei
  2022-04-25 12:42       ` Xia, Chenbo
@ 2022-04-25 13:04       ` David Marchand
  2022-04-26  8:08         ` Pei, Andy
  1 sibling, 1 reply; 263+ messages in thread
From: David Marchand @ 2022-04-25 13:04 UTC (permalink / raw)
  To: Andy Pei; +Cc: dev, Xia, Chenbo, Maxime Coquelin, gang.cao, Liu, Changpeng
On Thu, Apr 21, 2022 at 11:20 AM Andy Pei <andy.pei@intel.com> wrote:
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 1d39067..3780804 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -80,6 +80,8 @@
>         [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
>         [VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
>         [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> +       [VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> +       [VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
>         [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>         [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
>         [VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
> @@ -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
>
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       struct virtio_net *dev = *pdev;
> +       struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +       int ret = 0;
You must check if there is any fd attached to this message.
> +
> +       if (vdpa_dev->ops->get_config) {
> +               ret = vdpa_dev->ops->get_config(dev->vid,
> +                                          ctx->msg.payload.cfg.region,
> +                                          ctx->msg.payload.cfg.size);
> +               if (ret != 0) {
> +                       ctx->msg.size = 0;
> +                       VHOST_LOG_CONFIG(ERR,
> +                                        "(%s) get_config() return error!\n",
> +                                        dev->ifname);
> +               }
> +       } else {
> +               VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supportted!\n",
> +                                dev->ifname);
> +       }
> +
> +       return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +                       struct vhu_msg_context *ctx,
> +                       int main_fd __rte_unused)
> +{
> +       struct virtio_net *dev = *pdev;
> +       struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +       int ret = 0;
Idem.
> +
> +       if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> +               VHOST_LOG_CONFIG(ERR,
> +                       "(%s) invalid set config msg size: %"PRId32" != %d\n",
> +                       dev->ifname, ctx->msg.size,
> +                       (int)sizeof(struct vhost_user_config));
> +               goto OUT;
> +       }
For info, I posted a series to make this kind of check more systematic.
See: https://patchwork.dpdk.org/project/dpdk/patch/20220425125431.26464-2-david.marchand@redhat.com/
--
David Marchand
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
  2022-04-21  8:33     ` [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-04-25 13:10       ` Xia, Chenbo
  2022-04-26 10:07         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-04-25 13:10 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
> 
> Add SW live-migration support to block device.
> Add dirty page logging to block device.
Add SW live-migration support including dirty page logging for block device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.c |   4 +-
>  drivers/vdpa/ifc/base/ifcvf.h |   6 ++
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++--
> -----
>  3 files changed, 115 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
> index d10c1fd..e417c50 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -191,7 +191,7 @@
>  	IFCVF_WRITE_REG32(val >> 32, hi);
>  }
> 
> -STATIC int
> +int
>  ifcvf_hw_enable(struct ifcvf_hw *hw)
>  {
>  	struct ifcvf_pci_common_cfg *cfg;
> @@ -240,7 +240,7 @@
>  	return 0;
>  }
> 
> -STATIC void
> +void
>  ifcvf_hw_disable(struct ifcvf_hw *hw)
>  {
>  	u32 i;
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 769c603..6dd7925 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -179,4 +179,10 @@ struct ifcvf_hw {
>  u64
>  ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
> 
> +int
> +ifcvf_hw_enable(struct ifcvf_hw *hw);
> +
> +void
> +ifcvf_hw_disable(struct ifcvf_hw *hw);
> +
>  #endif /* _IFCVF_H_ */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8d104b7..a23dc2d 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
>  	}
>  }
> 
> +static void
> +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
> +{
> +	struct ifcvf_hw *hw = &internal->hw;
> +	struct rte_vhost_vring vq;
> +	int i, vid;
> +	uint64_t features = 0;
> +	uint64_t log_base = 0, log_size = 0;
> +	uint64_t len;
> +
> +	vid = internal->vid;
> +
> +	if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> +			while (vq.avail->idx != vq.used->idx) {
> +				ifcvf_notify_queue(hw, i);
> +				usleep(10);
> +			}
> +			hw->vring[i].last_avail_idx = vq.avail->idx;
> +			hw->vring[i].last_used_idx = vq.used->idx;
> +		}
> +	}
> +
> +	ifcvf_hw_disable(hw);
> +
> +	for (i = 0; i < hw->nr_vring; i++)
> +		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> +				hw->vring[i].last_used_idx);
> +
> +	if (internal->sw_lm)
> +		return;
> +
> +	rte_vhost_get_negotiated_features(vid, &features);
> +	if (RTE_VHOST_NEED_LOG(features)) {
> +		ifcvf_disable_logging(hw);
> +		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> +		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> +				log_base, IFCVF_LOG_BASE, log_size);
> +		/*
> +		 * IFCVF marks dirty memory pages for only packet buffer,
> +		 * SW helps to mark the used ring as dirty after device stops.
> +		 */
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> +			rte_vhost_log_used_vring(vid, i, 0, len);
> +		}
> +	}
> +}
Can we consider combining vdpa_ifcvf_blk_pause and vdpa_ifcvf_stop into one function and
checking the device type internally to do different things? As far as I can see, most of
the logic is the same.
> +
>  #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
>  		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
>  static int
> @@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
>  		}
>  		hw->vring[i].avail = gpa;
> 
> -		/* Direct I/O for Tx queue, relay for Rx queue */
> -		if (i & 1) {
> -			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
> -			if (gpa == 0) {
> -				DRV_LOG(ERR, "Fail to get GPA for used ring.");
> -				return -1;
> +		if (internal->device_type == IFCVF_NET) {
> +			/* Direct I/O for Tx queue, relay for Rx queue */
> +			if (i & 1) {
> +				gpa = hva_to_gpa(vid,
> (uint64_t)(uintptr_t)vq.used);
> +				if (gpa == 0) {
> +					DRV_LOG(ERR, "Fail to get GPA for used
> ring.");
> +					return -1;
> +				}
> +				hw->vring[i].used = gpa;
> +			} else {
> +				hw->vring[i].used = m_vring_iova +
> +					(char *)internal->m_vring[i].used -
> +					(char *)internal->m_vring[i].desc;
>  			}
> -			hw->vring[i].used = gpa;
> -		} else {
> +		} else if (internal->device_type == IFCVF_BLK) {
> +			/* BLK: relay every queue */
>  			hw->vring[i].used = m_vring_iova +
>  				(char *)internal->m_vring[i].used -
>  				(char *)internal->m_vring[i].desc;
> @@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
>  	}
>  	hw->nr_vring = nr_vring;
> 
> -	return ifcvf_start_hw(&internal->hw);
> +	if (internal->device_type == IFCVF_NET)
> +		return ifcvf_start_hw(&internal->hw);
> +	else if (internal->device_type == IFCVF_BLK)
> +		return ifcvf_hw_enable(&internal->hw);
> 
>  error:
>  	for (i = 0; i < nr_vring; i++)
> @@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
> 
>  	for (i = 0; i < hw->nr_vring; i++) {
>  		/* synchronize remaining new used entries if any */
> -		if ((i & 1) == 0)
> +		if (internal->device_type == IFCVF_NET) {
> +			if ((i & 1) == 0)
> +				update_used_ring(internal, i);
> +		} else if (internal->device_type == IFCVF_BLK) {
>  			update_used_ring(internal, i);
> +		}
> 
>  		rte_vhost_get_vhost_vring(vid, i, &vq);
>  		len = IFCVF_USED_RING_LEN(vq.size);
> @@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
>  		}
>  	}
> 
> -	for (qid = 0; qid < q_num; qid += 2) {
> -		ev.events = EPOLLIN | EPOLLPRI;
> -		/* leave a flag to mark it's for interrupt */
> -		ev.data.u64 = 1 | qid << 1 |
> -			(uint64_t)internal->intr_fd[qid] << 32;
> -		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
> -				< 0) {
> -			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> -			return NULL;
> +	if (internal->device_type == IFCVF_NET) {
> +		for (qid = 0; qid < q_num; qid += 2) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
> +		}
> +	} else if (internal->device_type == IFCVF_BLK) {
> +		for (qid = 0; qid < q_num; qid += 1) {
> +			ev.events = EPOLLIN | EPOLLPRI;
> +			/* leave a flag to mark it's for interrupt */
> +			ev.data.u64 = 1 | qid << 1 |
> +				(uint64_t)internal->intr_fd[qid] << 32;
> +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> +				      internal->intr_fd[qid], &ev)
> +					< 0) {
> +				DRV_LOG(ERR, "epoll add error: %s",
> +					strerror(errno));
> +				return NULL;
> +			}
> +			update_used_ring(internal, qid);
It seems we can also reduce the duplicated code in the above case. And for the other checks,
if a single combined condition can be used, I would prefer to use just one.
Thanks,
Chenbo
>  		}
> -		update_used_ring(internal, qid);
>  	}
> 
>  	/* start relay with a first kick */
> @@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
> 
>  	/* stop the direct IO data path */
>  	unset_notify_relay(internal);
> -	vdpa_ifcvf_stop(internal);
> +	if (internal->device_type == IFCVF_NET)
> +		vdpa_ifcvf_stop(internal);
> +	else if (internal->device_type == IFCVF_BLK)
> +		vdpa_ifcvf_blk_pause(internal);
>  	vdpa_disable_vfio_intr(internal);
> 
>  	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example
  2022-04-21  8:33     ` [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example Andy Pei
@ 2022-04-25 13:38       ` Xia, Chenbo
  2022-04-27  4:11         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-04-25 13:38 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Andy,
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example
> 
> Add virtio blk device support to vDPA example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  examples/vdpa/main.c             |  61 +++++++++++++-
>  examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
>  examples/vdpa/vhost_user.h       | 169
> +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 301 insertions(+), 1 deletion(-)
>  create mode 100644 examples/vdpa/vdpa_blk_compact.h
>  create mode 100644 examples/vdpa/vhost_user.h
> 
> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
> index 5ab0765..1c809ab 100644
> --- a/examples/vdpa/main.c
> +++ b/examples/vdpa/main.c
> @@ -20,6 +20,7 @@
>  #include <cmdline_parse_string.h>
>  #include <cmdline_parse_num.h>
>  #include <cmdline.h>
> +#include "vdpa_blk_compact.h"
> 
>  #define MAX_PATH_LEN 128
>  #define MAX_VDPA_SAMPLE_PORTS 1024
> @@ -41,6 +42,7 @@ struct vdpa_port {
>  static int devcnt;
>  static int interactive;
>  static int client_mode;
> +static int isblk;
> 
>  /* display usage */
>  static void
> @@ -49,7 +51,8 @@ struct vdpa_port {
>  	printf("Usage: %s [EAL options] -- "
>  				 "	--interactive|-i: run in interactive
> mode.\n"
>  				 "	--iface <path>: specify the path prefix of
> the socket files, e.g. /tmp/vhost-user-.\n"
> -				 "	--client: register a vhost-user socket as
> client mode.\n",
> +				 "	--client: register a vhost-user socket as
> client mode.\n"
> +				 "	--isblk: device is a block device, e.g.
> virtio_blk device.\n",
>  				 prgname);
>  }
> 
> @@ -61,6 +64,7 @@ struct vdpa_port {
>  		{"iface", required_argument, NULL, 0},
>  		{"interactive", no_argument, &interactive, 1},
>  		{"client", no_argument, &client_mode, 1},
> +		{"isblk", no_argument, &isblk, 1},
I think a new API to get the device type (e.g. get_device_type) would be better than
asking the user to specify the device type.
>  		{NULL, 0, 0, 0},
>  	};
>  	int opt, idx;
> @@ -159,6 +163,52 @@ struct vdpa_port {
>  };
> 
>  static int
> +vdpa_blk_device_set_features_and_protocol(const char *path)
> +{
> +	uint64_t protocol_features = 0;
> +	int ret;
> +
> +	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES_BASE);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_disable_features(path,
> +		VHOST_VDPA_BLK_DISABLED_FEATURES);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_disable_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_get_protocol_features(path,
> &protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_get_protocol_features for %s
> failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
> +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
> +
> +	ret = rte_vhost_driver_set_protocol_features(path,
> protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_protocol_features for %s
> failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +out:
> +	return ret;
> +}
> +
> +static int
>  start_vdpa(struct vdpa_port *vport)
>  {
>  	int ret;
> @@ -192,6 +242,15 @@ struct vdpa_port {
>  			"attach vdpa device failed: %s\n",
>  			socket_path);
> 
> +	if (isblk) {
> +		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
> +		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
> +		if (ret != 0)
> +			rte_exit(EXIT_FAILURE,
> +				"set vhost blk driver features and protocol
> features failed: %s\n",
> +				socket_path);
> +	}
> +
>  	if (rte_vhost_driver_start(socket_path) < 0)
>  		rte_exit(EXIT_FAILURE,
>  			"start vhost driver failed: %s\n",
> diff --git a/examples/vdpa/vdpa_blk_compact.h
> b/examples/vdpa/vdpa_blk_compact.h
Please remove the unused definitions in this header. I didn't check all of them, but it
seems at least VHOST_BLK_PROTOCOL_FEATURES is not used.
> new file mode 100644
> index 0000000..e7c0f22
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.h
> @@ -0,0 +1,72 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _VDPA_BLK_COMPACT_H_
> +#define _VDPA_BLK_COMPACT_H_
> +
> +/**
> + * @file
> + *
> + * Device specific vhost lib
> + */
> +
> +#include <stdbool.h>
> +
> +#include <rte_pci.h>
> +#include <rte_vhost.h>
> +
> +/* Feature bits */
> +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size
> */
> +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments
> */
> +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> +#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
> +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available
> */
> +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is
> available */
> +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported */
> +
> +/* Legacy feature bits */
> +#ifndef VIRTIO_BLK_NO_LEGACY
> +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru
> */
> +#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
> +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in
> config */
> +
> +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */
> +#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH
> +#endif /* !VIRTIO_BLK_NO_LEGACY */
> +
> +#ifndef VHOST_USER_F_PROTOCOL_FEATURES
> +#define VHOST_USER_F_PROTOCOL_FEATURES 30
> +#endif
> +
> +#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
> +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> +	(1ULL << VIRTIO_F_VERSION_1) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
> +
> +#define VHOST_BLK_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) |
> \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
> +
> +#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
> \
> +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  |
> \
> +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE)
> | \
> +	(1ULL << VIRTIO_BLK_F_MQ))
> +
> +/* Not supported features */
> +#define VHOST_VDPA_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE)
> | \
> +	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
> +
> +/* Vhost-blk support protocol features */
> +#define VHOST_BLK_PROTOCOL_FEATURES \
> +	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
> +	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
> +
> +#endif /* _VDPA_BLK_COMPACT_H_ */
> diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
> new file mode 100644
> index 0000000..137bbc2
> --- /dev/null
> +++ b/examples/vdpa/vhost_user.h
I don't understand why this header is introduced. It seems it is never used.
Thanks,
Chenbo
> @@ -0,0 +1,169 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "rte_vhost.h"
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +
> +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +#endif
> +
> +#define VHOST_USER_PROTOCOL_FEATURES	((1ULL <<
> VHOST_USER_PROTOCOL_F_MQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
> +			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
> +			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
> +
> +typedef enum VhostUserRequest {
> +	VHOST_USER_NONE = 0,
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SET_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> +	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> +	VHOST_USER_POSTCOPY_ADVISE = 28,
> +	VHOST_USER_POSTCOPY_LISTEN = 29,
> +	VHOST_USER_POSTCOPY_END = 30,
> +	VHOST_USER_GET_INFLIGHT_FD = 31,
> +	VHOST_USER_SET_INFLIGHT_FD = 32,
> +	VHOST_USER_MAX = 33
> +} VhostUserRequest;
> +
> +typedef enum VhostUserSlaveRequest {
> +	VHOST_USER_SLAVE_NONE = 0,
> +	VHOST_USER_SLAVE_IOTLB_MSG = 1,
> +	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
> +	VHOST_USER_SLAVE_MAX
> +} VhostUserSlaveRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +	uint64_t guest_phys_addr;
> +	uint64_t memory_size;
> +	uint64_t userspace_addr;
> +	uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +	uint32_t nregions;
> +	uint32_t padding;
> +	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserLog {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +} VhostUserLog;
> +
> +/* Comply with Cryptodev-Linux */
> +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
> +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
> +
> +/* Same structure as vhost-user backend session info */
> +typedef struct VhostUserCryptoSessionParam {
> +	int64_t session_id;
> +	uint32_t op_code;
> +	uint32_t cipher_algo;
> +	uint32_t cipher_key_len;
> +	uint32_t hash_algo;
> +	uint32_t digest_len;
> +	uint32_t auth_key_len;
> +	uint32_t aad_len;
> +	uint8_t op_type;
> +	uint8_t dir;
> +	uint8_t hash_mode;
> +	uint8_t chaining_dir;
> +	uint8_t *ciphe_key;
> +	uint8_t *auth_key;
> +	uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
> +	uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
> +} VhostUserCryptoSessionParam;
> +
> +typedef struct VhostUserVringArea {
> +	uint64_t u64;
> +	uint64_t size;
> +	uint64_t offset;
> +} VhostUserVringArea;
> +
> +typedef struct VhostUserInflight {
> +	uint64_t mmap_size;
> +	uint64_t mmap_offset;
> +	uint16_t num_queues;
> +	uint16_t queue_size;
> +} VhostUserInflight;
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
> +typedef struct VhostUserMsg {
> +	union {
> +		uint32_t master; /* a VhostUserRequest value */
> +		uint32_t slave;  /* a VhostUserSlaveRequest value*/
> +	} request;
> +
> +#define VHOST_USER_VERSION_MASK     0x3
> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> +#define VHOST_USER_NEED_REPLY		(0x1 << 3)
> +	uint32_t flags;
> +	uint32_t size; /* the following payload size */
> +	union {
> +#define VHOST_USER_VRING_IDX_MASK   0xff
> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> +		uint64_t u64;
> +		struct vhost_vring_state state;
> +		struct vhost_vring_addr addr;
> +		VhostUserMemory memory;
> +		VhostUserLog    log;
> +		struct vhost_iotlb_msg iotlb;
> +		VhostUserCryptoSessionParam crypto_session;
> +		VhostUserVringArea area;
> +		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
> +	} payload;
> +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> +	int fd_num;
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    0x1
> +#endif
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 08/16] usertools: add support for virtio blk device
  2022-04-21  8:33     ` [PATCH v6 08/16] usertools: add support for virtio blk device Andy Pei
@ 2022-04-25 13:53       ` Xia, Chenbo
  2022-04-26  4:13         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-04-25 13:53 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, david.marchand
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 08/16] usertools: add support for virtio blk device
> 
> Add virtio blk device support to devbind.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  usertools/dpdk-devbind.py | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
> index ace4627..cbe336f 100755
> --- a/usertools/dpdk-devbind.py
> +++ b/usertools/dpdk-devbind.py
> @@ -14,6 +14,8 @@
>  from os.path import join as path_join
Whether or not we support it in this script, it should not be named 'XXX_class'.
It may not even need to be defined as a new class. Maybe put it under misc devices for now?
I'd like to hear others' opinions.
And Stephen commented about using driverctl; please reply to that.
Thanks,
Chenbo
> 
>  # The PCI base class for all devices
> +virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
> +                    'SVendor': '8086', 'SDevice': '0002'}
>  network_class = {'Class': '02', 'Vendor': None, 'Device': None,
>                   'SVendor': None, 'SDevice': None}
>  acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
> @@ -72,6 +74,7 @@
>  cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
>                   'SVendor': None, 'SDevice': None}
> 
> +virtio_blk_devices = [virtio_blk_class]
>  network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
>  baseband_devices = [acceleration_class]
>  crypto_devices = [encryption_class, intel_processor_class]
> @@ -587,6 +590,9 @@ def show_status():
>      Displays to the user what devices are bound to the igb_uio driver,
> the
>      kernel driver or to no driver'''
> 
> +    if status_dev in ["virtio_blk", "all"]:
> +        show_device_status(virtio_blk_devices, "virtio_blk")
> +
>      if status_dev in ["net", "all"]:
>          show_device_status(network_devices, "Network", if_field=True)
> 
> @@ -746,6 +752,7 @@ def do_arg_actions():
>          if b_flag is not None:
>              clear_data()
>              # refresh if we have changed anything
> +            get_device_details(virtio_blk_devices)
>              get_device_details(network_devices)
>              get_device_details(baseband_devices)
>              get_device_details(crypto_devices)
> @@ -769,6 +776,7 @@ def main():
>      parse_args()
>      check_modules()
>      clear_data()
> +    get_device_details(virtio_blk_devices)
>      get_device_details(network_devices)
>      get_device_details(baseband_devices)
>      get_device_details(crypto_devices)
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 08/16] usertools: add support for virtio blk device
  2022-04-25 13:53       ` Xia, Chenbo
@ 2022-04-26  4:13         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-04-26  4:13 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, david.marchand
Hi Chenbo,
Thanks for your reply.
I will send out a new version and put the blk device into misc devices.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 9:53 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; david.marchand@redhat.com
> Subject: RE: [PATCH v6 08/16] usertools: add support for virtio blk device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 08/16] usertools: add support for virtio blk device
> >
> > Add virtio blk device support to devbind.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  usertools/dpdk-devbind.py | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> >
> > diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
> > index ace4627..cbe336f 100755
> > --- a/usertools/dpdk-devbind.py
> > +++ b/usertools/dpdk-devbind.py
> > @@ -14,6 +14,8 @@
> >  from os.path import join as path_join
> 
> Supporting it in this script or not, it should not be named as 'XXX_class'.
> And even may not be defined as new class. Maybe go to misc devices for
> now?
> I'd like to hear others' opinion.
> 
> And Stephen commented about using driverctl, please reply to that.
> 
> Thanks,
> Chenbo
> 
> >
> >  # The PCI base class for all devices
> > +virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
> > +                    'SVendor': '8086', 'SDevice': '0002'}
> >  network_class = {'Class': '02', 'Vendor': None, 'Device': None,
> >                   'SVendor': None, 'SDevice': None}
> > acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
> > @@ -72,6 +74,7 @@  cn9k_ree = {'Class': '08', 'Vendor': '177d',
> > 'Device': 'a0f4',
> >                   'SVendor': None, 'SDevice': None}
> >
> > +virtio_blk_devices = [virtio_blk_class]
> >  network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
> > baseband_devices = [acceleration_class]  crypto_devices =
> > [encryption_class, intel_processor_class] @@ -587,6 +590,9 @@ def
> > show_status():
> >      Displays to the user what devices are bound to the igb_uio
> > driver, the
> >      kernel driver or to no driver'''
> >
> > +    if status_dev in ["virtio_blk", "all"]:
> > +        show_device_status(virtio_blk_devices, "virtio_blk")
> > +
> >      if status_dev in ["net", "all"]:
> >          show_device_status(network_devices, "Network", if_field=True)
> >
> > @@ -746,6 +752,7 @@ def do_arg_actions():
> >          if b_flag is not None:
> >              clear_data()
> >              # refresh if we have changed anything
> > +            get_device_details(virtio_blk_devices)
> >              get_device_details(network_devices)
> >              get_device_details(baseband_devices)
> >              get_device_details(crypto_devices)
> > @@ -769,6 +776,7 @@ def main():
> >      parse_args()
> >      check_modules()
> >      clear_data()
> > +    get_device_details(virtio_blk_devices)
> >      get_device_details(network_devices)
> >      get_device_details(baseband_devices)
> >      get_device_details(crypto_devices)
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-25 13:04       ` David Marchand
@ 2022-04-26  8:08         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-04-26  8:08 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Xia, Chenbo, Maxime Coquelin, Cao, Gang, Liu, Changpeng
Hi David,
Thanks for your reply.
I will send out a new version to address that.
> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Monday, April 25, 2022 9:05 PM
> To: Pei, Andy <andy.pei@intel.com>
> Cc: dev <dev@dpdk.org>; Xia, Chenbo <chenbo.xia@intel.com>; Maxime
> Coquelin <maxime.coquelin@redhat.com>; Cao, Gang
> <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: Re: [PATCH v6 03/16] vhost: add vhost msg support
> 
> On Thu, Apr 21, 2022 at 11:20 AM Andy Pei <andy.pei@intel.com> wrote:
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 1d39067..3780804 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -80,6 +80,8 @@
> >         [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> >         [VHOST_USER_SET_SLAVE_REQ_FD]  =
> "VHOST_USER_SET_SLAVE_REQ_FD",
> >         [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > +       [VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > +       [VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> >         [VHOST_USER_CRYPTO_CREATE_SESS] =
> "VHOST_USER_CRYPTO_CREATE_SESS",
> >         [VHOST_USER_CRYPTO_CLOSE_SESS] =
> "VHOST_USER_CRYPTO_CLOSE_SESS",
> >         [VHOST_USER_POSTCOPY_ADVISE]  =
> "VHOST_USER_POSTCOPY_ADVISE",
> > @@ -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net
> > *dev,  }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +                       struct vhu_msg_context *ctx,
> > +                       int main_fd __rte_unused) {
> > +       struct virtio_net *dev = *pdev;
> > +       struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +       int ret = 0;
> 
> You must check if there is any fd attached to this message.
> 
> 
> > +
> > +       if (vdpa_dev->ops->get_config) {
> > +               ret = vdpa_dev->ops->get_config(dev->vid,
> > +                                          ctx->msg.payload.cfg.region,
> > +                                          ctx->msg.payload.cfg.size);
> > +               if (ret != 0) {
> > +                       ctx->msg.size = 0;
> > +                       VHOST_LOG_CONFIG(ERR,
> > +                                        "(%s) get_config() return error!\n",
> > +                                        dev->ifname);
> > +               }
> > +       } else {
> > +               VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supportted!\n",
> > +                                dev->ifname);
> > +       }
> > +
> > +       return RTE_VHOST_MSG_RESULT_REPLY; }
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +                       struct vhu_msg_context *ctx,
> > +                       int main_fd __rte_unused) {
> > +       struct virtio_net *dev = *pdev;
> > +       struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +       int ret = 0;
> 
> Idem.
> 
> 
> > +
> > +       if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> > +               VHOST_LOG_CONFIG(ERR,
> > +                       "(%s) invalid set config msg size: %"PRId32" != %d\n",
> > +                       dev->ifname, ctx->msg.size,
> > +                       (int)sizeof(struct vhost_user_config));
> > +               goto OUT;
> > +       }
> 
> 
> For info, I posted a series to make this kind of check more systematic.
> See:
> https://patchwork.dpdk.org/project/dpdk/patch/20220425125431.26464-2-
> david.marchand@redhat.com/
> 
> 
> 
> --
> David Marchand
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-25 12:42       ` Xia, Chenbo
@ 2022-04-26  8:55         ` Pei, Andy
  2022-04-26  9:17           ` Xia, Chenbo
  0 siblings, 1 reply; 263+ messages in thread
From: Pei, Andy @ 2022-04-26  8:55 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 8:42 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> 
> Hi Andy,
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 03/16] vhost: add vhost msg support
> >
> > Add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG.
> > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > supported by virtio blk VDPA device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  lib/vhost/vhost_user.c | 69
> > ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/vhost/vhost_user.h | 13 ++++++++++
> >  2 files changed, 82 insertions(+)
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 1d39067..3780804 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -80,6 +80,8 @@
> >  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> >  	[VHOST_USER_SET_SLAVE_REQ_FD]  =
> "VHOST_USER_SET_SLAVE_REQ_FD",
> >  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> >  	[VHOST_USER_CRYPTO_CREATE_SESS] =
> "VHOST_USER_CRYPTO_CREATE_SESS",
> >  	[VHOST_USER_CRYPTO_CLOSE_SESS] =
> "VHOST_USER_CRYPTO_CLOSE_SESS",
> >  	[VHOST_USER_POSTCOPY_ADVISE]  =
> "VHOST_USER_POSTCOPY_ADVISE", @@
> > -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
> > }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (vdpa_dev->ops->get_config) {
> > +		ret = vdpa_dev->ops->get_config(dev->vid,
> > +					   ctx->msg.payload.cfg.region,
> > +					   ctx->msg.payload.cfg.size);
> > +		if (ret != 0) {
> > +			ctx->msg.size = 0;
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) get_config() return error!\n",
> > +					 dev->ifname);
> > +		}
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> supportted!\n",
> 
> Supported
> 
I will send out a new version to fix this.
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_REPLY;
> > +}
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> 
> I think you should do sanity check on payload.cfg.size and make sure it's
> smaller than VHOST_USER_MAX_CONFIG_SIZE
> 
> and same check for offset
> 
I think payload.cfg.size can be smaller than or equal to VHOST_USER_MAX_CONFIG_SIZE.
payload.cfg.offset can be smaller than or equal to VHOST_USER_MAX_CONFIG_SIZE as well.
> > +		VHOST_LOG_CONFIG(ERR,
> > +			"(%s) invalid set config msg size: %"PRId32" != %d\n",
> > +			dev->ifname, ctx->msg.size,
> 
> Based on you will change the log too, payload.cfg.size is uint32_t, so PRId32 ->
> PRIu32
> 
> > +			(int)sizeof(struct vhost_user_config));
> 
> So this can be %u
> 
Sure.
> > +		goto OUT;
> > +	}
> > +
> > +	if (vdpa_dev->ops->set_config) {
> > +		ret = vdpa_dev->ops->set_config(dev->vid,
> > +			ctx->msg.payload.cfg.region,
> > +			ctx->msg.payload.cfg.offset,
> > +			ctx->msg.payload.cfg.size,
> > +			ctx->msg.payload.cfg.flags);
> > +		if (ret)
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) set_config() return error!\n",
> > +					 dev->ifname);
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> supportted!\n",
> 
> Supported
> 
I will send out a new version to fix this.
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_OK;
> > +
> > +OUT:
> 
> Lower case looks better
> 
OK. I will send out a new version to fix this.
> > +	return RTE_VHOST_MSG_RESULT_ERR;
> > +}
> 
> Almost all handlers need check on expected fd num (this case is 0), so the
> above new handlers should also do that. Please refer to validate_msg_fds in
> other handlers.
> 
> BTW, you can wait for review for other patches and send new versions later.
> 
I will send out a new patch after the "vhost: validate fds attached to messages" series from David Marchand is merged.
> Thanks,
> Chenbo
> 
> > +
> > +static int
> >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> >  			struct vhu_msg_context *ctx,
> >  			int main_fd __rte_unused)
> > @@ -2782,6 +2849,8 @@ typedef int (*vhost_message_handler_t)(struct
> > virtio_net **pdev,
> >  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
> >  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
> >  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> > +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> >  	[VHOST_USER_POSTCOPY_ADVISE] =
> vhost_user_set_postcopy_advise,
> >  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> >  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> git
> > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > c946cc2..97cfb2f 100644
> > --- a/lib/vhost/vhost_user.h
> > +++ b/lib/vhost/vhost_user.h
> > @@ -50,6 +50,8 @@
> >  	VHOST_USER_NET_SET_MTU = 20,
> >  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> >  	VHOST_USER_IOTLB_MSG = 22,
> > +	VHOST_USER_GET_CONFIG = 24,
> > +	VHOST_USER_SET_CONFIG = 25,
> >  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> >  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> >  	VHOST_USER_POSTCOPY_ADVISE = 28,
> > @@ -125,6 +127,16 @@
> >  	uint16_t queue_size;
> >  } VhostUserInflight;
> >
> > +#define VHOST_USER_MAX_CONFIG_SIZE		256
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +	uint32_t offset;
> > +	uint32_t size;
> > +	uint32_t flags;
> > +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
> > +
> >  typedef struct VhostUserMsg {
> >  	union {
> >  		uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> +160,7 @@
> >  		VhostUserCryptoSessionParam crypto_session;
> >  		VhostUserVringArea area;
> >  		VhostUserInflight inflight;
> > +		struct vhost_user_config cfg;
> >  	} payload;
> >  	/* Nothing should be added after the payload */  } __rte_packed
> > VhostUserMsg;
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-26  8:55         ` Pei, Andy
@ 2022-04-26  9:17           ` Xia, Chenbo
  2022-04-27  4:12             ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-04-26  9:17 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Tuesday, April 26, 2022 4:56 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> 
> HI Chenbo,
> 
> Thanks for your reply.
> My reply is inline.
> 
> > -----Original Message-----
> > From: Xia, Chenbo <chenbo.xia@intel.com>
> > Sent: Monday, April 25, 2022 8:42 PM
> > To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > Changpeng <changpeng.liu@intel.com>
> > Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> >
> > Hi Andy,
> >
> > > -----Original Message-----
> > > From: Pei, Andy <andy.pei@intel.com>
> > > Sent: Thursday, April 21, 2022 4:34 PM
> > > To: dev@dpdk.org
> > > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > > <changpeng.liu@intel.com>
> > > Subject: [PATCH v6 03/16] vhost: add vhost msg support
> > >
> > > Add support for VHOST_USER_GET_CONFIG and
> > VHOST_USER_SET_CONFIG.
> > > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > > supported by virtio blk VDPA device.
> > >
> > > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > > ---
> > >  lib/vhost/vhost_user.c | 69
> > > ++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  lib/vhost/vhost_user.h | 13 ++++++++++
> > >  2 files changed, 82 insertions(+)
> > >
> > > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > > 1d39067..3780804 100644
> > > --- a/lib/vhost/vhost_user.c
> > > +++ b/lib/vhost/vhost_user.c
> > > @@ -80,6 +80,8 @@
> > >  [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> > >  [VHOST_USER_SET_SLAVE_REQ_FD]  =
> > "VHOST_USER_SET_SLAVE_REQ_FD",
> > >  [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > > +[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > > +[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> > >  [VHOST_USER_CRYPTO_CREATE_SESS] =
> > "VHOST_USER_CRYPTO_CREATE_SESS",
> > >  [VHOST_USER_CRYPTO_CLOSE_SESS] =
> > "VHOST_USER_CRYPTO_CLOSE_SESS",
> > >  [VHOST_USER_POSTCOPY_ADVISE]  =
> > "VHOST_USER_POSTCOPY_ADVISE", @@
> > > -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net *dev,
> > > }
> > >
> > >  static int
> > > +vhost_user_get_config(struct virtio_net **pdev,
> > > +struct vhu_msg_context *ctx,
> > > +int main_fd __rte_unused)
> > > +{
> > > +struct virtio_net *dev = *pdev;
> > > +struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > > +int ret = 0;
> > > +
> > > +if (vdpa_dev->ops->get_config) {
> > > +ret = vdpa_dev->ops->get_config(dev->vid,
> > > +   ctx->msg.payload.cfg.region,
> > > +   ctx->msg.payload.cfg.size);
> > > +if (ret != 0) {
> > > +ctx->msg.size = 0;
> > > +VHOST_LOG_CONFIG(ERR,
> > > + "(%s) get_config() return error!\n",
> > > + dev->ifname);
> > > +}
> > > +} else {
> > > +VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> > supportted!\n",
> >
> > Supported
> >
> I will send out a new version to fix this.
> > > + dev->ifname);
> > > +}
> > > +
> > > +return RTE_VHOST_MSG_RESULT_REPLY;
> > > +}
> > > +
> > > +static int
> > > +vhost_user_set_config(struct virtio_net **pdev,
> > > +struct vhu_msg_context *ctx,
> > > +int main_fd __rte_unused)
> > > +{
> > > +struct virtio_net *dev = *pdev;
> > > +struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > > +int ret = 0;
> > > +
> > > +if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> >
> > I think you should do sanity check on payload.cfg.size and make sure
> it's
> > smaller than VHOST_USER_MAX_CONFIG_SIZE
> >
> > and same check for offset
> >
> I think payload.cfg.size can be smaller than or equal to
> VHOST_USER_MAX_CONFIG_SIZE.
> payload.cfg.ofset can be smaller than or equal to
> VHOST_USER_MAX_CONFIG_SIZE as well
After double-checking: offset is the config space offset, so it should be checked
in the vDPA driver. The size check in the vhost lib layer should just be <= the max you defined (VHOST_USER_MAX_CONFIG_SIZE).
Thanks,
Chenbo
> 
> > > +VHOST_LOG_CONFIG(ERR,
> > > +"(%s) invalid set config msg size: %"PRId32" != %d\n",
> > > +dev->ifname, ctx->msg.size,
> >
> > Based on you will change the log too, payload.cfg.size is uint32_t, so
> PRId32 ->
> > PRIu32
> >
> > > +(int)sizeof(struct vhost_user_config));
> >
> > So this can be %u
> >
> Sure.
> > > +goto OUT;
> > > +}
> > > +
> > > +if (vdpa_dev->ops->set_config) {
> > > +ret = vdpa_dev->ops->set_config(dev->vid,
> > > +ctx->msg.payload.cfg.region,
> > > +ctx->msg.payload.cfg.offset,
> > > +ctx->msg.payload.cfg.size,
> > > +ctx->msg.payload.cfg.flags);
> > > +if (ret)
> > > +VHOST_LOG_CONFIG(ERR,
> > > + "(%s) set_config() return error!\n",
> > > + dev->ifname);
> > > +} else {
> > > +VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> > supportted!\n",
> >
> > Supported
> >
> I will send out a new version to fix this.
> > > + dev->ifname);
> > > +}
> > > +
> > > +return RTE_VHOST_MSG_RESULT_OK;
> > > +
> > > +OUT:
> >
> > Lower case looks better
> >
> OK. I will send out a new version to fix this.
> > > +return RTE_VHOST_MSG_RESULT_ERR;
> > > +}
> >
> > Almost all handlers need check on expected fd num (this case is 0), so
> the
> > above new handlers should also do that. Please refer to validate_msg_fds
> in
> > other handlers.
> >
> > BTW, you can wait for review for other patches and send new versions
> later.
> >
> I will send out new patch after vhost: validate fds attached to messages
> from David Marchand is merged.
> > Thanks,
> > Chenbo
> >
> > > +
> > > +static int
> > >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> > >  struct vhu_msg_context *ctx,
> > >  int main_fd __rte_unused)
> > > @@ -2782,6 +2849,8 @@ typedef int (*vhost_message_handler_t)(struct
> > > virtio_net **pdev,
> > >  [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
> > >  [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
> > >  [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> > > +[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > > +[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> > >  [VHOST_USER_POSTCOPY_ADVISE] =
> > vhost_user_set_postcopy_advise,
> > >  [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> > >  [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> > git
> > > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > > c946cc2..97cfb2f 100644
> > > --- a/lib/vhost/vhost_user.h
> > > +++ b/lib/vhost/vhost_user.h
> > > @@ -50,6 +50,8 @@
> > >  VHOST_USER_NET_SET_MTU = 20,
> > >  VHOST_USER_SET_SLAVE_REQ_FD = 21,
> > >  VHOST_USER_IOTLB_MSG = 22,
> > > +VHOST_USER_GET_CONFIG = 24,
> > > +VHOST_USER_SET_CONFIG = 25,
> > >  VHOST_USER_CRYPTO_CREATE_SESS = 26,
> > >  VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> > >  VHOST_USER_POSTCOPY_ADVISE = 28,
> > > @@ -125,6 +127,16 @@
> > >  uint16_t queue_size;
> > >  } VhostUserInflight;
> > >
> > > +#define VHOST_USER_MAX_CONFIG_SIZE256
> > > +
> > > +/** Get/set config msg payload */
> > > +struct vhost_user_config {
> > > +uint32_t offset;
> > > +uint32_t size;
> > > +uint32_t flags;
> > > +uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > > +};
> > > +
> > >  typedef struct VhostUserMsg {
> > >  union {
> > >  uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> > +160,7 @@
> > >  VhostUserCryptoSessionParam crypto_session;
> > >  VhostUserVringArea area;
> > >  VhostUserInflight inflight;
> > > +struct vhost_user_config cfg;
> > >  } payload;
> > >  /* Nothing should be added after the payload */  } __rte_packed
> > > VhostUserMsg;
> > > --
> > > 1.8.3.1
> >
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-25 12:58       ` Xia, Chenbo
@ 2022-04-26  9:56         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-04-26  9:56 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 8:58 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
> 
> Hi Andy,
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device
> >
> > For the block device type, we have to relay the commands on all
> > queues.
> 
> It's a bit short... although I can understand, please add some background on
> current implementation for others to easily understand.
> 
Sure, I will send a new patch set to address this.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 46
> > ++++++++++++++++++++++++++++++++------
> > -----
> >  1 file changed, 35 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8ee041f..8d104b7 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -370,24 +370,48 @@ struct rte_vdpa_dev_info {
> >  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> >  	irq_set->start = 0;
> >  	fd_ptr = (int *)&irq_set->data;
> > +	/* The first interrupt is for the configure space change
> > notification */
> >  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
> >  		rte_intr_fd_get(internal->pdev->intr_handle);
> >
> >  	for (i = 0; i < nr_vring; i++)
> >  		internal->intr_fd[i] = -1;
> >
> > -	for (i = 0; i < nr_vring; i++) {
> > -		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> > -		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> > -		if ((i & 1) == 0 && m_rx == true) {
> > -			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> > -			if (fd < 0) {
> > -				DRV_LOG(ERR, "can't setup eventfd: %s",
> > -					strerror(errno));
> > -				return -1;
> > +	if (internal->device_type == IFCVF_NET) {
> > +		for (i = 0; i < nr_vring; i++) {
> > +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> > +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> > +			if ((i & 1) == 0 && m_rx == true) {
> > +				/* For the net we only need to relay rx queue,
> > +				 * which will change the mem of VM.
> > +				 */
> > +				fd = eventfd(0, EFD_NONBLOCK |
> EFD_CLOEXEC);
> > +				if (fd < 0) {
> > +					DRV_LOG(ERR, "can't setup
> eventfd: %s",
> > +						strerror(errno));
> > +					return -1;
> > +				}
> > +				internal->intr_fd[i] = fd;
> > +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> > +			}
> > +		}
> > +	} else if (internal->device_type == IFCVF_BLK) {
> > +		for (i = 0; i < nr_vring; i++) {
> > +			rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> > +			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> > +			if (m_rx == true) {
> > +				/* For the blk we need to relay all the read
> cmd
> > +				 * of each queue
> > +				 */
> > +				fd = eventfd(0, EFD_NONBLOCK |
> EFD_CLOEXEC);
> > +				if (fd < 0) {
> > +					DRV_LOG(ERR, "can't setup
> eventfd: %s",
> > +						strerror(errno));
> > +					return -1;
> > +				}
> > +				internal->intr_fd[i] = fd;
> > +				fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> 
> Many duplicated code here for blk and net. What if we use this condition to
> know creating eventfd or not:
> 
> if (m_rx == true && (is_blk_dev || (i & 1) == 0)) {
> 	/* create eventfd and save now */
> }
> 
Sure, I will send a new patch set to address this.
> Thanks,
> Chenbo
> 
> >  			}
> > -			internal->intr_fd[i] = fd;
> > -			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> >  		}
> >  	}
> >
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
  2022-04-25 13:10       ` Xia, Chenbo
@ 2022-04-26 10:07         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-04-26 10:07 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 9:10 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
> >
> > Add SW live-migration support to block device.
> > Add dirty page logging to block device.
> 
> Add SW live-migration support including dirty page logging for block device.
> 
Sure, I will remove "Add dirty page logging to block device." in the next version.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.c |   4 +-
> >  drivers/vdpa/ifc/base/ifcvf.h |   6 ++
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 128
> > +++++++++++++++++++++++++++++++++++--
> > -----
> >  3 files changed, 115 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.c
> > b/drivers/vdpa/ifc/base/ifcvf.c index d10c1fd..e417c50 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.c
> > +++ b/drivers/vdpa/ifc/base/ifcvf.c
> > @@ -191,7 +191,7 @@
> >  	IFCVF_WRITE_REG32(val >> 32, hi);
> >  }
> >
> > -STATIC int
> > +int
> >  ifcvf_hw_enable(struct ifcvf_hw *hw)
> >  {
> >  	struct ifcvf_pci_common_cfg *cfg;
> > @@ -240,7 +240,7 @@
> >  	return 0;
> >  }
> >
> > -STATIC void
> > +void
> >  ifcvf_hw_disable(struct ifcvf_hw *hw)  {
> >  	u32 i;
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.h
> > b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..6dd7925 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.h
> > +++ b/drivers/vdpa/ifc/base/ifcvf.h
> > @@ -179,4 +179,10 @@ struct ifcvf_hw {
> >  u64
> >  ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
> >
> > +int
> > +ifcvf_hw_enable(struct ifcvf_hw *hw);
> > +
> > +void
> > +ifcvf_hw_disable(struct ifcvf_hw *hw);
> > +
> >  #endif /* _IFCVF_H_ */
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8d104b7..a23dc2d 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
> >  	}
> >  }
> >
> > +static void
> > +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) {
> > +	struct ifcvf_hw *hw = &internal->hw;
> > +	struct rte_vhost_vring vq;
> > +	int i, vid;
> > +	uint64_t features = 0;
> > +	uint64_t log_base = 0, log_size = 0;
> > +	uint64_t len;
> > +
> > +	vid = internal->vid;
> > +
> > +	if (internal->device_type == IFCVF_BLK) {
> > +		for (i = 0; i < hw->nr_vring; i++) {
> > +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> > +			while (vq.avail->idx != vq.used->idx) {
> > +				ifcvf_notify_queue(hw, i);
> > +				usleep(10);
> > +			}
> > +			hw->vring[i].last_avail_idx = vq.avail->idx;
> > +			hw->vring[i].last_used_idx = vq.used->idx;
> > +		}
> > +	}
> > +
> > +	ifcvf_hw_disable(hw);
> > +
> > +	for (i = 0; i < hw->nr_vring; i++)
> > +		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> > +				hw->vring[i].last_used_idx);
> > +
> > +	if (internal->sw_lm)
> > +		return;
> > +
> > +	rte_vhost_get_negotiated_features(vid, &features);
> > +	if (RTE_VHOST_NEED_LOG(features)) {
> > +		ifcvf_disable_logging(hw);
> > +		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> > +		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> > +				log_base, IFCVF_LOG_BASE, log_size);
> > +		/*
> > +		 * IFCVF marks dirty memory pages for only packet buffer,
> > +		 * SW helps to mark the used ring as dirty after device stops.
> > +		 */
> > +		for (i = 0; i < hw->nr_vring; i++) {
> > +			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> > +			rte_vhost_log_used_vring(vid, i, 0, len);
> > +		}
> > +	}
> > +}
> 
> Can we consider combining vdpa_ifcvf_blk_pause and vdpa_ifcvf_stop to
> one function and check device type internally to do different things? Because
> as I see, most logic is the same.
> 
OK, I will address it in the next version.
> > +
> >  #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
> >  		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))  static int @@ -
> 659,15
> > +709,22 @@ struct rte_vdpa_dev_info {
> >  		}
> >  		hw->vring[i].avail = gpa;
> >
> > -		/* Direct I/O for Tx queue, relay for Rx queue */
> > -		if (i & 1) {
> > -			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
> > -			if (gpa == 0) {
> > -				DRV_LOG(ERR, "Fail to get GPA for used
> ring.");
> > -				return -1;
> > +		if (internal->device_type == IFCVF_NET) {
> > +			/* Direct I/O for Tx queue, relay for Rx queue */
> > +			if (i & 1) {
> > +				gpa = hva_to_gpa(vid,
> > (uint64_t)(uintptr_t)vq.used);
> > +				if (gpa == 0) {
> > +					DRV_LOG(ERR, "Fail to get GPA for
> used
> > ring.");
> > +					return -1;
> > +				}
> > +				hw->vring[i].used = gpa;
> > +			} else {
> > +				hw->vring[i].used = m_vring_iova +
> > +					(char *)internal->m_vring[i].used -
> > +					(char *)internal->m_vring[i].desc;
> >  			}
> > -			hw->vring[i].used = gpa;
> > -		} else {
> > +		} else if (internal->device_type == IFCVF_BLK) {
> > +			/* BLK: relay every queue */
> >  			hw->vring[i].used = m_vring_iova +
> >  				(char *)internal->m_vring[i].used -
> >  				(char *)internal->m_vring[i].desc; @@ -686,7
> +743,10 @@ struct
> > rte_vdpa_dev_info {
> >  	}
> >  	hw->nr_vring = nr_vring;
> >
> > -	return ifcvf_start_hw(&internal->hw);
> > +	if (internal->device_type == IFCVF_NET)
> > +		return ifcvf_start_hw(&internal->hw);
> > +	else if (internal->device_type == IFCVF_BLK)
> > +		return ifcvf_hw_enable(&internal->hw);
> >
> >  error:
> >  	for (i = 0; i < nr_vring; i++)
> > @@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
> >
> >  	for (i = 0; i < hw->nr_vring; i++) {
> >  		/* synchronize remaining new used entries if any */
> > -		if ((i & 1) == 0)
> > +		if (internal->device_type == IFCVF_NET) {
> > +			if ((i & 1) == 0)
> > +				update_used_ring(internal, i);
> > +		} else if (internal->device_type == IFCVF_BLK) {
> >  			update_used_ring(internal, i);
> > +		}
> >
> >  		rte_vhost_get_vhost_vring(vid, i, &vq);
> >  		len = IFCVF_USED_RING_LEN(vq.size); @@ -773,17 +837,36
> @@ struct
> > rte_vdpa_dev_info {
> >  		}
> >  	}
> >
> > -	for (qid = 0; qid < q_num; qid += 2) {
> > -		ev.events = EPOLLIN | EPOLLPRI;
> > -		/* leave a flag to mark it's for interrupt */
> > -		ev.data.u64 = 1 | qid << 1 |
> > -			(uint64_t)internal->intr_fd[qid] << 32;
> > -		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid],
> &ev)
> > -				< 0) {
> > -			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> > -			return NULL;
> > +	if (internal->device_type == IFCVF_NET) {
> > +		for (qid = 0; qid < q_num; qid += 2) {
> > +			ev.events = EPOLLIN | EPOLLPRI;
> > +			/* leave a flag to mark it's for interrupt */
> > +			ev.data.u64 = 1 | qid << 1 |
> > +				(uint64_t)internal->intr_fd[qid] << 32;
> > +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> > +				      internal->intr_fd[qid], &ev)
> > +					< 0) {
> > +				DRV_LOG(ERR, "epoll add error: %s",
> > +					strerror(errno));
> > +				return NULL;
> > +			}
> > +			update_used_ring(internal, qid);
> > +		}
> > +	} else if (internal->device_type == IFCVF_BLK) {
> > +		for (qid = 0; qid < q_num; qid += 1) {
> > +			ev.events = EPOLLIN | EPOLLPRI;
> > +			/* leave a flag to mark it's for interrupt */
> > +			ev.data.u64 = 1 | qid << 1 |
> > +				(uint64_t)internal->intr_fd[qid] << 32;
> > +			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> > +				      internal->intr_fd[qid], &ev)
> > +					< 0) {
> > +				DRV_LOG(ERR, "epoll add error: %s",
> > +					strerror(errno));
> > +				return NULL;
> > +			}
> > +			update_used_ring(internal, qid);
> 
> It seems we can also reduce duplicate code for above case. And for other
> checks, if we can use only one combined condition to check, I prefer to just
> use one.
> 
OK, I will address it in the next version.
> Thanks,
> Chenbo
> 
> >  		}
> > -		update_used_ring(internal, qid);
> >  	}
> >
> >  	/* start relay with a first kick */
> > @@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
> >
> >  	/* stop the direct IO data path */
> >  	unset_notify_relay(internal);
> > -	vdpa_ifcvf_stop(internal);
> > +	if (internal->device_type == IFCVF_NET)
> > +		vdpa_ifcvf_stop(internal);
> > +	else if (internal->device_type == IFCVF_BLK)
> > +		vdpa_ifcvf_blk_pause(internal);
> >  	vdpa_disable_vfio_intr(internal);
> >
> >  	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL,
> false);
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example
  2022-04-25 13:38       ` Xia, Chenbo
@ 2022-04-27  4:11         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-04-27  4:11 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 9:39 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in
> example
> 
> Hi Andy,
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 07/16] examples/vdpa: add vDPA blk support in
> > example
> >
> > Add virtio blk device support to vDPA example.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  examples/vdpa/main.c             |  61 +++++++++++++-
> >  examples/vdpa/vdpa_blk_compact.h |  72 +++++++++++++++++
> >  examples/vdpa/vhost_user.h       | 169
> > +++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 301 insertions(+), 1 deletion(-)  create mode 100644
> > examples/vdpa/vdpa_blk_compact.h  create mode 100644
> > examples/vdpa/vhost_user.h
> >
> > diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index
> > 5ab0765..1c809ab 100644
> > --- a/examples/vdpa/main.c
> > +++ b/examples/vdpa/main.c
> > @@ -20,6 +20,7 @@
> >  #include <cmdline_parse_string.h>
> >  #include <cmdline_parse_num.h>
> >  #include <cmdline.h>
> > +#include "vdpa_blk_compact.h"
> >
> >  #define MAX_PATH_LEN 128
> >  #define MAX_VDPA_SAMPLE_PORTS 1024
> > @@ -41,6 +42,7 @@ struct vdpa_port {
> >  static int devcnt;
> >  static int interactive;
> >  static int client_mode;
> > +static int isblk;
> >
> >  /* display usage */
> >  static void
> > @@ -49,7 +51,8 @@ struct vdpa_port {
> >  	printf("Usage: %s [EAL options] -- "
> >  				 "	--interactive|-i: run in interactive
> > mode.\n"
> >  				 "	--iface <path>: specify the path prefix
> of
> > the socket files, e.g. /tmp/vhost-user-.\n"
> > -				 "	--client: register a vhost-user socket
> as
> > client mode.\n",
> > +				 "	--client: register a vhost-user socket
> as
> > client mode.\n"
> > +				 "	--isblk: device is a block device, e.g.
> > virtio_blk device.\n",
> >  				 prgname);
> >  }
> >
> > @@ -61,6 +64,7 @@ struct vdpa_port {
> >  		{"iface", required_argument, NULL, 0},
> >  		{"interactive", no_argument, &interactive, 1},
> >  		{"client", no_argument, &client_mode, 1},
> > +		{"isblk", no_argument, &isblk, 1},
> 
> I think a new API for get_device_type will be better than asking user to
> specify the device type.
> 
Good suggestion. I will send out a new version of the patch set and try to do this.
> >  		{NULL, 0, 0, 0},
> >  	};
> >  	int opt, idx;
> > @@ -159,6 +163,52 @@ struct vdpa_port {  };
> >
> >  static int
> > +vdpa_blk_device_set_features_and_protocol(const char *path) {
> > +	uint64_t protocol_features = 0;
> > +	int ret;
> > +
> > +	ret = rte_vhost_driver_set_features(path,
> VHOST_BLK_FEATURES_BASE);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_set_features for %s failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	ret = rte_vhost_driver_disable_features(path,
> > +		VHOST_VDPA_BLK_DISABLED_FEATURES);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_disable_features for %s failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	ret = rte_vhost_driver_get_protocol_features(path,
> > &protocol_features);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_get_protocol_features for %s
> > failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	protocol_features |= (1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
> > +	protocol_features |= (1ULL <<
> VHOST_USER_PROTOCOL_F_LOG_SHMFD);
> > +
> > +	ret = rte_vhost_driver_set_protocol_features(path,
> > protocol_features);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_set_protocol_features for %s
> > failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +out:
> > +	return ret;
> > +}
> > +
> > +static int
> >  start_vdpa(struct vdpa_port *vport)
> >  {
> >  	int ret;
> > @@ -192,6 +242,15 @@ struct vdpa_port {
> >  			"attach vdpa device failed: %s\n",
> >  			socket_path);
> >
> > +	if (isblk) {
> > +		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
> > +		ret =
> vdpa_blk_device_set_features_and_protocol(socket_path);
> > +		if (ret != 0)
> > +			rte_exit(EXIT_FAILURE,
> > +				"set vhost blk driver features and protocol
> > features failed: %s\n",
> > +				socket_path);
> > +	}
> > +
> >  	if (rte_vhost_driver_start(socket_path) < 0)
> >  		rte_exit(EXIT_FAILURE,
> >  			"start vhost driver failed: %s\n", diff --git
> > a/examples/vdpa/vdpa_blk_compact.h
> > b/examples/vdpa/vdpa_blk_compact.h
> 
> Please remove unused definition in this header. I didn't check all, but it
> seems at least VHOST_BLK_PROTOCOL_FEATURES is not used.
> 
Sure. I will remove all unused definitions.
> > new file mode 100644
> > index 0000000..e7c0f22
> > --- /dev/null
> > +++ b/examples/vdpa/vdpa_blk_compact.h
> > @@ -0,0 +1,72 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Intel Corporation  */
> > +
> > +#ifndef _VDPA_BLK_COMPACT_H_
> > +#define _VDPA_BLK_COMPACT_H_
> > +
> > +/**
> > + * @file
> > + *
> > + * Device specific vhost lib
> > + */
> > +
> > +#include <stdbool.h>
> > +
> > +#include <rte_pci.h>
> > +#include <rte_vhost.h>
> > +
> > +/* Feature bits */
> > +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment
> size
> > */
> > +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of
> segments
> > */
> > +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> > +#define VIRTIO_BLK_F_RO           5    /* Disk is read-only */
> > +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available
> > */
> > +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is
> > available */
> > +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> > +#define VIRTIO_BLK_F_DISCARD      13   /* DISCARD is supported */
> > +#define VIRTIO_BLK_F_WRITE_ZEROES 14   /* WRITE ZEROES is supported
> */
> > +
> > +/* Legacy feature bits */
> > +#ifndef VIRTIO_BLK_NO_LEGACY
> > +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> > +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru
> > */
> > +#define VIRTIO_BLK_F_FLUSH        9    /* Flush command supported */
> > +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in
> > config */
> > +
> > +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */ #define
> > +VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH #endif
> /* !VIRTIO_BLK_NO_LEGACY
> > +*/
> > +
> > +#ifndef VHOST_USER_F_PROTOCOL_FEATURES #define
> > +VHOST_USER_F_PROTOCOL_FEATURES 30 #endif
> > +
> > +#define VHOST_BLK_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
> > +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> > +	(1ULL << VIRTIO_F_VERSION_1) | \
> > +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> > +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> > +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
> > +
> > +#define VHOST_BLK_DISABLED_FEATURES ((1ULL <<
> > +VIRTIO_RING_F_EVENT_IDX) |
> > \
> > +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
> > +
> > +#define VHOST_BLK_FEATURES_BASE (VHOST_BLK_FEATURES | \
> > +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL <<
> VIRTIO_BLK_F_SEG_MAX) | \
> > +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL <<
> VIRTIO_BLK_F_BLK_SIZE) |
> > \
> > +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL <<
> VIRTIO_BLK_F_BARRIER)  |
> > \
> > +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL <<
> VIRTIO_BLK_F_CONFIG_WCE)
> > | \
> > +	(1ULL << VIRTIO_BLK_F_MQ))
> > +
> > +/* Not supported features */
> > +#define VHOST_VDPA_BLK_DISABLED_FEATURES
> (VHOST_BLK_DISABLED_FEATURES | \
> > +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL <<
> VIRTIO_BLK_F_CONFIG_WCE)
> > | \
> > +	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
> > +
> > +/* Vhost-blk support protocol features */ #define
> > +VHOST_BLK_PROTOCOL_FEATURES \
> > +	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
> > +	(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
> > +
> > +#endif /* _VDPA_BLK_COMPACT_H_ */
> > diff --git a/examples/vdpa/vhost_user.h b/examples/vdpa/vhost_user.h
> > new file mode 100644 index 0000000..137bbc2
> > --- /dev/null
> > +++ b/examples/vdpa/vhost_user.h
> 
> I don't understand, why introduce this header? It seems never used.
> 
Sure. I will remove this file.
> Thanks,
> Chenbo
> 
> > @@ -0,0 +1,169 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Intel Corporation
> > + */
> > +
> > +#ifndef _VHOST_NET_USER_H
> > +#define _VHOST_NET_USER_H
> > +
> > +#include <stdint.h>
> > +#include <linux/vhost.h>
> > +
> > +#include "rte_vhost.h"
> > +
> > +/* refer to hw/virtio/vhost-user.c */
> > +
> > +#define VHOST_MEMORY_MAX_NREGIONS 8
> > +
> > +#ifndef VHOST_USER_MAX_CONFIG_SIZE
> > +#define VHOST_USER_MAX_CONFIG_SIZE		256
> > +#endif
> > +
> > +#define VHOST_USER_PROTOCOL_FEATURES	((1ULL <<
> > VHOST_USER_PROTOCOL_F_MQ) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD)
> |\
> > +			(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
> > +			(1ULL <<
> VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
> > +			(1ULL <<
> VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
> > +			(1ULL <<
> VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
> > +			(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
> > +
> > +typedef enum VhostUserRequest {
> > +	VHOST_USER_NONE = 0,
> > +	VHOST_USER_GET_FEATURES = 1,
> > +	VHOST_USER_SET_FEATURES = 2,
> > +	VHOST_USER_SET_OWNER = 3,
> > +	VHOST_USER_RESET_OWNER = 4,
> > +	VHOST_USER_SET_MEM_TABLE = 5,
> > +	VHOST_USER_SET_LOG_BASE = 6,
> > +	VHOST_USER_SET_LOG_FD = 7,
> > +	VHOST_USER_SET_VRING_NUM = 8,
> > +	VHOST_USER_SET_VRING_ADDR = 9,
> > +	VHOST_USER_SET_VRING_BASE = 10,
> > +	VHOST_USER_GET_VRING_BASE = 11,
> > +	VHOST_USER_SET_VRING_KICK = 12,
> > +	VHOST_USER_SET_VRING_CALL = 13,
> > +	VHOST_USER_SET_VRING_ERR = 14,
> > +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> > +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> > +	VHOST_USER_GET_QUEUE_NUM = 17,
> > +	VHOST_USER_SET_VRING_ENABLE = 18,
> > +	VHOST_USER_SEND_RARP = 19,
> > +	VHOST_USER_NET_SET_MTU = 20,
> > +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> > +	VHOST_USER_IOTLB_MSG = 22,
> > +	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> > +	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> > +	VHOST_USER_POSTCOPY_ADVISE = 28,
> > +	VHOST_USER_POSTCOPY_LISTEN = 29,
> > +	VHOST_USER_POSTCOPY_END = 30,
> > +	VHOST_USER_GET_INFLIGHT_FD = 31,
> > +	VHOST_USER_SET_INFLIGHT_FD = 32,
> > +	VHOST_USER_MAX = 33
> > +} VhostUserRequest;
> > +
> > +typedef enum VhostUserSlaveRequest {
> > +	VHOST_USER_SLAVE_NONE = 0,
> > +	VHOST_USER_SLAVE_IOTLB_MSG = 1,
> > +	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
> > +	VHOST_USER_SLAVE_MAX
> > +} VhostUserSlaveRequest;
> > +
> > +typedef struct VhostUserMemoryRegion {
> > +	uint64_t guest_phys_addr;
> > +	uint64_t memory_size;
> > +	uint64_t userspace_addr;
> > +	uint64_t mmap_offset;
> > +} VhostUserMemoryRegion;
> > +
> > +typedef struct VhostUserMemory {
> > +	uint32_t nregions;
> > +	uint32_t padding;
> > +	VhostUserMemoryRegion
> regions[VHOST_MEMORY_MAX_NREGIONS];
> > +} VhostUserMemory;
> > +
> > +typedef struct VhostUserLog {
> > +	uint64_t mmap_size;
> > +	uint64_t mmap_offset;
> > +} VhostUserLog;
> > +
> > +/* Comply with Cryptodev-Linux */
> > +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH	512
> > +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH	64
> > +
> > +/* Same structure as vhost-user backend session info */
> > +typedef struct VhostUserCryptoSessionParam {
> > +	int64_t session_id;
> > +	uint32_t op_code;
> > +	uint32_t cipher_algo;
> > +	uint32_t cipher_key_len;
> > +	uint32_t hash_algo;
> > +	uint32_t digest_len;
> > +	uint32_t auth_key_len;
> > +	uint32_t aad_len;
> > +	uint8_t op_type;
> > +	uint8_t dir;
> > +	uint8_t hash_mode;
> > +	uint8_t chaining_dir;
> > +	uint8_t *ciphe_key;
> > +	uint8_t *auth_key;
> > +	uint8_t
> cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH];
> > +	uint8_t
> auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
> > +} VhostUserCryptoSessionParam;
> > +
> > +typedef struct VhostUserVringArea {
> > +	uint64_t u64;
> > +	uint64_t size;
> > +	uint64_t offset;
> > +} VhostUserVringArea;
> > +
> > +typedef struct VhostUserInflight {
> > +	uint64_t mmap_size;
> > +	uint64_t mmap_offset;
> > +	uint16_t num_queues;
> > +	uint16_t queue_size;
> > +} VhostUserInflight;
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +	uint32_t offset;
> > +	uint32_t size;
> > +	uint32_t flags;
> > +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
> > +
> > +typedef struct VhostUserMsg {
> > +	union {
> > +		uint32_t master; /* a VhostUserRequest value */
> > +		uint32_t slave;  /* a VhostUserSlaveRequest value*/
> > +	} request;
> > +
> > +#define VHOST_USER_VERSION_MASK     0x3
> > +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> > +#define VHOST_USER_NEED_REPLY		(0x1 << 3)
> > +	uint32_t flags;
> > +	uint32_t size; /* the following payload size */
> > +	union {
> > +#define VHOST_USER_VRING_IDX_MASK   0xff
> > +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> > +		uint64_t u64;
> > +		struct vhost_vring_state state;
> > +		struct vhost_vring_addr addr;
> > +		VhostUserMemory memory;
> > +		VhostUserLog    log;
> > +		struct vhost_iotlb_msg iotlb;
> > +		VhostUserCryptoSessionParam crypto_session;
> > +		VhostUserVringArea area;
> > +		VhostUserInflight inflight;
> > +		struct vhost_user_config cfg;
> > +	} payload;
> > +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> > +	int fd_num;
> > +} __attribute((packed)) VhostUserMsg;
> > +
> > +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
> > +
> > +/* The version of the protocol we support */
> > +#define VHOST_USER_VERSION    0x1
> > +#endif
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v6 03/16] vhost: add vhost msg support
  2022-04-26  9:17           ` Xia, Chenbo
@ 2022-04-27  4:12             ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-04-27  4:12 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo, 
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Tuesday, April 26, 2022 5:17 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Tuesday, April 26, 2022 4:56 PM
> > To: Xia, Chenbo <chenbo.xia@intel.com>; dev@dpdk.org
> > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > Changpeng <changpeng.liu@intel.com>
> > Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> >
> > HI Chenbo,
> >
> > Thanks for your reply.
> > My reply is inline.
> >
> > > -----Original Message-----
> > > From: Xia, Chenbo <chenbo.xia@intel.com>
> > > Sent: Monday, April 25, 2022 8:42 PM
> > > To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> > > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > > Changpeng <changpeng.liu@intel.com>
> > > Subject: RE: [PATCH v6 03/16] vhost: add vhost msg support
> > >
> > > Hi Andy,
> > >
> > > > -----Original Message-----
> > > > From: Pei, Andy <andy.pei@intel.com>
> > > > Sent: Thursday, April 21, 2022 4:34 PM
> > > > To: dev@dpdk.org
> > > > Cc: Xia, Chenbo <chenbo.xia@intel.com>;
> > > > maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > > > Changpeng <changpeng.liu@intel.com>
> > > > Subject: [PATCH v6 03/16] vhost: add vhost msg support
> > > >
> > > > Add support for VHOST_USER_GET_CONFIG and
> > > VHOST_USER_SET_CONFIG.
> > > > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is
> only
> > > > supported by virtio blk VDPA device.
> > > >
> > > > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > > > ---
> > > >  lib/vhost/vhost_user.c | 69
> > > > ++++++++++++++++++++++++++++++++++++++++++++++++++
> > > >  lib/vhost/vhost_user.h | 13 ++++++++++
> > > >  2 files changed, 82 insertions(+)
> > > >
> > > > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > > > 1d39067..3780804 100644
> > > > --- a/lib/vhost/vhost_user.c
> > > > +++ b/lib/vhost/vhost_user.c
> > > > @@ -80,6 +80,8 @@
> > > >  [VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> > > > [VHOST_USER_SET_SLAVE_REQ_FD]  =
> > > "VHOST_USER_SET_SLAVE_REQ_FD",
> > > >  [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > > > +[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > > > +[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> > > >  [VHOST_USER_CRYPTO_CREATE_SESS] =
> > > "VHOST_USER_CRYPTO_CREATE_SESS",
> > > >  [VHOST_USER_CRYPTO_CLOSE_SESS] =
> > > "VHOST_USER_CRYPTO_CLOSE_SESS",
> > > >  [VHOST_USER_POSTCOPY_ADVISE]  =
> > > "VHOST_USER_POSTCOPY_ADVISE", @@
> > > > -2542,6 +2544,71 @@ static int is_vring_iotlb(struct virtio_net
> > > > *dev, }
> > > >
> > > >  static int
> > > > +vhost_user_get_config(struct virtio_net **pdev, struct
> > > > +vhu_msg_context *ctx, int main_fd __rte_unused) { struct
> > > > +virtio_net *dev = *pdev; struct rte_vdpa_device *vdpa_dev =
> > > > +dev->vdpa_dev; int ret = 0;
> > > > +
> > > > +if (vdpa_dev->ops->get_config) {
> > > > +ret = vdpa_dev->ops->get_config(dev->vid,
> > > > +   ctx->msg.payload.cfg.region,
> > > > +   ctx->msg.payload.cfg.size);
> > > > +if (ret != 0) {
> > > > +ctx->msg.size = 0;
> > > > +VHOST_LOG_CONFIG(ERR,
> > > > + "(%s) get_config() return error!\n",
> > > > + dev->ifname);
> > > > +}
> > > > +} else {
> > > > +VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> > > supportted!\n",
> > >
> > > Supported
> > >
> > I will send out a new version to fix this.
> > > > + dev->ifname);
> > > > +}
> > > > +
> > > > +return RTE_VHOST_MSG_RESULT_REPLY; }
> > > > +
> > > > +static int
> > > > +vhost_user_set_config(struct virtio_net **pdev, struct
> > > > +vhu_msg_context *ctx, int main_fd __rte_unused) { struct
> > > > +virtio_net *dev = *pdev; struct rte_vdpa_device *vdpa_dev =
> > > > +dev->vdpa_dev; int ret = 0;
> > > > +
> > > > +if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> > >
> > > I think you should do sanity check on payload.cfg.size and make sure
> > it's
> > > smaller than VHOST_USER_MAX_CONFIG_SIZE
> > >
> > > and same check for offset
> > >
> > I think payload.cfg.size can be smaller than or equal to
> > VHOST_USER_MAX_CONFIG_SIZE.
> > payload.cfg.offset can be smaller than or equal to
> > VHOST_USER_MAX_CONFIG_SIZE as well
> 
> After double check: offset is the config space offset, so this should be
> checked in vdpa driver. Size check on vhost lib layer should be just <=
> MAX_you_defined
> 
OK.
> Thanks,
> Chenbo
> 
> >
> > > > +VHOST_LOG_CONFIG(ERR,
> > > > +"(%s) invalid set config msg size: %"PRId32" != %d\n",
> > > > +dev->ifname, ctx->msg.size,
> > >
> > > Based on you will change the log too, payload.cfg.size is uint32_t,
> > > so
> > PRId32 ->
> > > PRIu32
> > >
> > > > +(int)sizeof(struct vhost_user_config));
> > >
> > > So this can be %u
> > >
> > Sure.
> > > > +goto OUT;
> > > > +}
> > > > +
> > > > +if (vdpa_dev->ops->set_config) {
> > > > +ret = vdpa_dev->ops->set_config(dev->vid,
> > > > +ctx->msg.payload.cfg.region,
> > > > +ctx->msg.payload.cfg.offset,
> > > > +ctx->msg.payload.cfg.size,
> > > > +ctx->msg.payload.cfg.flags);
> > > > +if (ret)
> > > > +VHOST_LOG_CONFIG(ERR,
> > > > + "(%s) set_config() return error!\n",
> > > > + dev->ifname);
> > > > +} else {
> > > > +VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> > > supportted!\n",
> > >
> > > Supported
> > >
> > I will send out a new version to fix this.
> > > > + dev->ifname);
> > > > +}
> > > > +
> > > > +return RTE_VHOST_MSG_RESULT_OK;
> > > > +
> > > > +OUT:
> > >
> > > Lower case looks better
> > >
> > OK. I will send out a new version to fix this.
> > > > +return RTE_VHOST_MSG_RESULT_ERR;
> > > > +}
> > >
> > > Almost all handlers need check on expected fd num (this case is 0),
> > > so
> > the
> > > above new handlers should also do that. Please refer to
> > > validate_msg_fds
> > in
> > > other handlers.
> > >
> > > BTW, you can wait for review for other patches and send new versions
> > later.
> > >
> > I will send out new patch after vhost: validate fds attached to
> > messages from David Marchand is merged.
> > > Thanks,
> > > Chenbo
> > >
> > > > +
> > > > +static int
> > > >  vhost_user_iotlb_msg(struct virtio_net **pdev,  struct
> > > > vhu_msg_context *ctx,  int main_fd __rte_unused) @@ -2782,6
> > > > +2849,8 @@ typedef int (*vhost_message_handler_t)(struct
> > > > virtio_net **pdev,  [VHOST_USER_NET_SET_MTU] =
> > > > vhost_user_net_set_mtu,  [VHOST_USER_SET_SLAVE_REQ_FD] =
> > > > vhost_user_set_req_fd,  [VHOST_USER_IOTLB_MSG] =
> > > > vhost_user_iotlb_msg,
> > > > +[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > > > +[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> > > >  [VHOST_USER_POSTCOPY_ADVISE] =
> > > vhost_user_set_postcopy_advise,
> > > >  [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> > > > [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> > > git
> > > > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > > > c946cc2..97cfb2f 100644
> > > > --- a/lib/vhost/vhost_user.h
> > > > +++ b/lib/vhost/vhost_user.h
> > > > @@ -50,6 +50,8 @@
> > > >  VHOST_USER_NET_SET_MTU = 20,
> > > >  VHOST_USER_SET_SLAVE_REQ_FD = 21,  VHOST_USER_IOTLB_MSG =
> 22,
> > > > +VHOST_USER_GET_CONFIG = 24,
> > > > +VHOST_USER_SET_CONFIG = 25,
> > > >  VHOST_USER_CRYPTO_CREATE_SESS = 26,
> VHOST_USER_CRYPTO_CLOSE_SESS
> > > > = 27,  VHOST_USER_POSTCOPY_ADVISE = 28, @@ -125,6 +127,16 @@
> > > > uint16_t queue_size;  } VhostUserInflight;
> > > >
> > > > +#define VHOST_USER_MAX_CONFIG_SIZE256
> > > > +
> > > > +/** Get/set config msg payload */ struct vhost_user_config {
> > > > +uint32_t offset; uint32_t size; uint32_t flags; uint8_t
> > > > +region[VHOST_USER_MAX_CONFIG_SIZE];
> > > > +};
> > > > +
> > > >  typedef struct VhostUserMsg {
> > > >  union {
> > > >  uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> > > +160,7 @@
> > > >  VhostUserCryptoSessionParam crypto_session;  VhostUserVringArea
> > > > area;  VhostUserInflight inflight;
> > > > +struct vhost_user_config cfg;
> > > >  } payload;
> > > >  /* Nothing should be added after the payload */  } __rte_packed
> > > > VhostUserMsg;
> > > > --
> > > > 1.8.3.1
> > >
> >
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                     ` (4 preceding siblings ...)
  2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-04-27  8:29   ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 01/18] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (17 more replies)
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                     ` (2 subsequent siblings)
  8 siblings, 18 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because of the many similarities, I re-use part of the vdpa/ifc driver.
Distinguish the virtio net and blk device by device id, and implement 
specific features and ops.
Add example to vdpa to support virtio_blk device.
To support blk device live migration, some modifications are made to the vhost lib.
Perform dev_conf op only under VHOST_USER_SET_VRING_CALL msg.
v7:
 Check on expected fd num in new vhost msg handler.
 Sanity check on vhost msg size.
 Fix typo.
 Add commit log to help understand code.
 Remove duplicated code.
 Add new API to get vDPA device type.
v6:
 Fix some commit log.
 Add vhost socket in log output to make it more user-friendly.
 When driver ops fail, just output some log, do not break message handler.
 Check vhost msg size in msg handler.
v5:
 Fix some coding style issues.
v4:
 Add arg "isblk" to vdpa example to specify a block device, fix some
 issues in example.
 Make sure code specific to block device does not affect net device.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.
Andy Pei (18):
  vdpa/ifc: add support for virtio blk device
  vhost: add vDPA ops for blk device
  vhost: add vhost msg support
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vDPA interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  vhost: add API to get vDPA device type
  vdpa/ifc: add get device type ops to ifc driver
  examples/vdpa: add vDPA blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: add set vring state for blk device
  vdpa/ifc: add some log at vDPA launch before qemu connect
  vdpa/ifc: read virtio max queues from hardware
  vdpa/ifc: add interrupt and handle for virtio blk
  vdpa/ifc: add is blk flag to ifcvf HW struct
  vdpa/ifc/base: access correct register for blk device
  vdpa/ifc: blk device pause without no inflight IO
  vhost: make sure each queue callfd is configured
 drivers/vdpa/ifc/base/ifcvf.c    |  36 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  20 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 392 +++++++++++++++++++++++++++++++++++++--
 examples/vdpa/main.c             |  57 ++++++
 examples/vdpa/vdpa_blk_compact.h |  65 +++++++
 lib/vhost/rte_vhost.h            |  17 ++
 lib/vhost/socket.c               |  39 ++++
 lib/vhost/vdpa_driver.h          |  11 +-
 lib/vhost/version.map            |   2 +
 lib/vhost/vhost_user.c           |  97 ++++++++++
 lib/vhost/vhost_user.h           |  13 ++
 usertools/dpdk-devbind.py        |   5 +-
 12 files changed, 730 insertions(+), 24 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 01/18] vdpa/ifc: add support for virtio blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 02/18] vhost: add vDPA ops for " Andy Pei
                       ` (16 subsequent siblings)
  17 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id.
Blk and net devices are implemented with their proper features and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 10 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operation. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1306,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1313,6 +1375,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 02/18] vhost: add vDPA ops for blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-04-27  8:29     ` [PATCH v7 01/18] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 03/18] vhost: add vhost msg support Andy Pei
                       ` (15 subsequent siblings)
  17 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vDPA ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 03/18] vhost: add vhost msg support
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-04-27  8:29     ` [PATCH v7 01/18] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-04-27  8:29     ` [PATCH v7 02/18] vhost: add vDPA ops for " Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-11 14:24       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 04/18] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (14 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk vDPA devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 13 ++++++++
 2 files changed, 96 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..e925428 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
 	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
 	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
+	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
 	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,85 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) get_config() return error!\n",
+					 dev->ifname);
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) invalid set config msg size: %"PRIu32" != %d\n",
+			dev->ifname, ctx->msg.size,
+			(int)sizeof(struct vhost_user_config));
+		goto out;
+	}
+
+	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) vhost_user_config size: %"PRIu32", should not be larger than %d\n",
+			dev->ifname, ctx->msg.payload.cfg.size,
+			VHOST_USER_MAX_CONFIG_SIZE);
+		goto out;
+	}
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+		if (ret)
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) set_config() return error!\n",
+					 dev->ifname);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+
+out:
+	return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2782,6 +2863,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
 	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
 	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
 	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
 	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
 	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..97cfb2f 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -125,6 +127,16 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -148,6 +160,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 04/18] vdpa/ifc: add blk ops for ifc device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 03/18] vhost: add vhost msg support Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
                       ` (13 subsequent siblings)
  17 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK		0xffffffff
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 04/18] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-11 14:35       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (12 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
For the block device type, we use one queue to transfer
both read and write requests, so we have to relay commands
on all queues.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..07fc3ca 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,6 +370,7 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
@@ -379,7 +380,13 @@ struct rte_vdpa_dev_info {
 	for (i = 0; i < nr_vring; i++) {
 		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
 		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
+		if (m_rx == true &&
+			((i & 1) == 0 || internal->device_type == IFCVF_BLK)) {
+			/* For the net we only need to relay rx queue,
+			 * which will change the mem of VM.
+			 * For the blk we need to relay all the read cmd
+			 * of each queue
+			 */
 			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 			if (fd < 0) {
 				DRV_LOG(ERR, "can't setup eventfd: %s",
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 12:55       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 07/18] vhost: add API to get vDPA device type Andy Pei
                       ` (11 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add SW live-migration support to block device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 07fc3ca..8a260b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -312,6 +312,7 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
 	uint32_t i;
 	int vid;
 	uint64_t features = 0;
@@ -319,6 +320,22 @@ struct rte_vdpa_dev_info {
 	uint64_t len;
 
 	vid = internal->vid;
+
+	/* to make sure no packet is lost for blk device
+	 * do not stop until last_avail_idx == last_used_idx
+	 */
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
 	ifcvf_stop_hw(hw);
 
 	for (i = 0; i < hw->nr_vring; i++)
@@ -642,8 +659,10 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NET: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((internal->device_type == IFCVF_NET) && (i & 1)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
@@ -693,8 +712,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -756,7 +779,9 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
+	for (qid = 0; qid < q_num; qid += 1) {
+		if ((internal->device_type == IFCVF_NET) && (qid & 1))
+			continue;
 		ev.events = EPOLLIN | EPOLLPRI;
 		/* leave a flag to mark it's for interrupt */
 		ev.data.u64 = 1 | qid << 1 |
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 07/18] vhost: add API to get vDPA device type
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 13:14       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver Andy Pei
                       ` (10 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Vhost backends of different devices have different features.
Add an API to get the vDPA device type (currently net device or
blk device), so users can set different features for different
kinds of devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/rte_vhost.h   | 17 +++++++++++++++++
 lib/vhost/socket.c      | 39 +++++++++++++++++++++++++++++++++++++++
 lib/vhost/vdpa_driver.h |  3 +++
 lib/vhost/version.map   |  2 ++
 4 files changed, 61 insertions(+)
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index c733f85..c977a24 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -117,6 +117,9 @@
 
 #define RTE_MAX_VHOST_DEVICE	1024
 
+#define VDPA_DEVICE_TYPE_NET 0
+#define VDPA_DEVICE_TYPE_BLK 1
+
 struct rte_vdpa_device;
 
 /**
@@ -486,6 +489,20 @@ struct rte_vdpa_device *
 rte_vhost_driver_get_vdpa_device(const char *path);
 
 /**
+ * Get the device type of the vdpa device.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param type
+ *  the device type of the vdpa device
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
+
+/**
  * Set the feature bits the vhost-user driver supports.
  *
  * @param path
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index b304339..7da90e8 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -619,6 +619,45 @@ struct rte_vdpa_device *
 }
 
 int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
+{
+	struct vhost_user_socket *vsocket;
+	struct rte_vdpa_device *vdpa_dev;
+	uint32_t vdpa_type = 0;
+	int ret = 0;
+
+	pthread_mutex_lock(&vhost_user.mutex);
+	vsocket = find_vhost_user_socket(path);
+	if (!vsocket) {
+		VHOST_LOG_CONFIG(ERR,
+				 "(%s) socket file is not registered yet.\n",
+				 path);
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	vdpa_dev = vsocket->vdpa_dev;
+	if (!vdpa_dev) {
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	if (vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type) < 0) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) failed to get vdpa dev type for socket file.\n",
+			path);
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	*type = vdpa_type;
+
+unlock_exit:
+	pthread_mutex_unlock(&vhost_user.mutex);
+	return ret;
+}
+
+int
 rte_vhost_driver_disable_features(const char *path, uint64_t features)
 {
 	struct vhost_user_socket *vsocket;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index e59a834..9cbd7cd 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
 	/** Set the device configuration space */
 	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
 		      uint32_t size, uint32_t flags);
+
+	/** get device type: net device, blk device... */
+	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
 };
 
 /**
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 0a66c58..fe4e8de 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -87,6 +87,8 @@ EXPERIMENTAL {
 
 	# added in 22.03
 	rte_vhost_async_dma_configure;
+
+	rte_vhost_driver_get_vdpa_dev_type;
 };
 
 INTERNAL {
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 07/18] vhost: add API to get vDPA device type Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 13:21       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example Andy Pei
                       ` (9 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add get device type ops to ifc driver.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8a260b7..99a6ab0 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1300,6 +1300,15 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static int
+ifcvf_blk_get_device_type(struct rte_vdpa_device *vdev,
+	uint32_t *type)
+{
+	RTE_SET_USED(vdev);
+	*type = VDPA_DEVICE_TYPE_BLK;
+	return 0;
+}
+
 static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
 	.get_queue_num = ifcvf_get_queue_num,
 	.get_features = ifcvf_get_vdpa_features,
@@ -1313,6 +1322,7 @@ struct rte_vdpa_dev_info {
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
 	.get_notify_area = ifcvf_get_notify_area,
 	.get_config = ifcvf_blk_get_config,
+	.get_dev_type = ifcvf_blk_get_device_type,
 };
 
 struct rte_vdpa_dev_info dev_info[] = {
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 13:34       ` Xia, Chenbo
  2022-04-27  8:29     ` [PATCH v7 10/18] usertools: add support for virtio blk device Andy Pei
                       ` (8 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to vDPA example.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             | 57 +++++++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 65 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..2544141 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -159,8 +160,54 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
+	uint32_t device_type = 0;
 	int ret;
 	char *socket_path = vport->ifname;
 
@@ -192,6 +239,16 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type);
+	if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) {
+		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..136c3f6
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_pci.h>
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1))
+
+#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX))
+
+#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \
+	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 10/18] usertools: add support for virtio blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-04-27  8:29     ` [PATCH v7 11/18] vdpa/ifc: add set vring state for " Andy Pei
                       ` (7 subsequent siblings)
  17 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add virtio blk device support to devbind.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..7231be4 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -72,6 +72,9 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': None, 'SDevice': None}
+
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -82,7 +85,7 @@
 compress_devices = [cavium_zip]
 regex_devices = [cn9k_ree]
 misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev,
-                intel_ntb_skx, intel_ntb_icx]
+                intel_ntb_skx, intel_ntb_icx, virtio_blk]
 
 # global dict ethernet devices present. Dictionary indexed by PCI address.
 # Each device within this is itself a dictionary of device properties
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 11/18] vdpa/ifc: add set vring state for blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 10/18] usertools: add support for virtio blk device Andy Pei
@ 2022-04-27  8:29     ` Andy Pei
  2022-05-12 13:44       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
                       ` (6 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:29 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
The set_vring_state op is mandatory, so add a set_vring_state op for the blk device.
For now it is a stub that does nothing and returns success.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 99a6ab0..ca49bc3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1290,6 +1290,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+	RTE_SET_USED(vid);
+	RTE_SET_USED(vring);
+	RTE_SET_USED(state);
+
+	return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
 	uint64_t *features)
 {
@@ -1316,7 +1326,7 @@ struct rte_vdpa_dev_info {
 	.get_protocol_features = ifcvf_blk_get_protocol_features,
 	.dev_conf = ifcvf_dev_config,
 	.dev_close = ifcvf_dev_close,
-	.set_vring_state = NULL,
+	.set_vring_state = ifcvf_blk_set_vring_state,
 	.migration_done = NULL,
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-04-27  8:29     ` [PATCH v7 11/18] vdpa/ifc: add set vring state for " Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-12 13:53       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware Andy Pei
                       ` (5 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Log the virtio blk device config space information
at vDPA launch, before QEMU connects.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index ca49bc3..4060a44 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1363,6 +1363,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1429,6 +1432,31 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(INFO, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(INFO, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(INFO, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(INFO, "geometry");
+		DRV_LOG(INFO, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(INFO, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(INFO, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(INFO, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-12 13:55       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
                       ` (4 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Previously, max_queues was always set to IFCVF_MAX_QUEUES.
For the blk device, max_queues is now the minimum of IFCVF_MAX_QUEUES and
the num_queues value read from the hardware config space.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4060a44..5a8cf1c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1457,6 +1457,10 @@ struct rte_vdpa_dev_info dev_info[] = {
 			internal->hw.blk_cfg->geometry.sectors);
 		DRV_LOG(INFO, "num_queues: 0x%08x",
 			internal->hw.blk_cfg->num_queues);
+
+		/* reset max_queue here, to minimum modification */
+		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  2:52       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
                       ` (3 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 112 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 5a8cf1c..0e94e1f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid;	/* thread for intr relay */
 	int epfd;
+	int csc_fd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -558,6 +560,107 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_fd, csc_val = 0;
+
+	csc_fd = epoll_create(1);
+	if (csc_fd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		return NULL;
+	}
+
+	internal->csc_fd = csc_fd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail\n");
+			return NULL;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				return NULL;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	int ret;
+
+	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+			(void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_fd >= 0)
+		close(internal->csc_fd);
+	internal->csc_fd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -584,10 +687,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -880,6 +989,9 @@ struct rte_vdpa_dev_info {
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
 	vdpa_ifcvf_stop(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (13 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  2:55       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device Andy Pei
                       ` (2 subsequent siblings)
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
Add is_blk flag to ifcvf_hw, and init is_blk during probe.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..8591ef1 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	u8 is_blk;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 0e94e1f..4923bc1 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1536,11 +1536,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
 	if (device_id == VIRTIO_ID_NET) {
 		internal->device_type = IFCVF_NET;
+		internal->hw.is_blk = IFCVF_NET;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_NET].features;
 	} else if (device_id == VIRTIO_ID_BLOCK) {
 		internal->device_type = IFCVF_BLK;
+		internal->hw.is_blk = IFCVF_BLK;
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (14 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  2:57       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO Andy Pei
  2022-04-27  8:30     ` [PATCH v7 18/18] vhost: make sure each queue callfd is configured Andy Pei
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
1. last_avail_idx is the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..4d5881a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,18 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->is_blk == IFCVF_BLK) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		} else if (hw->is_blk == IFCVF_NET) {
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		}
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +262,23 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->is_blk) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		} else if (hw->is_blk == IFCVF_NET) {
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+		}
+
+		if (hw->is_blk == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else if (hw->is_blk == IFCVF_NET)
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8591ef1..ff11b12 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG	9
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (15 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  2:59       ` Xia, Chenbo
  2022-04-27  8:30     ` [PATCH v7 18/18] vhost: make sure each queue callfd is configured Andy Pei
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
When a virtio blk device is paused, make sure the hardware last_avail_idx
and last_used_idx are the same.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4923bc1..def6adf 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -314,12 +314,12 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
-	struct rte_vhost_vring vq;
 	uint32_t i;
 	int vid;
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
 
@@ -328,13 +328,22 @@ struct rte_vdpa_dev_info {
 	 */
 	if (internal->device_type == IFCVF_BLK) {
 		for (i = 0; i < hw->nr_vring; i++) {
-			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-			while (vq.avail->idx != vq.used->idx) {
-				ifcvf_notify_queue(hw, i);
-				usleep(10);
-			}
-			hw->vring[i].last_avail_idx = vq.avail->idx;
-			hw->vring[i].last_used_idx = vq.used->idx;
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
 		}
 	}
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v7 18/18] vhost: make sure each queue callfd is configured
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (16 preceding siblings ...)
  2022-04-27  8:30     ` [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO Andy Pei
@ 2022-04-27  8:30     ` Andy Pei
  2022-05-13  3:10       ` Xia, Chenbo
  17 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-04-27  8:30 UTC (permalink / raw)
  To: dev; +Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu
During the vhost data path setup, QEMU first creates one call fd
and then creates another call fd at the end.
The final call fd is the one used to relay notifications.
In the original code, dev_conf runs after the kick fd is set and
configures the first call fd. Even though the actual call fd is set later,
the data path does not work correctly.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index e925428..82122b6 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3230,12 +3230,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	if (!vdpa_dev)
 		goto out;
 
+	if (request != VHOST_USER_SET_VRING_CALL)
+		goto out;
+
 	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
 		if (vdpa_dev->ops->dev_conf(dev->vid))
 			VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n",
 					dev->ifname);
 		else
 			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+	} else {
+		/* when VIRTIO_DEV_VDPA_CONFIGURED already configured
+		 * close the device and config the device again,
+		 * make sure the call fd of each queue is configured correctly.
+		 */
+		if (vdpa_dev->ops->dev_close(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to close vDPA device\n");
+		if (vdpa_dev->ops->dev_conf(dev->vid))
+			VHOST_LOG_CONFIG(ERR,
+					 "Failed to re-config vDPA device\n");
 	}
 
 out:
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 03/18] vhost: add vhost msg support
  2022-04-27  8:29     ` [PATCH v7 03/18] vhost: add vhost msg support Andy Pei
@ 2022-05-11 14:24       ` Xia, Chenbo
  2022-05-12  3:50         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-11 14:24 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 03/18] vhost: add vhost msg support
A better title would be: vhost: add vhost msg support for get/set config
> 
> Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> supported by virtio blk VDPA device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 83
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vhost_user.h | 13 ++++++++
>  2 files changed, 96 insertions(+)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 1d39067..e925428 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -80,6 +80,8 @@
>  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
>  	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
>  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
>  	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>  	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
>  	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
> @@ -2542,6 +2544,85 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
> 
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (validate_msg_fds(dev, ctx, 0) != 0)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	if (vdpa_dev->ops->get_config) {
> +		ret = vdpa_dev->ops->get_config(dev->vid,
> +					   ctx->msg.payload.cfg.region,
> +					   ctx->msg.payload.cfg.size);
> +		if (ret != 0) {
> +			ctx->msg.size = 0;
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) get_config() return error!\n",
> +					 dev->ifname);
> +		}
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (validate_msg_fds(dev, ctx, 0) != 0)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) invalid set config msg size: %"PRIu32" != %d\n",
> +			dev->ifname, ctx->msg.size,
> +			(int)sizeof(struct vhost_user_config));
> +		goto out;
> +	}
Sorry, I was wrong in v6. After double-checking, the size is already checked in read_vhost_message
(although that check is not very accurate), so we can remove the check above. (By the way, even if we
kept the check, the logic should be size <= sizeof(config).)
The rest of the patch looks good, thanks.
Chenbo
> +
> +	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) vhost_user_config size: %"PRIu32", should not be
> larger than %d\n",
> +			dev->ifname, ctx->msg.payload.cfg.size,
> +			VHOST_USER_MAX_CONFIG_SIZE);
> +		goto out;
> +	}
> +
> +	if (vdpa_dev->ops->set_config) {
> +		ret = vdpa_dev->ops->set_config(dev->vid,
> +			ctx->msg.payload.cfg.region,
> +			ctx->msg.payload.cfg.offset,
> +			ctx->msg.payload.cfg.size,
> +			ctx->msg.payload.cfg.flags);
> +		if (ret)
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) set_config() return error!\n",
> +					 dev->ifname);
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_OK;
> +
> +out:
> +	return RTE_VHOST_MSG_RESULT_ERR;
> +}
> +
> +static int
>  vhost_user_iotlb_msg(struct virtio_net **pdev,
>  			struct vhu_msg_context *ctx,
>  			int main_fd __rte_unused)
> @@ -2782,6 +2863,8 @@ typedef int (*vhost_message_handler_t)(struct
> virtio_net **pdev,
>  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
>  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
>  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
>  	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
>  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
>  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
> diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
> index c946cc2..97cfb2f 100644
> --- a/lib/vhost/vhost_user.h
> +++ b/lib/vhost/vhost_user.h
> @@ -50,6 +50,8 @@
>  	VHOST_USER_NET_SET_MTU = 20,
>  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
>  	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
>  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
>  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
>  	VHOST_USER_POSTCOPY_ADVISE = 28,
> @@ -125,6 +127,16 @@
>  	uint16_t queue_size;
>  } VhostUserInflight;
> 
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
>  typedef struct VhostUserMsg {
>  	union {
>  		uint32_t master; /* a VhostUserRequest value */
> @@ -148,6 +160,7 @@
>  		VhostUserCryptoSessionParam crypto_session;
>  		VhostUserVringArea area;
>  		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
>  	} payload;
>  	/* Nothing should be added after the payload */
>  } __rte_packed VhostUserMsg;
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
  2022-04-27  8:29     ` [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
@ 2022-05-11 14:35       ` Xia, Chenbo
  2022-05-12  3:49         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-11 14:35 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
> 
> For the block device type, we use one queue to transfer
> both read and write requests, so we have to relay commands
relay interrupt?
I suggest using this commit log:
For the net device type, only the interrupt of the rxq needs to be relayed.
But for block, since all the queues are used for both read and write
requests, the interrupts of all queues need to be relayed.
With this fixed:
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> on all queues.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8ee041f..07fc3ca 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -370,6 +370,7 @@ struct rte_vdpa_dev_info {
>  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>  	irq_set->start = 0;
>  	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change
> notification */
>  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>  		rte_intr_fd_get(internal->pdev->intr_handle);
> 
> @@ -379,7 +380,13 @@ struct rte_vdpa_dev_info {
>  	for (i = 0; i < nr_vring; i++) {
>  		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
>  		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> +		if (m_rx == true &&
> +			((i & 1) == 0 || internal->device_type == IFCVF_BLK)) {
> +			/* For the net we only need to relay rx queue,
> +			 * which will change the mem of VM.
> +			 * For the blk we need to relay all the read cmd
> +			 * of each queue
> +			 */
>  			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>  			if (fd < 0) {
>  				DRV_LOG(ERR, "can't setup eventfd: %s",
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
  2022-05-11 14:35       ` Xia, Chenbo
@ 2022-05-12  3:49         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-12  3:49 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
Sure, I will use your commit log suggestion, and I will also change the subject to "vdpa/ifc: add vDPA interrupt relay for blk device"
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Wednesday, May 11, 2022 10:36 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
> >
> > For the block device type, we use one queue to transfer both read and
> > write requests, so we have to relay commands
> 
> relay interrupt?
> 
> I suggest using this commit log:
> 
> For the net device type, only interrupt of rxq needed to be relayed.
> But for block, since all the queues are used for both read and write requests.
> Interrupt of all queues needed to be relayed.
> 
> With this fixed:
> 
> Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> 
> > on all queues.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
> >  1 file changed, 8 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8ee041f..07fc3ca 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -370,6 +370,7 @@ struct rte_vdpa_dev_info {
> >  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> >  	irq_set->start = 0;
> >  	fd_ptr = (int *)&irq_set->data;
> > +	/* The first interrupt is for the configure space change
> > notification */
> >  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
> >  		rte_intr_fd_get(internal->pdev->intr_handle);
> >
> > @@ -379,7 +380,13 @@ struct rte_vdpa_dev_info {
> >  	for (i = 0; i < nr_vring; i++) {
> >  		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
> >  		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> > -		if ((i & 1) == 0 && m_rx == true) {
> > +		if (m_rx == true &&
> > +			((i & 1) == 0 || internal->device_type == IFCVF_BLK)) {
> > +			/* For the net we only need to relay rx queue,
> > +			 * which will change the mem of VM.
> > +			 * For the blk we need to relay all the read cmd
> > +			 * of each queue
> > +			 */
> >  			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> >  			if (fd < 0) {
> >  				DRV_LOG(ERR, "can't setup eventfd: %s",
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 03/18] vhost: add vhost msg support
  2022-05-11 14:24       ` Xia, Chenbo
@ 2022-05-12  3:50         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-12  3:50 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Wednesday, May 11, 2022 10:24 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 03/18] vhost: add vhost msg support
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 03/18] vhost: add vhost msg support
> 
> Title better be: vhost: add vhost msg support for get/set config
> 
Sure. 
> >
> > Add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG.
> > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > supported by virtio blk VDPA device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  lib/vhost/vhost_user.c | 83
> > ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/vhost/vhost_user.h | 13 ++++++++
> >  2 files changed, 96 insertions(+)
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 1d39067..e925428 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -80,6 +80,8 @@
> >  	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
> >  	[VHOST_USER_SET_SLAVE_REQ_FD]  =
> "VHOST_USER_SET_SLAVE_REQ_FD",
> >  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
> > +	[VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
> > +	[VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
> >  	[VHOST_USER_CRYPTO_CREATE_SESS] =
> "VHOST_USER_CRYPTO_CREATE_SESS",
> >  	[VHOST_USER_CRYPTO_CLOSE_SESS] =
> "VHOST_USER_CRYPTO_CLOSE_SESS",
> >  	[VHOST_USER_POSTCOPY_ADVISE]  =
> "VHOST_USER_POSTCOPY_ADVISE", @@
> > -2542,6 +2544,85 @@ static int is_vring_iotlb(struct virtio_net *dev,
> > }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (validate_msg_fds(dev, ctx, 0) != 0)
> > +		return RTE_VHOST_MSG_RESULT_ERR;
> > +
> > +	if (vdpa_dev->ops->get_config) {
> > +		ret = vdpa_dev->ops->get_config(dev->vid,
> > +					   ctx->msg.payload.cfg.region,
> > +					   ctx->msg.payload.cfg.size);
> > +		if (ret != 0) {
> > +			ctx->msg.size = 0;
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) get_config() return error!\n",
> > +					 dev->ifname);
> > +		}
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> supported!\n",
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_REPLY;
> > +}
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (validate_msg_fds(dev, ctx, 0) != 0)
> > +		return RTE_VHOST_MSG_RESULT_ERR;
> > +
> > +	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +			"(%s) invalid set config msg size: %"PRIu32" != %d\n",
> > +			dev->ifname, ctx->msg.size,
> > +			(int)sizeof(struct vhost_user_config));
> > +		goto out;
> > +	}
> 
> Sorry, I was wrong in v6, after double check, the size can be checked in
> read_vhost_message (although not that accurate check). So we can remove
> above. (btw, even we check, the logic should Be size <= sizeof(config))
> 
> Rest of the patch seems good, thanks.
> 
> Chenbo
> 
OK. I will remove the ctx->msg.size check here.
> > +
> > +	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +			"(%s) vhost_user_config size: %"PRIu32", should not
> be
> > larger than %d\n",
> > +			dev->ifname, ctx->msg.payload.cfg.size,
> > +			VHOST_USER_MAX_CONFIG_SIZE);
> > +		goto out;
> > +	}
> > +
> > +	if (vdpa_dev->ops->set_config) {
> > +		ret = vdpa_dev->ops->set_config(dev->vid,
> > +			ctx->msg.payload.cfg.region,
> > +			ctx->msg.payload.cfg.offset,
> > +			ctx->msg.payload.cfg.size,
> > +			ctx->msg.payload.cfg.flags);
> > +		if (ret)
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) set_config() return error!\n",
> > +					 dev->ifname);
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> supported!\n",
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_OK;
> > +
> > +out:
> > +	return RTE_VHOST_MSG_RESULT_ERR;
> > +}
> > +
> > +static int
> >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> >  			struct vhu_msg_context *ctx,
> >  			int main_fd __rte_unused)
> > @@ -2782,6 +2863,8 @@ typedef int (*vhost_message_handler_t)(struct
> > virtio_net **pdev,
> >  	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
> >  	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
> >  	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
> > +	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
> > +	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
> >  	[VHOST_USER_POSTCOPY_ADVISE] =
> vhost_user_set_postcopy_advise,
> >  	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
> >  	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, diff --
> git
> > a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > c946cc2..97cfb2f 100644
> > --- a/lib/vhost/vhost_user.h
> > +++ b/lib/vhost/vhost_user.h
> > @@ -50,6 +50,8 @@
> >  	VHOST_USER_NET_SET_MTU = 20,
> >  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> >  	VHOST_USER_IOTLB_MSG = 22,
> > +	VHOST_USER_GET_CONFIG = 24,
> > +	VHOST_USER_SET_CONFIG = 25,
> >  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> >  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> >  	VHOST_USER_POSTCOPY_ADVISE = 28,
> > @@ -125,6 +127,16 @@
> >  	uint16_t queue_size;
> >  } VhostUserInflight;
> >
> > +#define VHOST_USER_MAX_CONFIG_SIZE		256
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +	uint32_t offset;
> > +	uint32_t size;
> > +	uint32_t flags;
> > +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
> > +
> >  typedef struct VhostUserMsg {
> >  	union {
> >  		uint32_t master; /* a VhostUserRequest value */ @@ -148,6
> +160,7 @@
> >  		VhostUserCryptoSessionParam crypto_session;
> >  		VhostUserVringArea area;
> >  		VhostUserInflight inflight;
> > +		struct vhost_user_config cfg;
> >  	} payload;
> >  	/* Nothing should be added after the payload */  } __rte_packed
> > VhostUserMsg;
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
  2022-04-27  8:29     ` [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-05-12 12:55       ` Xia, Chenbo
  2022-05-13  3:32         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-12 12:55 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
> 
> Add SW live-migration support to block device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 33 +++++++++++++++++++++++++++++----
>  1 file changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 07fc3ca..8a260b7 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -312,6 +312,7 @@ struct rte_vdpa_dev_info {
>  vdpa_ifcvf_stop(struct ifcvf_internal *internal)
>  {
>  	struct ifcvf_hw *hw = &internal->hw;
> +	struct rte_vhost_vring vq;
>  	uint32_t i;
>  	int vid;
>  	uint64_t features = 0;
> @@ -319,6 +320,22 @@ struct rte_vdpa_dev_info {
>  	uint64_t len;
> 
>  	vid = internal->vid;
> +
> +	/* to make sure no packet is lost for blk device
> +	 * do not stop until last_avail_idx == last_used_idx
> +	 */
> +	if (internal->device_type == IFCVF_BLK) {
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> +			while (vq.avail->idx != vq.used->idx) {
> +				ifcvf_notify_queue(hw, i);
> +				usleep(10);
> +			}
> +			hw->vring[i].last_avail_idx = vq.avail->idx;
> +			hw->vring[i].last_used_idx = vq.used->idx;
> +		}
> +	}
> +
This does not seem to match the comment above about avoiding in-flight packets.
But the change in patch 17 looks good. Why not just use the implementation
from patch 17?
Thanks,
Chenbo
>  	ifcvf_stop_hw(hw);
> 
>  	for (i = 0; i < hw->nr_vring; i++)
> @@ -642,8 +659,10 @@ struct rte_vdpa_dev_info {
>  		}
>  		hw->vring[i].avail = gpa;
> 
> -		/* Direct I/O for Tx queue, relay for Rx queue */
> -		if (i & 1) {
> +		/* NET: Direct I/O for Tx queue, relay for Rx queue
> +		 * BLK: relay every queue
> +		 */
> +		if ((internal->device_type == IFCVF_NET) && (i & 1)) {
>  			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
>  			if (gpa == 0) {
>  				DRV_LOG(ERR, "Fail to get GPA for used ring.");
> @@ -693,8 +712,12 @@ struct rte_vdpa_dev_info {
> 
>  	for (i = 0; i < hw->nr_vring; i++) {
>  		/* synchronize remaining new used entries if any */
> -		if ((i & 1) == 0)
> +		if (internal->device_type == IFCVF_NET) {
> +			if ((i & 1) == 0)
> +				update_used_ring(internal, i);
> +		} else if (internal->device_type == IFCVF_BLK) {
>  			update_used_ring(internal, i);
> +		}
> 
>  		rte_vhost_get_vhost_vring(vid, i, &vq);
>  		len = IFCVF_USED_RING_LEN(vq.size);
> @@ -756,7 +779,9 @@ struct rte_vdpa_dev_info {
>  		}
>  	}
> 
> -	for (qid = 0; qid < q_num; qid += 2) {
> +	for (qid = 0; qid < q_num; qid += 1) {
> +		if ((internal->device_type == IFCVF_NET) && (qid & 1))
> +			continue;
>  		ev.events = EPOLLIN | EPOLLPRI;
>  		/* leave a flag to mark it's for interrupt */
>  		ev.data.u64 = 1 | qid << 1 |
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 07/18] vhost: add API to get vDPA device type
  2022-04-27  8:29     ` [PATCH v7 07/18] vhost: add API to get vDPA device type Andy Pei
@ 2022-05-12 13:14       ` Xia, Chenbo
  2022-05-13  4:15         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:14 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 07/18] vhost: add API to get vDPA device type
> 
> Vhost backend of different devices have different features.
> Add a API to get vDPA device type, net device or blk device
> currently, so users can set different features for different
> kinds of devices.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/rte_vhost.h   | 17 +++++++++++++++++
>  lib/vhost/socket.c      | 39 +++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vdpa_driver.h |  3 +++
>  lib/vhost/version.map   |  2 ++
>  4 files changed, 61 insertions(+)
> 
> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
> index c733f85..c977a24 100644
> --- a/lib/vhost/rte_vhost.h
> +++ b/lib/vhost/rte_vhost.h
> @@ -117,6 +117,9 @@
> 
>  #define RTE_MAX_VHOST_DEVICE	1024
> 
> +#define VDPA_DEVICE_TYPE_NET 0
> +#define VDPA_DEVICE_TYPE_BLK 1
> +
>  struct rte_vdpa_device;
> 
>  /**
> @@ -486,6 +489,20 @@ struct rte_vdpa_device *
>  rte_vhost_driver_get_vdpa_device(const char *path);
> 
>  /**
> + * Get the device type of the vdpa device.
> + *
> + * @param path
> + *  The vhost-user socket file path
> + * @param type
> + *  the device type of the vdpa device
> + * @return
> + *  0 on success, -1 on failure
> + */
> +__rte_experimental
> +int
> +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
> +
> +/**
>   * Set the feature bits the vhost-user driver supports.
>   *
>   * @param path
> diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
> index b304339..7da90e8 100644
> --- a/lib/vhost/socket.c
> +++ b/lib/vhost/socket.c
> @@ -619,6 +619,45 @@ struct rte_vdpa_device *
>  }
> 
>  int
> +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
> +{
> +	struct vhost_user_socket *vsocket;
> +	struct rte_vdpa_device *vdpa_dev;
> +	uint32_t vdpa_type = 0;
> +	int ret = 0;
> +
> +	pthread_mutex_lock(&vhost_user.mutex);
> +	vsocket = find_vhost_user_socket(path);
> +	if (!vsocket) {
> +		VHOST_LOG_CONFIG(ERR,
> +				 "(%s) socket file is not registered yet.\n",
> +				 path);
> +		ret = -1;
> +		goto unlock_exit;
> +	}
> +
> +	vdpa_dev = vsocket->vdpa_dev;
> +	if (!vdpa_dev) {
> +		ret = -1;
> +		goto unlock_exit;
> +	}
> +
> +	if (vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type) < 0) {
> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) failed to get vdpa dev type for socket file.\n",
> +			path);
> +		ret = -1;
> +		goto unlock_exit;
> +	}
If a vendor's vDPA driver does not implement this callback, this function should return type NET.
Another option would be to make every vDPA driver implement the callback, but since
other vendors only have one type, I prefer the first way.
> +
> +	*type = vdpa_type;
> +
> +unlock_exit:
> +	pthread_mutex_unlock(&vhost_user.mutex);
> +	return ret;
> +}
> +
> +int
>  rte_vhost_driver_disable_features(const char *path, uint64_t features)
>  {
>  	struct vhost_user_socket *vsocket;
> diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
> index e59a834..9cbd7cd 100644
> --- a/lib/vhost/vdpa_driver.h
> +++ b/lib/vhost/vdpa_driver.h
> @@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
>  	/** Set the device configuration space */
>  	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
>  		      uint32_t size, uint32_t flags);
> +
> +	/** get device type: net device, blk device... */
> +	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
>  };
> 
>  /**
> diff --git a/lib/vhost/version.map b/lib/vhost/version.map
> index 0a66c58..fe4e8de 100644
> --- a/lib/vhost/version.map
> +++ b/lib/vhost/version.map
> @@ -87,6 +87,8 @@ EXPERIMENTAL {
> 
>  	# added in 22.03
>  	rte_vhost_async_dma_configure;
> +
> +	rte_vhost_driver_get_vdpa_dev_type;
The '# added in 22.07' tag is missing, but by the time you do v8 this may not be a problem,
as other patches may already have added this tag when introducing new APIs.
Also, introducing a new API requires an update to the release notes.
Please refer to http://git.dpdk.org/next/dpdk-next-virtio/commit/?id=868883e899af386abcc298ea80ec7f6a18d8a8e7
as an example.
Thanks,
Chenbo
>  };
> 
>  INTERNAL {
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
  2022-04-27  8:29     ` [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver Andy Pei
@ 2022-05-12 13:21       ` Xia, Chenbo
  2022-05-12 13:40         ` Xia, Chenbo
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:21 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
> 
> Add get device type ops to ifc driver.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8a260b7..99a6ab0 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1300,6 +1300,15 @@ struct rte_vdpa_dev_info {
>  	return 0;
>  }
> 
> +static int
> +ifcvf_blk_get_device_type(struct rte_vdpa_device *vdev,
> +	uint32_t *type)
> +{
> +	RTE_SET_USED(vdev);
> +	*type = VDPA_DEVICE_TYPE_BLK;
> +	return 0;
This is not right. Remember that net and blk both use this driver?
This will cause net devices to return BLK as well.
And I suggest validating the patch set with both blk and net...
Besides, ifcvf_blk_get_device_type should be named ifcvf_get_device_type.
Thanks,
Chenbo
> +}
> +
>  static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
>  	.get_queue_num = ifcvf_get_queue_num,
>  	.get_features = ifcvf_get_vdpa_features,
> @@ -1313,6 +1322,7 @@ struct rte_vdpa_dev_info {
>  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
>  	.get_notify_area = ifcvf_get_notify_area,
>  	.get_config = ifcvf_blk_get_config,
> +	.get_dev_type = ifcvf_blk_get_device_type,
>  };
> 
>  struct rte_vdpa_dev_info dev_info[] = {
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example
  2022-04-27  8:29     ` [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example Andy Pei
@ 2022-05-12 13:34       ` Xia, Chenbo
  2022-05-13  8:16         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:34 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example
Better be: examples/vdpa: add virtio blk support
> 
> Add virtio blk device support to vDPA example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  examples/vdpa/main.c             | 57 +++++++++++++++++++++++++++++++++++
>  examples/vdpa/vdpa_blk_compact.h | 65
> ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 122 insertions(+)
>  create mode 100644 examples/vdpa/vdpa_blk_compact.h
> 
> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
> index 5ab0765..2544141 100644
> --- a/examples/vdpa/main.c
> +++ b/examples/vdpa/main.c
> @@ -20,6 +20,7 @@
>  #include <cmdline_parse_string.h>
>  #include <cmdline_parse_num.h>
>  #include <cmdline.h>
> +#include "vdpa_blk_compact.h"
> 
>  #define MAX_PATH_LEN 128
>  #define MAX_VDPA_SAMPLE_PORTS 1024
> @@ -159,8 +160,54 @@ struct vdpa_port {
>  };
> 
>  static int
> +vdpa_blk_device_set_features_and_protocol(const char *path)
> +{
> +	uint64_t protocol_features = 0;
> +	int ret;
> +
> +	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_disable_features(path,
> +		VHOST_BLK_DISABLED_FEATURES);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_disable_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_get_protocol_features(path,
> &protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_get_protocol_features for %s
> failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
> +
> +	ret = rte_vhost_driver_set_protocol_features(path,
> protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_protocol_features for %s
> failed.\n",
> +			path);
> +		goto out;
This goto is not needed.
> +	}
> +
> +out:
> +	return ret;
> +}
> +
> +static int
>  start_vdpa(struct vdpa_port *vport)
>  {
> +	uint32_t device_type = 0;
>  	int ret;
>  	char *socket_path = vport->ifname;
> 
> @@ -192,6 +239,16 @@ struct vdpa_port {
>  			"attach vdpa device failed: %s\n",
>  			socket_path);
> 
> +	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type);
> +	if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) {
> +		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
Should add info of socket path
> +		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
> +		if (ret != 0)
> +			rte_exit(EXIT_FAILURE,
> +				"set vhost blk driver features and protocol
> features failed: %s\n",
> +				socket_path);
> +	}
> +
>  	if (rte_vhost_driver_start(socket_path) < 0)
>  		rte_exit(EXIT_FAILURE,
>  			"start vhost driver failed: %s\n",
> diff --git a/examples/vdpa/vdpa_blk_compact.h
> b/examples/vdpa/vdpa_blk_compact.h
> new file mode 100644
> index 0000000..136c3f6
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.h
> @@ -0,0 +1,65 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _VDPA_BLK_COMPACT_H_
> +#define _VDPA_BLK_COMPACT_H_
> +
> +/**
> + * @file
> + *
> + * Device specific vhost lib
> + */
> +
> +#include <stdbool.h>
> +
> +#include <rte_pci.h>
The two headers above are not used in this file?
> +#include <rte_vhost.h>
> +
> +/* Feature bits */
> +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size
> */
> +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments
> */
> +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available
> */
> +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is
> available */
> +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> +
> +/* Legacy feature bits */
> +#ifndef VIRTIO_BLK_NO_LEGACY
> +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru
> */
> +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in
> config */
> +#endif /* !VIRTIO_BLK_NO_LEGACY */
> +
> +#ifndef VHOST_USER_F_PROTOCOL_FEATURES
> +#define VHOST_USER_F_PROTOCOL_FEATURES 30
> +#endif
It's already in rte_vhost.h, so no need to re-define.
Thanks,
Chenbo
> +
> +#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> +	(1ULL << VIRTIO_F_VERSION_1))
> +
> +#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL <<
> VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX))
> +
> +#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
> +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
> \
> +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  |
> \
> +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE)
> | \
> +	(1ULL << VIRTIO_BLK_F_MQ))
> +
> +/* Not supported features */
> +#define VHOST_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES_BASE | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \
> +	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
> +
> +/* Vhost-blk support protocol features */
> +#define VHOST_BLK_PROTOCOL_FEATURES \
> +	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
> +	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
> +
> +#endif /* _VDPA_BLK_COMPACT_H_ */
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
  2022-05-12 13:21       ` Xia, Chenbo
@ 2022-05-12 13:40         ` Xia, Chenbo
  2022-05-13  7:38           ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:40 UTC (permalink / raw)
  To: Xia, Chenbo, Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Thursday, May 12, 2022 9:22 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc
> driver
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> > Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> > Subject: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc
> driver
> >
> > Add get device type ops to ifc driver.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++++++++++
> >  1 file changed, 10 insertions(+)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > index 8a260b7..99a6ab0 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -1300,6 +1300,15 @@ struct rte_vdpa_dev_info {
> >  	return 0;
> >  }
> >
> > +static int
> > +ifcvf_blk_get_device_type(struct rte_vdpa_device *vdev,
> > +	uint32_t *type)
> > +{
> > +	RTE_SET_USED(vdev);
> > +	*type = VDPA_DEVICE_TYPE_BLK;
> > +	return 0;
> 
> This is not right. Remember net and blk are both using the driver?
> This will lead to using net also returns BLK.
A small correction to the above: this will lead to net not working. So implement
the callback for both (although in the API, a NULL callback can return the NET type).
Thanks,
Chenbo
> 
> And I suggest the patch-set validated with both blk and net...
> 
> Besides, ifcvf_blk_get_device_type should be ifcvf_get_device_type
> 
> Thanks,
> Chenbo
> 
> > +}
> > +
> >  static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
> >  	.get_queue_num = ifcvf_get_queue_num,
> >  	.get_features = ifcvf_get_vdpa_features,
> > @@ -1313,6 +1322,7 @@ struct rte_vdpa_dev_info {
> >  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> >  	.get_notify_area = ifcvf_get_notify_area,
> >  	.get_config = ifcvf_blk_get_config,
> > +	.get_dev_type = ifcvf_blk_get_device_type,
> >  };
> >
> >  struct rte_vdpa_dev_info dev_info[] = {
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 11/18] vdpa/ifc: add set vring state for blk device
  2022-04-27  8:29     ` [PATCH v7 11/18] vdpa/ifc: add set vring state for " Andy Pei
@ 2022-05-12 13:44       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:44 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 11/18] vdpa/ifc: add set vring state for blk device
> 
> Set_vring_state op is mandatory, add set_vring_state for blk device.
> Currently set_vring_state for blk device is not implemented.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++++++++++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 99a6ab0..ca49bc3 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1290,6 +1290,16 @@ struct rte_vdpa_dev_info {
>  }
> 
>  static int
> +ifcvf_blk_set_vring_state(int vid, int vring, int state)
> +{
> +	RTE_SET_USED(vid);
> +	RTE_SET_USED(vring);
> +	RTE_SET_USED(state);
> +
> +	return 0;
> +}
> +
> +static int
>  ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
>  	uint64_t *features)
>  {
> @@ -1316,7 +1326,7 @@ struct rte_vdpa_dev_info {
>  	.get_protocol_features = ifcvf_blk_get_protocol_features,
>  	.dev_conf = ifcvf_dev_config,
>  	.dev_close = ifcvf_dev_close,
> -	.set_vring_state = NULL,
> +	.set_vring_state = ifcvf_blk_set_vring_state,
About naming in this driver: if you use two functions for net/blk
and name the one for blk XXX_blk_XXX, it is better to rename the net one
to XXX_net_XXX.
In this case, it is better to rename ifcvf_set_vring_state.
Thanks,
Chenbo
>  	.migration_done = NULL,
>  	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
>  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect
  2022-04-27  8:30     ` [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
@ 2022-05-12 13:53       ` Xia, Chenbo
  2022-05-13  8:34         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:53 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before
> qemu connect
Title can be:
vdpa/ifc: add log for config space of virtio blk
> 
> Add some log of virtio blk device config space information
> at VDPA launch before qemu connects.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
>  1 file changed, 28 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index ca49bc3..4060a44 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1363,6 +1363,9 @@ struct rte_vdpa_dev_info dev_info[] = {
>  	struct rte_kvargs *kvlist = NULL;
>  	int ret = 0;
>  	int16_t device_id;
> +	uint64_t capacity = 0;
> +	uint8_t *byte;
> +	uint32_t i;
> 
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>  		return 0;
> @@ -1429,6 +1432,31 @@ struct rte_vdpa_dev_info dev_info[] = {
>  		internal->features = features &
>  					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
>  		internal->features |= dev_info[IFCVF_BLK].features;
> +
> +		/* cannot read 64-bit register in one attempt,
> +		 * so read byte by byte.
> +		 */
> +		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> +			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> +			capacity |= (uint64_t)*byte << (i * 8);
> +		}
> +		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
I believe this '21' should be calculated rather than hard-coded.
And should these logs all be debug logs?
Thanks,
Chenbo
> +
> +		DRV_LOG(INFO, "size_max  : 0x%08x",
> +			internal->hw.blk_cfg->size_max);
> +		DRV_LOG(INFO, "seg_max   : 0x%08x",
> +			internal->hw.blk_cfg->seg_max);
> +		DRV_LOG(INFO, "blk_size  : 0x%08x",
> +			internal->hw.blk_cfg->blk_size);
> +		DRV_LOG(INFO, "geometry");
> +		DRV_LOG(INFO, "    cylinders: %u",
> +			internal->hw.blk_cfg->geometry.cylinders);
> +		DRV_LOG(INFO, "    heads    : %u",
> +			internal->hw.blk_cfg->geometry.heads);
> +		DRV_LOG(INFO, "    sectors  : %u",
> +			internal->hw.blk_cfg->geometry.sectors);
> +		DRV_LOG(INFO, "num_queues: 0x%08x",
> +			internal->hw.blk_cfg->num_queues);
>  	}
> 
>  	list->internal = internal;
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware
  2022-04-27  8:30     ` [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware Andy Pei
@ 2022-05-12 13:55       ` Xia, Chenbo
  2022-05-13  8:58         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-12 13:55 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware
> 
> Original code max_queues is set to IFCVF_MAX_QUEUES.
> New code max_queues is the min of IFCVF_MAX_QUEUES and hardware num_queues.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 4060a44..5a8cf1c 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1457,6 +1457,10 @@ struct rte_vdpa_dev_info dev_info[] = {
>  			internal->hw.blk_cfg->geometry.sectors);
>  		DRV_LOG(INFO, "num_queues: 0x%08x",
>  			internal->hw.blk_cfg->num_queues);
> +
> +		/* reset max_queue here, to minimum modification */
> +		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
> +			internal->hw.blk_cfg->num_queues);
MQ is not supported now in this driver, should we make this change later?
Thanks,
Chenbo
>  	}
> 
>  	list->internal = internal;
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk
  2022-04-27  8:30     ` [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
@ 2022-05-13  2:52       ` Xia, Chenbo
  2022-05-13 10:10         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-13  2:52 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio
> blk
Better be: vdpa/ifc: add interrupt handling for config space
> 
> Create a thread to poll and relay config space change interrupt.
> Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to info qemu.
This should read "inform QEMU". You don't need to save words in the commit log.
The commit log should be as detailed as possible so that readers can quickly
understand what the commit is doing :)
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 112
> ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 112 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 5a8cf1c..0e94e1f 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -53,7 +53,9 @@ struct ifcvf_internal {
>  	int vfio_group_fd;
>  	int vfio_dev_fd;
>  	pthread_t tid;	/* thread for notify relay */
> +	pthread_t intr_tid;	/* thread for intr relay */
Thread for virtio-blk config space change interrupt relay
>  	int epfd;
> +	int csc_fd;
csc_epfd
>  	int vid;
>  	struct rte_vdpa_device *vdev;
>  	uint16_t max_queues;
> @@ -558,6 +560,107 @@ struct rte_vdpa_dev_info {
>  	return 0;
>  }
> 
> +static void
> +virtio_interrupt_handler(struct ifcvf_internal *internal)
> +{
> +	int vid = internal->vid;
> +	int ret;
> +
> +	ret = rte_vhost_slave_config_change(vid, 1);
> +	if (ret)
> +		DRV_LOG(ERR, "failed to notify the guest about configuration
> space change.");
> +}
> +
> +static void *
> +intr_relay(void *arg)
> +{
> +	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
> +	struct epoll_event csc_event;
> +	struct epoll_event ev;
> +	uint64_t buf;
> +	int nbytes;
> +	int csc_fd, csc_val = 0;
> +
> +	csc_fd = epoll_create(1);
> +	if (csc_fd < 0) {
> +		DRV_LOG(ERR, "failed to create epoll for config space
> change.");
> +		return NULL;
> +	}
> +
> +	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
> +	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
> +	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
> +		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
> +		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> +		return NULL;
Close the epfd and set it to -1 on error.
> +	}
> +
> +	internal->csc_fd = csc_fd;
> +
> +	for (;;) {
> +		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
> +		if (csc_val < 0) {
> +			if (errno == EINTR)
> +				continue;
> +			DRV_LOG(ERR, "epoll_wait return fail\n");
Drop the '\n'; it's not needed for DRV_LOG. Please check the other DRV_LOGs too.
> +			return NULL;
> +		} else if (csc_val == 0) {
> +			continue;
> +		} else {
> +			/* csc_val > 0 */
> +			nbytes = read(csc_event.data.fd, &buf, 8);
> +			if (nbytes < 0) {
> +				if (errno == EINTR || errno == EWOULDBLOCK)
Should EAGAIN also be handled in this case?
> +					continue;
> +				DRV_LOG(ERR, "Error reading from file
> descriptor %d: %s\n",
> +					csc_event.data.fd,
> +					strerror(errno));
> +				return NULL;
> +			} else if (nbytes == 0) {
> +				DRV_LOG(ERR, "Read nothing from file
> descriptor %d\n",
> +					csc_event.data.fd);
> +				continue;
> +			} else {
> +				virtio_interrupt_handler(internal);
> +			}
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
> +static int
> +setup_intr_relay(struct ifcvf_internal *internal)
> +{
> +	int ret;
> +
> +	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
> +			(void *)internal);
The EAL API rte_ctrl_thread_create is preferred here.
> +	if (ret) {
> +		DRV_LOG(ERR, "failed to create notify relay pthread.");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static int
> +unset_intr_relay(struct ifcvf_internal *internal)
> +{
> +	void *status;
> +
> +	if (internal->intr_tid) {
> +		pthread_cancel(internal->intr_tid);
> +		pthread_join(internal->intr_tid, &status);
> +	}
> +	internal->intr_tid = 0;
> +
> +	if (internal->csc_fd >= 0)
> +		close(internal->csc_fd);
> +	internal->csc_fd = -1;
> +
> +	return 0;
> +}
> +
>  static int
>  update_datapath(struct ifcvf_internal *internal)
>  {
> @@ -584,10 +687,16 @@ struct rte_vdpa_dev_info {
>  		if (ret)
>  			goto err;
> 
> +		ret = setup_intr_relay(internal);
> +		if (ret)
> +			goto err;
> +
But this is not needed for net, right? As I said, we should
include validation for net also. 
Thanks,
Chenbo
>  		rte_atomic32_set(&internal->running, 1);
>  	} else if (rte_atomic32_read(&internal->running) &&
>  		   (!rte_atomic32_read(&internal->started) ||
>  		    !rte_atomic32_read(&internal->dev_attached))) {
> +		ret = unset_intr_relay(internal);
> +
>  		ret = unset_notify_relay(internal);
>  		if (ret)
>  			goto err;
> @@ -880,6 +989,9 @@ struct rte_vdpa_dev_info {
>  	/* stop the direct IO data path */
>  	unset_notify_relay(internal);
>  	vdpa_ifcvf_stop(internal);
> +
> +	unset_intr_relay(internal);
> +
>  	vdpa_disable_vfio_intr(internal);
> 
>  	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct
  2022-04-27  8:30     ` [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
@ 2022-05-13  2:55       ` Xia, Chenbo
  2022-05-16  3:05         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-13  2:55 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct
> 
> Add is_blk flag to ifcvf_hw, and init is_blk during probe.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.h | 1 +
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
>  2 files changed, 3 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 769c603..8591ef1 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -149,6 +149,7 @@ struct ifcvf_hw {
>  	u8     *lm_cfg;
>  	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
>  	u8 nr_vring;
> +	u8 is_blk;
>  	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
>  };
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 0e94e1f..4923bc1 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1536,11 +1536,13 @@ struct rte_vdpa_dev_info dev_info[] = {
> 
>  	if (device_id == VIRTIO_ID_NET) {
>  		internal->device_type = IFCVF_NET;
> +		internal->hw.is_blk = IFCVF_NET;
I believe it's enough to keep only device_type. Device type can be defined in internal
or internal->hw. Choose the way you prefer.
Thanks,
Chenbo
>  		internal->features = features &
>  					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
>  		internal->features |= dev_info[IFCVF_NET].features;
>  	} else if (device_id == VIRTIO_ID_BLOCK) {
>  		internal->device_type = IFCVF_BLK;
> +		internal->hw.is_blk = IFCVF_BLK;
>  		internal->features = features &
>  					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
>  		internal->features |= dev_info[IFCVF_BLK].features;
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device
  2022-04-27  8:30     ` [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device Andy Pei
@ 2022-05-13  2:57       ` Xia, Chenbo
  2022-05-16  4:19         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-13  2:57 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk
> device
> 
> 1.last_avail_idx is lower 16 bit of the register.
> 2.address of ring_state register is different between net and blk device.
Not a good commit log. The commit log should explain more about what the
commit is doing.
Thanks,
Chenbo
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.c | 36 +++++++++++++++++++++++++++++-------
>  drivers/vdpa/ifc/base/ifcvf.h |  1 +
>  2 files changed, 30 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
> index d10c1fd..4d5881a 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -218,10 +218,18 @@
>  				&cfg->queue_used_hi);
>  		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
> 
> -		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> -				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
> -			(u32)hw->vring[i].last_avail_idx |
> -			((u32)hw->vring[i].last_used_idx << 16);
> +		if (hw->is_blk == IFCVF_BLK) {
> +			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> +				i * IFCVF_LM_CFG_SIZE) =
> +				(u32)hw->vring[i].last_avail_idx |
> +				((u32)hw->vring[i].last_used_idx << 16);
> +		} else if (hw->is_blk == IFCVF_NET) {
> +			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> +				(i / 2) * IFCVF_LM_CFG_SIZE +
> +				(i % 2) * 4) =
> +				(u32)hw->vring[i].last_avail_idx |
> +				((u32)hw->vring[i].last_used_idx << 16);
> +		}
> 
>  		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
>  		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
> @@ -254,9 +262,23 @@
>  		IFCVF_WRITE_REG16(i, &cfg->queue_select);
>  		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
>  		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg-
> >queue_msix_vector);
> -		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET
> +
> -				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
> -		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
> +
> +		if (hw->is_blk) {
> +			ring_state = *(u32 *)(hw->lm_cfg +
> +					IFCVF_LM_RING_STATE_OFFSET +
> +					i * IFCVF_LM_CFG_SIZE);
> +		} else if (hw->is_blk == IFCVF_NET) {
> +			ring_state = *(u32 *)(hw->lm_cfg +
> +					IFCVF_LM_RING_STATE_OFFSET +
> +					(i / 2) * IFCVF_LM_CFG_SIZE +
> +					(i % 2) * 4);
> +		}
> +
> +		if (hw->is_blk == IFCVF_BLK)
> +			hw->vring[i].last_avail_idx =
> +				(u16)(ring_state & IFCVF_16_BIT_MASK);
> +		else if (hw->is_blk == IFCVF_NET)
> +			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
>  		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
>  	}
>  }
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 8591ef1..ff11b12 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -65,6 +65,7 @@
>  #define IFCVF_MEDIATED_VRING		0x200000000000
> 
>  #define IFCVF_32_BIT_MASK		0xffffffff
> +#define IFCVF_16_BIT_MASK		0xffff
> 
>  #ifndef VHOST_USER_PROTOCOL_F_CONFIG
>  #define VHOST_USER_PROTOCOL_F_CONFIG	9
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO
  2022-04-27  8:30     ` [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO Andy Pei
@ 2022-05-13  2:59       ` Xia, Chenbo
  2022-05-16  4:20         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-13  2:59 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight
> IO
> 
> When virtio blk device is pause, make sure hardware last_avail_idx
> and last_used_idx are the same.
The patch is good, but please explain more about avoiding in-flight packets in the commit log.
Thanks,
Chenbo
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++--------
>  1 file changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 4923bc1..def6adf 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -314,12 +314,12 @@ struct rte_vdpa_dev_info {
>  vdpa_ifcvf_stop(struct ifcvf_internal *internal)
>  {
>  	struct ifcvf_hw *hw = &internal->hw;
> -	struct rte_vhost_vring vq;
>  	uint32_t i;
>  	int vid;
>  	uint64_t features = 0;
>  	uint64_t log_base = 0, log_size = 0;
>  	uint64_t len;
> +	u32 ring_state = 0;
> 
>  	vid = internal->vid;
> 
> @@ -328,13 +328,22 @@ struct rte_vdpa_dev_info {
>  	 */
>  	if (internal->device_type == IFCVF_BLK) {
>  		for (i = 0; i < hw->nr_vring; i++) {
> -			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> -			while (vq.avail->idx != vq.used->idx) {
> -				ifcvf_notify_queue(hw, i);
> -				usleep(10);
> -			}
> -			hw->vring[i].last_avail_idx = vq.avail->idx;
> -			hw->vring[i].last_used_idx = vq.used->idx;
> +			do {
> +				if (hw->lm_cfg != NULL)
> +					ring_state = *(u32 *)(hw->lm_cfg +
> +						IFCVF_LM_RING_STATE_OFFSET +
> +						i * IFCVF_LM_CFG_SIZE);
> +				hw->vring[i].last_avail_idx =
> +					(u16)(ring_state & IFCVF_16_BIT_MASK);
> +				hw->vring[i].last_used_idx =
> +					(u16)(ring_state >> 16);
> +				if (hw->vring[i].last_avail_idx !=
> +					hw->vring[i].last_used_idx) {
> +					ifcvf_notify_queue(hw, i);
> +					usleep(10);
> +				}
> +			} while (hw->vring[i].last_avail_idx !=
> +				hw->vring[i].last_used_idx);
>  		}
>  	}
> 
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 18/18] vhost: make sure each queue callfd is configured
  2022-04-27  8:30     ` [PATCH v7 18/18] vhost: make sure each queue callfd is configured Andy Pei
@ 2022-05-13  3:10       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-13  3:10 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, April 27, 2022 4:30 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v7 18/18] vhost: make sure each queue callfd is configured
> 
> During the vhost data path building process, qemu will create
> a call fd at first, and create another call fd in the end.
> The final call fd will be used to relay notify.
> In the original code, after kick fd is set, dev_conf will
> set the first call fd. Even though the actual call fd will set,
> the data path will not work correctly.
I don't understand the problem. If the callfd is reset, we should re-configure
in the set_vring_state callback, and the issue can be handled there.
Thanks,
Chenbo
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index e925428..82122b6 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -3230,12 +3230,26 @@ typedef int (*vhost_message_handler_t)(struct
> virtio_net **pdev,
>  	if (!vdpa_dev)
>  		goto out;
> 
> +	if (request != VHOST_USER_SET_VRING_CALL)
> +		goto out;
> +
>  	if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
>  		if (vdpa_dev->ops->dev_conf(dev->vid))
>  			VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA
> device\n",
>  					dev->ifname);
>  		else
>  			dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
> +	} else {
> +		/* when VIRTIO_DEV_VDPA_CONFIGURED already configured
> +		 * close the device and config the device again,
> +		 * make sure the call fd of each queue is configured correctly.
> +		 */
> +		if (vdpa_dev->ops->dev_close(dev->vid))
> +			VHOST_LOG_CONFIG(ERR,
> +					 "Failed to close vDPA device\n");
> +		if (vdpa_dev->ops->dev_conf(dev->vid))
> +			VHOST_LOG_CONFIG(ERR,
> +					 "Failed to re-config vDPA device\n");
>  	}
> 
>  out:
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
  2022-05-12 12:55       ` Xia, Chenbo
@ 2022-05-13  3:32         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-13  3:32 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
Your suggestion is good. I will fix it in the next version.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Thursday, May 12, 2022 8:55 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
> >
> > Add SW live-migration support to block device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 33 +++++++++++++++++++++++++++++----
> >  1 file changed, 29 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 07fc3ca..8a260b7 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -312,6 +312,7 @@ struct rte_vdpa_dev_info {  vdpa_ifcvf_stop(struct
> > ifcvf_internal *internal)  {
> >  	struct ifcvf_hw *hw = &internal->hw;
> > +	struct rte_vhost_vring vq;
> >  	uint32_t i;
> >  	int vid;
> >  	uint64_t features = 0;
> > @@ -319,6 +320,22 @@ struct rte_vdpa_dev_info {
> >  	uint64_t len;
> >
> >  	vid = internal->vid;
> > +
> > +	/* to make sure no packet is lost for blk device
> > +	 * do not stop until last_avail_idx == last_used_idx
> > +	 */
> > +	if (internal->device_type == IFCVF_BLK) {
> > +		for (i = 0; i < hw->nr_vring; i++) {
> > +			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> > +			while (vq.avail->idx != vq.used->idx) {
> > +				ifcvf_notify_queue(hw, i);
> > +				usleep(10);
> > +			}
> > +			hw->vring[i].last_avail_idx = vq.avail->idx;
> > +			hw->vring[i].last_used_idx = vq.used->idx;
> > +		}
> > +	}
> > +
> 
> This seems not match with the above comment about avoiding in-flight
> packets.
> But the change in patch 17 seems good. Why not just using the
> implementation in patch 17?
> 
> Thanks,
> Chenbo
> 
> >  	ifcvf_stop_hw(hw);
> >
> >  	for (i = 0; i < hw->nr_vring; i++)
> > @@ -642,8 +659,10 @@ struct rte_vdpa_dev_info {
> >  		}
> >  		hw->vring[i].avail = gpa;
> >
> > -		/* Direct I/O for Tx queue, relay for Rx queue */
> > -		if (i & 1) {
> > +		/* NET: Direct I/O for Tx queue, relay for Rx queue
> > +		 * BLK: relay every queue
> > +		 */
> > +		if ((internal->device_type == IFCVF_NET) && (i & 1)) {
> >  			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
> >  			if (gpa == 0) {
> >  				DRV_LOG(ERR, "Fail to get GPA for used
> ring."); @@ -693,8 +712,12
> > @@ struct rte_vdpa_dev_info {
> >
> >  	for (i = 0; i < hw->nr_vring; i++) {
> >  		/* synchronize remaining new used entries if any */
> > -		if ((i & 1) == 0)
> > +		if (internal->device_type == IFCVF_NET) {
> > +			if ((i & 1) == 0)
> > +				update_used_ring(internal, i);
> > +		} else if (internal->device_type == IFCVF_BLK) {
> >  			update_used_ring(internal, i);
> > +		}
> >
> >  		rte_vhost_get_vhost_vring(vid, i, &vq);
> >  		len = IFCVF_USED_RING_LEN(vq.size); @@ -756,7 +779,9
> @@ struct
> > rte_vdpa_dev_info {
> >  		}
> >  	}
> >
> > -	for (qid = 0; qid < q_num; qid += 2) {
> > +	for (qid = 0; qid < q_num; qid += 1) {
> > +		if ((internal->device_type == IFCVF_NET) && (qid & 1))
> > +			continue;
> >  		ev.events = EPOLLIN | EPOLLPRI;
> >  		/* leave a flag to mark it's for interrupt */
> >  		ev.data.u64 = 1 | qid << 1 |
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 07/18] vhost: add API to get vDPA device type
  2022-05-12 13:14       ` Xia, Chenbo
@ 2022-05-13  4:15         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-13  4:15 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Thursday, May 12, 2022 9:14 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 07/18] vhost: add API to get vDPA device type
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 07/18] vhost: add API to get vDPA device type
> >
> > Vhost backend of different devices have different features.
> > Add a API to get vDPA device type, net device or blk device currently,
> > so users can set different features for different kinds of devices.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  lib/vhost/rte_vhost.h   | 17 +++++++++++++++++
> >  lib/vhost/socket.c      | 39 +++++++++++++++++++++++++++++++++++++++
> >  lib/vhost/vdpa_driver.h |  3 +++
> >  lib/vhost/version.map   |  2 ++
> >  4 files changed, 61 insertions(+)
> >
> > diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> > c733f85..c977a24 100644
> > --- a/lib/vhost/rte_vhost.h
> > +++ b/lib/vhost/rte_vhost.h
> > @@ -117,6 +117,9 @@
> >
> >  #define RTE_MAX_VHOST_DEVICE	1024
> >
> > +#define VDPA_DEVICE_TYPE_NET 0
> > +#define VDPA_DEVICE_TYPE_BLK 1
> > +
> >  struct rte_vdpa_device;
> >
> >  /**
> > @@ -486,6 +489,20 @@ struct rte_vdpa_device *
> > rte_vhost_driver_get_vdpa_device(const char *path);
> >
> >  /**
> > + * Get the device type of the vdpa device.
> > + *
> > + * @param path
> > + *  The vhost-user socket file path
> > + * @param type
> > + *  the device type of the vdpa device
> > + * @return
> > + *  0 on success, -1 on failure
> > + */
> > +__rte_experimental
> > +int
> > +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
> > +
> > +/**
> >   * Set the feature bits the vhost-user driver supports.
> >   *
> >   * @param path
> > diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c index
> > b304339..7da90e8 100644
> > --- a/lib/vhost/socket.c
> > +++ b/lib/vhost/socket.c
> > @@ -619,6 +619,45 @@ struct rte_vdpa_device *  }
> >
> >  int
> > +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
> > +{
> > +	struct vhost_user_socket *vsocket;
> > +	struct rte_vdpa_device *vdpa_dev;
> > +	uint32_t vdpa_type = 0;
> > +	int ret = 0;
> > +
> > +	pthread_mutex_lock(&vhost_user.mutex);
> > +	vsocket = find_vhost_user_socket(path);
> > +	if (!vsocket) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +				 "(%s) socket file is not registered yet.\n",
> > +				 path);
> > +		ret = -1;
> > +		goto unlock_exit;
> > +	}
> > +
> > +	vdpa_dev = vsocket->vdpa_dev;
> > +	if (!vdpa_dev) {
> > +		ret = -1;
> > +		goto unlock_exit;
> > +	}
> > +
> > +	if (vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type) < 0) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +			"(%s) failed to get vdpa dev type for socket file.\n",
> > +			path);
> > +		ret = -1;
> > +		goto unlock_exit;
> > +	}
> 
> If vendor's vdpa driver does not implement this callback, should return type
> NET.
> Another way to do may be make every vdpa driver implement the callback,
> but since other vendors only have one type. I prefer the first way.
> 
Yes, I agree with you. I will send a new version to fix this.
> > +
> > +	*type = vdpa_type;
> > +
> > +unlock_exit:
> > +	pthread_mutex_unlock(&vhost_user.mutex);
> > +	return ret;
> > +}
> > +
> > +int
> >  rte_vhost_driver_disable_features(const char *path, uint64_t
> > features)  {
> >  	struct vhost_user_socket *vsocket;
> > diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h index
> > e59a834..9cbd7cd 100644
> > --- a/lib/vhost/vdpa_driver.h
> > +++ b/lib/vhost/vdpa_driver.h
> > @@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
> >  	/** Set the device configuration space */
> >  	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
> >  		      uint32_t size, uint32_t flags);
> > +
> > +	/** get device type: net device, blk device... */
> > +	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
> >  };
> >
> >  /**
> > diff --git a/lib/vhost/version.map b/lib/vhost/version.map index
> > 0a66c58..fe4e8de 100644
> > --- a/lib/vhost/version.map
> > +++ b/lib/vhost/version.map
> > @@ -87,6 +87,8 @@ EXPERIMENTAL {
> >
> >  	# added in 22.03
> >  	rte_vhost_async_dma_configure;
> > +
> > +	rte_vhost_driver_get_vdpa_dev_type;
> 
> Missed '# added in 22.07' tag, but when you do v8, this may not be a
> problem as other patches may add this tag with new API introduced.
> 
> And introducing new API will need update of release note.
> Please refer to http://git.dpdk.org/next/dpdk-next-
> virtio/commit/?id=868883e899af386abcc298ea80ec7f6a18d8a8e7
> as an example.
> 
> Thanks,
> Chenbo
> 
Sure. I will refer to this.
> >  };
> >
> >  INTERNAL {
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
  2022-05-12 13:40         ` Xia, Chenbo
@ 2022-05-13  7:38           ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-13  7:38 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
I will implement a function "ifcvf_get_device_type" for both ifc net and blk ops.
"ifcvf_get_device_type" will return device type according to internal->device_type.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Thursday, May 12, 2022 9:41 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; Pei, Andy <andy.pei@intel.com>;
> dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
> 
> > -----Original Message-----
> > From: Xia, Chenbo <chenbo.xia@intel.com>
> > Sent: Thursday, May 12, 2022 9:22 PM
> > To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > Changpeng <changpeng.liu@intel.com>
> > Subject: RE: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc
> > driver
> >
> > > -----Original Message-----
> > > From: Pei, Andy <andy.pei@intel.com>
> > > Sent: Wednesday, April 27, 2022 4:30 PM
> > > To: dev@dpdk.org
> > > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > > <changpeng.liu@intel.com>
> > > Subject: [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc
> > driver
> > >
> > > Add get device type ops to ifc driver.
> > >
> > > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > > ---
> > >  drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++++++++++
> > >  1 file changed, 10 insertions(+)
> > >
> > > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > index 8a260b7..99a6ab0 100644
> > > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > @@ -1300,6 +1300,15 @@ struct rte_vdpa_dev_info {
> > >  	return 0;
> > >  }
> > >
> > > +static int
> > > +ifcvf_blk_get_device_type(struct rte_vdpa_device *vdev,
> > > +	uint32_t *type)
> > > +{
> > > +	RTE_SET_USED(vdev);
> > > +	*type = VDPA_DEVICE_TYPE_BLK;
> > > +	return 0;
> >
> > This is not right. Remember net and blk are both using the driver?
> > This will lead to using net also returns BLK.
> 
> Correct a bit for above: will lead to net not working. So implement callback
> for both (although in the API, NULL callback can return NET type)
> 
> Thanks,
> Chenbo
> 
> >
> > And I suggest the patch-set validated with both blk and net...
> >
> > Besides, ifcvf_blk_get_device_type should be ifcvf_get_device_type
> >
> > Thanks,
> > Chenbo
> >
> > > +}
> > > +
> > >  static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
> > >  	.get_queue_num = ifcvf_get_queue_num,
> > >  	.get_features = ifcvf_get_vdpa_features, @@ -1313,6 +1322,7 @@
> > > struct rte_vdpa_dev_info {
> > >  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> > >  	.get_notify_area = ifcvf_get_notify_area,
> > >  	.get_config = ifcvf_blk_get_config,
> > > +	.get_dev_type = ifcvf_blk_get_device_type,
> > >  };
> > >
> > >  struct rte_vdpa_dev_info dev_info[] = {
> > > --
> > > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example
  2022-05-12 13:34       ` Xia, Chenbo
@ 2022-05-13  8:16         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-13  8:16 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Thursday, May 12, 2022 9:34 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 09/18] examples/vdpa: add vDPA blk support in
> example
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 09/18] examples/vdpa: add vDPA blk support in
> > example
> 
> Better be: examples/vdpa: add virtio blk support
> 
Sure.
> >
> > Add virtio blk device support to vDPA example.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  examples/vdpa/main.c             | 57
> +++++++++++++++++++++++++++++++++++
> >  examples/vdpa/vdpa_blk_compact.h | 65
> > ++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 122 insertions(+)
> >  create mode 100644 examples/vdpa/vdpa_blk_compact.h
> >
> > diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index
> > 5ab0765..2544141 100644
> > --- a/examples/vdpa/main.c
> > +++ b/examples/vdpa/main.c
> > @@ -20,6 +20,7 @@
> >  #include <cmdline_parse_string.h>
> >  #include <cmdline_parse_num.h>
> >  #include <cmdline.h>
> > +#include "vdpa_blk_compact.h"
> >
> >  #define MAX_PATH_LEN 128
> >  #define MAX_VDPA_SAMPLE_PORTS 1024
> > @@ -159,8 +160,54 @@ struct vdpa_port {  };
> >
> >  static int
> > +vdpa_blk_device_set_features_and_protocol(const char *path) {
> > +	uint64_t protocol_features = 0;
> > +	int ret;
> > +
> > +	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_set_features for %s failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	ret = rte_vhost_driver_disable_features(path,
> > +		VHOST_BLK_DISABLED_FEATURES);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_disable_features for %s failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	ret = rte_vhost_driver_get_protocol_features(path,
> > &protocol_features);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_get_protocol_features for %s
> > failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
> > +
> > +	ret = rte_vhost_driver_set_protocol_features(path,
> > protocol_features);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_set_protocol_features for %s
> > failed.\n",
> > +			path);
> > +		goto out;
> 
> This goto is not needed.
> 
Yes, you are right. I will fix it.
> > +	}
> > +
> > +out:
> > +	return ret;
> > +}
> > +
> > +static int
> >  start_vdpa(struct vdpa_port *vport)
> >  {
> > +	uint32_t device_type = 0;
> >  	int ret;
> >  	char *socket_path = vport->ifname;
> >
> > @@ -192,6 +239,16 @@ struct vdpa_port {
> >  			"attach vdpa device failed: %s\n",
> >  			socket_path);
> >
> > +	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path,
> &device_type);
> > +	if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) {
> > +		RTE_LOG(NOTICE, VDPA, "is a blk device\n");
> 
> Should add info of socket path
> 
Yes, you are right. I will fix it.
> > +		ret =
> vdpa_blk_device_set_features_and_protocol(socket_path);
> > +		if (ret != 0)
> > +			rte_exit(EXIT_FAILURE,
> > +				"set vhost blk driver features and protocol
> > features failed: %s\n",
> > +				socket_path);
> > +	}
> > +
> >  	if (rte_vhost_driver_start(socket_path) < 0)
> >  		rte_exit(EXIT_FAILURE,
> >  			"start vhost driver failed: %s\n", diff --git
> > a/examples/vdpa/vdpa_blk_compact.h
> > b/examples/vdpa/vdpa_blk_compact.h
> > new file mode 100644
> > index 0000000..136c3f6
> > --- /dev/null
> > +++ b/examples/vdpa/vdpa_blk_compact.h
> > @@ -0,0 +1,65 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Intel Corporation  */
> > +
> > +#ifndef _VDPA_BLK_COMPACT_H_
> > +#define _VDPA_BLK_COMPACT_H_
> > +
> > +/**
> > + * @file
> > + *
> > + * Device specific vhost lib
> > + */
> > +
> > +#include <stdbool.h>
> > +
> > +#include <rte_pci.h>
> 
> above two headers are not used in this file?
> 
I will remove these two includes.
> > +#include <rte_vhost.h>
> > +
> > +/* Feature bits */
> > +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment
> size
> > */
> > +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of
> segments
> > */
> > +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> > +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available
> > */
> > +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is
> > available */
> > +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> > +
> > +/* Legacy feature bits */
> > +#ifndef VIRTIO_BLK_NO_LEGACY
> > +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> > +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru
> > */
> > +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in
> > config */
> > +#endif /* !VIRTIO_BLK_NO_LEGACY */
> > +
> > +#ifndef VHOST_USER_F_PROTOCOL_FEATURES #define
> > +VHOST_USER_F_PROTOCOL_FEATURES 30 #endif
> 
> It's already in rte_vhost.h, so no need to re-define.
> 
> Thanks,
> Chenbo
> 
Yes, you are right. I will fix it.
> > +
> > +#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
> > +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> > +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
> > +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> > +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> > +	(1ULL << VIRTIO_F_VERSION_1))
> > +
> > +#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL <<
> > VIRTIO_F_NOTIFY_ON_EMPTY) | \
> > +	(1ULL << VIRTIO_RING_F_EVENT_IDX))
> > +
> > +#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
> > +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL <<
> VIRTIO_BLK_F_SEG_MAX) | \
> > +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL <<
> VIRTIO_BLK_F_BLK_SIZE) |
> > \
> > +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL <<
> VIRTIO_BLK_F_BARRIER)  |
> > \
> > +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL <<
> VIRTIO_BLK_F_CONFIG_WCE)
> > | \
> > +	(1ULL << VIRTIO_BLK_F_MQ))
> > +
> > +/* Not supported features */
> > +#define VHOST_BLK_DISABLED_FEATURES
> (VHOST_BLK_DISABLED_FEATURES_BASE | \
> > +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL <<
> VIRTIO_BLK_F_BARRIER) | \
> > +	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL <<
> VIRTIO_BLK_F_CONFIG_WCE))
> > +
> > +/* Vhost-blk support protocol features */ #define
> > +VHOST_BLK_PROTOCOL_FEATURES \
> > +	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
> > +	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
> > +
> > +#endif /* _VDPA_BLK_COMPACT_H_ */
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect
  2022-05-12 13:53       ` Xia, Chenbo
@ 2022-05-13  8:34         ` Pei, Andy
  2022-05-13  8:40           ` Xia, Chenbo
  2022-05-13 15:37           ` Stephen Hemminger
  0 siblings, 2 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-13  8:34 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Thursday, May 12, 2022 9:53 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before
> qemu connect
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before
> > qemu connect
> 
> Title can be:
> 
> vdpa/ifc: add log for config space of virtio blk
> 
Sure.
> >
> > Add some log of virtio blk device config space information at VDPA
> > launch before qemu connects.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
> >  1 file changed, 28 insertions(+)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index ca49bc3..4060a44 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -1363,6 +1363,9 @@ struct rte_vdpa_dev_info dev_info[] = {
> >  	struct rte_kvargs *kvlist = NULL;
> >  	int ret = 0;
> >  	int16_t device_id;
> > +	uint64_t capacity = 0;
> > +	uint8_t *byte;
> > +	uint32_t i;
> >
> >  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> >  		return 0;
> > @@ -1429,6 +1432,31 @@ struct rte_vdpa_dev_info dev_info[] = {
> >  		internal->features = features &
> >  					~(1ULL <<
> VIRTIO_F_IOMMU_PLATFORM);
> >  		internal->features |= dev_info[IFCVF_BLK].features;
> > +
> > +		/* cannot read 64-bit register in one attempt,
> > +		 * so read byte by byte.
> > +		 */
> > +		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> > +			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> > +			capacity |= (uint64_t)*byte << (i * 8);
> > +		}
> > +		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
> 
> I believe this '21' should be calculated rather than hard-code.
> 
/* The capacity (in 512-byte sectors). */
So a right shift by 1 gives the size in K, another right shift by 10 gives M, and a right shift by 10 more bits gives G.
10 + 10 + 1 = 21.
I think adding some comments in the code is fine. What do you think?
> And should these log all be debug log?
> 
This information describes the hardware capabilities. I think INFO is fine.
> Thanks,
> Chenbo
> 
> > +
> > +		DRV_LOG(INFO, "size_max  : 0x%08x",
> > +			internal->hw.blk_cfg->size_max);
> > +		DRV_LOG(INFO, "seg_max   : 0x%08x",
> > +			internal->hw.blk_cfg->seg_max);
> > +		DRV_LOG(INFO, "blk_size  : 0x%08x",
> > +			internal->hw.blk_cfg->blk_size);
> > +		DRV_LOG(INFO, "geometry");
> > +		DRV_LOG(INFO, "    cylinders: %u",
> > +			internal->hw.blk_cfg->geometry.cylinders);
> > +		DRV_LOG(INFO, "    heads    : %u",
> > +			internal->hw.blk_cfg->geometry.heads);
> > +		DRV_LOG(INFO, "    sectors  : %u",
> > +			internal->hw.blk_cfg->geometry.sectors);
> > +		DRV_LOG(INFO, "num_queues: 0x%08x",
> > +			internal->hw.blk_cfg->num_queues);
> >  	}
> >
> >  	list->internal = internal;
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect
  2022-05-13  8:34         ` Pei, Andy
@ 2022-05-13  8:40           ` Xia, Chenbo
  2022-05-13 15:37           ` Stephen Hemminger
  1 sibling, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-13  8:40 UTC (permalink / raw)
  To: Pei, Andy, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Friday, May 13, 2022 4:35 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before
> qemu connect
> 
> Hi Chenbo,
> 
> Thanks for your reply.
> My reply is inline.
> 
> > -----Original Message-----
> > From: Xia, Chenbo <chenbo.xia@intel.com>
> > Sent: Thursday, May 12, 2022 9:53 PM
> > To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > Changpeng <changpeng.liu@intel.com>
> > Subject: RE: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch
> before
> > qemu connect
> >
> > > -----Original Message-----
> > > From: Pei, Andy <andy.pei@intel.com>
> > > Sent: Wednesday, April 27, 2022 4:30 PM
> > > To: dev@dpdk.org
> > > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > > <changpeng.liu@intel.com>
> > > Subject: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before
> > > qemu connect
> >
> > Title can be:
> >
> > vdpa/ifc: add log for config space of virtio blk
> >
> Sure.
> > >
> > > Add some log of virtio blk device config space information at VDPA
> > > launch before qemu connects.
> > >
> > > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > > ---
> > >  drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
> > >  1 file changed, 28 insertions(+)
> > >
> > > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > b/drivers/vdpa/ifc/ifcvf_vdpa.c index ca49bc3..4060a44 100644
> > > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > @@ -1363,6 +1363,9 @@ struct rte_vdpa_dev_info dev_info[] = {
> > >  struct rte_kvargs *kvlist = NULL;
> > >  int ret = 0;
> > >  int16_t device_id;
> > > +uint64_t capacity = 0;
> > > +uint8_t *byte;
> > > +uint32_t i;
> > >
> > >  if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> > >  return 0;
> > > @@ -1429,6 +1432,31 @@ struct rte_vdpa_dev_info dev_info[] = {
> > >  internal->features = features &
> > >  ~(1ULL <<
> > VIRTIO_F_IOMMU_PLATFORM);
> > >  internal->features |= dev_info[IFCVF_BLK].features;
> > > +
> > > +/* cannot read 64-bit register in one attempt,
> > > + * so read byte by byte.
> > > + */
> > > +for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> > > +byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> > > +capacity |= (uint64_t)*byte << (i * 8);
> > > +}
> > > +DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
> >
> > I believe this '21' should be calculated rather than hard-code.
> >
> /* The capacity (in 512-byte sectors). */
> So right shift 1 we get in K. Another right shift 10 we get in M, right
> shift 10 more bits, we get in G.
> 10 + 10 + 1 = 21.
> I think add some comments in code is fine, what do you think?
OK. Works for me.
Thanks,
Chenbo
> 
> > And should these log all be debug log?
> >
> These information is hardware ability. I think INFO is fine.
> 
> > Thanks,
> > Chenbo
> >
> > > +
> > > +DRV_LOG(INFO, "size_max  : 0x%08x",
> > > +internal->hw.blk_cfg->size_max);
> > > +DRV_LOG(INFO, "seg_max   : 0x%08x",
> > > +internal->hw.blk_cfg->seg_max);
> > > +DRV_LOG(INFO, "blk_size  : 0x%08x",
> > > +internal->hw.blk_cfg->blk_size);
> > > +DRV_LOG(INFO, "geometry");
> > > +DRV_LOG(INFO, "    cylinders: %u",
> > > +internal->hw.blk_cfg->geometry.cylinders);
> > > +DRV_LOG(INFO, "    heads    : %u",
> > > +internal->hw.blk_cfg->geometry.heads);
> > > +DRV_LOG(INFO, "    sectors  : %u",
> > > +internal->hw.blk_cfg->geometry.sectors);
> > > +DRV_LOG(INFO, "num_queues: 0x%08x",
> > > +internal->hw.blk_cfg->num_queues);
> > >  }
> > >
> > >  list->internal = internal;
> > > --
> > > 1.8.3.1
> >
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware
  2022-05-12 13:55       ` Xia, Chenbo
@ 2022-05-13  8:58         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-13  8:58 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
This patch will be omitted in the next version.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Thursday, May 12, 2022 9:55 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 13/18] vdpa/ifc: read virtio max queues from
> hardware
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 13/18] vdpa/ifc: read virtio max queues from
> > hardware
> >
> > Original code max_queues is set to IFCVF_MAX_QUEUES.
> > New code max_queues is the min of IFCVF_MAX_QUEUES and hardware
> num_queues.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++++
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 4060a44..5a8cf1c 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -1457,6 +1457,10 @@ struct rte_vdpa_dev_info dev_info[] = {
> >  			internal->hw.blk_cfg->geometry.sectors);
> >  		DRV_LOG(INFO, "num_queues: 0x%08x",
> >  			internal->hw.blk_cfg->num_queues);
> > +
> > +		/* reset max_queue here, to minimum modification */
> > +		internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
> > +			internal->hw.blk_cfg->num_queues);
> 
> MQ is not supported now in this driver, should we make this change later?
> 
> Thanks,
> Chenbo
> 
> >  	}
> >
> >  	list->internal = internal;
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk
  2022-05-13  2:52       ` Xia, Chenbo
@ 2022-05-13 10:10         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-13 10:10 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
HI Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, May 13, 2022 10:53 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio
> blk
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for
> > virtio blk
> 
> Better be: vdpa/ifc: add interrupt handling for config space
> 
Sure. I will fix it in next version.
> >
> > Create a thread to poll and relay config space change interrupt.
> > Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to info qemu.
> 
> Inform QEMU. You don't need to save words in commit log. The commit log
> should be as detailed as possible to make readers understand quickly what
> the commit is doing :)
> 
Sure. I will fix it in next version.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 112
> > ++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 112 insertions(+)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 5a8cf1c..0e94e1f 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -53,7 +53,9 @@ struct ifcvf_internal {
> >  	int vfio_group_fd;
> >  	int vfio_dev_fd;
> >  	pthread_t tid;	/* thread for notify relay */
> > +	pthread_t intr_tid;	/* thread for intr relay */
> 
> Thread for virtio-blk config space change interrupt relay
> 
Sure.
> >  	int epfd;
> > +	int csc_fd;
> 
> csc_epfd
> 
OK.
> >  	int vid;
> >  	struct rte_vdpa_device *vdev;
> >  	uint16_t max_queues;
> > @@ -558,6 +560,107 @@ struct rte_vdpa_dev_info {
> >  	return 0;
> >  }
> >
> > +static void
> > +virtio_interrupt_handler(struct ifcvf_internal *internal) {
> > +	int vid = internal->vid;
> > +	int ret;
> > +
> > +	ret = rte_vhost_slave_config_change(vid, 1);
> > +	if (ret)
> > +		DRV_LOG(ERR, "failed to notify the guest about configuration
> > space change.");
> > +}
> > +
> > +static void *
> > +intr_relay(void *arg)
> > +{
> > +	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
> > +	struct epoll_event csc_event;
> > +	struct epoll_event ev;
> > +	uint64_t buf;
> > +	int nbytes;
> > +	int csc_fd, csc_val = 0;
> > +
> > +	csc_fd = epoll_create(1);
> > +	if (csc_fd < 0) {
> > +		DRV_LOG(ERR, "failed to create epoll for config space
> > change.");
> > +		return NULL;
> > +	}
> > +
> > +	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
> > +	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
> > +	if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
> > +		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
> > +		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> > +		return NULL;
> 
> Close the epfd and set to -1 if err.
> 
I checked other epoll usages in DPDK, and it seems most of them do not close the epfd and set it to -1 on error.
I am not sure whether it is needed here.
> > +	}
> > +
> > +	internal->csc_fd = csc_fd;
> > +
> > +	for (;;) {
> > +		csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
> > +		if (csc_val < 0) {
> > +			if (errno == EINTR)
> > +				continue;
> > +			DRV_LOG(ERR, "epoll_wait return fail\n");
> 
> Save '\n', it's not needed for DRV_LOG. Please check other DRV_LOGs
> 
OK
> > +			return NULL;
> > +		} else if (csc_val == 0) {
> > +			continue;
> > +		} else {
> > +			/* csc_val > 0 */
> > +			nbytes = read(csc_event.data.fd, &buf, 8);
> > +			if (nbytes < 0) {
> > +				if (errno == EINTR || errno == EWOULDBLOCK)
> 
> EAGAIN should also be this case?
> 
Yes, it will be added in the next version.
> > +					continue;
> > +				DRV_LOG(ERR, "Error reading from file
> > descriptor %d: %s\n",
> > +					csc_event.data.fd,
> > +					strerror(errno));
> > +				return NULL;
> > +			} else if (nbytes == 0) {
> > +				DRV_LOG(ERR, "Read nothing from file
> > descriptor %d\n",
> > +					csc_event.data.fd);
> > +				continue;
> > +			} else {
> > +				virtio_interrupt_handler(internal);
> > +			}
> > +		}
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +static int
> > +setup_intr_relay(struct ifcvf_internal *internal) {
> > +	int ret;
> > +
> > +	ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
> > +			(void *)internal);
> 
> EAL API: rte_ctrl_thread_create, will be preferred.
> 
Sure, I will use " rte_ctrl_thread_create " in next version.
> > +	if (ret) {
> > +		DRV_LOG(ERR, "failed to create notify relay pthread.");
> > +		return -1;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int
> > +unset_intr_relay(struct ifcvf_internal *internal) {
> > +	void *status;
> > +
> > +	if (internal->intr_tid) {
> > +		pthread_cancel(internal->intr_tid);
> > +		pthread_join(internal->intr_tid, &status);
> > +	}
> > +	internal->intr_tid = 0;
> > +
> > +	if (internal->csc_fd >= 0)
> > +		close(internal->csc_fd);
> > +	internal->csc_fd = -1;
> > +
> > +	return 0;
> > +}
> > +
> >  static int
> >  update_datapath(struct ifcvf_internal *internal)  { @@ -584,10
> > +687,16 @@ struct rte_vdpa_dev_info {
> >  		if (ret)
> >  			goto err;
> >
> > +		ret = setup_intr_relay(internal);
> > +		if (ret)
> > +			goto err;
> > +
> 
> But this is not needed for net, right? As I said, we should include validation
> for net also.
> 
> Thanks,
> Chenbo
> 
For a net device, especially a hardened virtio device, plugging the fabric in or out will cause a config change.
I think net devices may also need this interrupt, but I am not sure.
> >  		rte_atomic32_set(&internal->running, 1);
> >  	} else if (rte_atomic32_read(&internal->running) &&
> >  		   (!rte_atomic32_read(&internal->started) ||
> >  		    !rte_atomic32_read(&internal->dev_attached))) {
> > +		ret = unset_intr_relay(internal);
> > +
> >  		ret = unset_notify_relay(internal);
> >  		if (ret)
> >  			goto err;
> > @@ -880,6 +989,9 @@ struct rte_vdpa_dev_info {
> >  	/* stop the direct IO data path */
> >  	unset_notify_relay(internal);
> >  	vdpa_ifcvf_stop(internal);
> > +
> > +	unset_intr_relay(internal);
> > +
> >  	vdpa_disable_vfio_intr(internal);
> >
> >  	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL,
> false);
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect
  2022-05-13  8:34         ` Pei, Andy
  2022-05-13  8:40           ` Xia, Chenbo
@ 2022-05-13 15:37           ` Stephen Hemminger
  2022-05-16  1:03             ` Pei, Andy
  1 sibling, 1 reply; 263+ messages in thread
From: Stephen Hemminger @ 2022-05-13 15:37 UTC (permalink / raw)
  To: Pei, Andy; +Cc: Xia, Chenbo, dev, maxime.coquelin, Cao, Gang, Liu, Changpeng
On Fri, 13 May 2022 08:34:38 +0000
"Pei, Andy" <andy.pei@intel.com> wrote:
> Hi Chenbo,
> 
> Thanks for your reply.
> My reply is inline.
> 
> > -----Original Message-----
> > From: Xia, Chenbo <chenbo.xia@intel.com>
> > Sent: Thursday, May 12, 2022 9:53 PM
> > To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > Changpeng <changpeng.liu@intel.com>
> > Subject: RE: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before
> > qemu connect
> >   
> > > -----Original Message-----
> > > From: Pei, Andy <andy.pei@intel.com>
> > > Sent: Wednesday, April 27, 2022 4:30 PM
> > > To: dev@dpdk.org
> > > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > > <changpeng.liu@intel.com>
> > > Subject: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before
> > > qemu connect  
> > 
> > Title can be:
> > 
> > vdpa/ifc: add log for config space of virtio blk
> >   
> Sure.
> > >
> > > Add some log of virtio blk device config space information at VDPA
> > > launch before qemu connects.
> > >
> > > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > > ---
> > >  drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
> > >  1 file changed, 28 insertions(+)
> > >
> > > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > b/drivers/vdpa/ifc/ifcvf_vdpa.c index ca49bc3..4060a44 100644
> > > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > @@ -1363,6 +1363,9 @@ struct rte_vdpa_dev_info dev_info[] = {
> > >  	struct rte_kvargs *kvlist = NULL;
> > >  	int ret = 0;
> > >  	int16_t device_id;
> > > +	uint64_t capacity = 0;
> > > +	uint8_t *byte;
> > > +	uint32_t i;
> > >
> > >  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> > >  		return 0;
> > > @@ -1429,6 +1432,31 @@ struct rte_vdpa_dev_info dev_info[] = {
> > >  		internal->features = features &
> > >  					~(1ULL <<  
> > VIRTIO_F_IOMMU_PLATFORM);  
> > >  		internal->features |= dev_info[IFCVF_BLK].features;
> > > +
> > > +		/* cannot read 64-bit register in one attempt,
> > > +		 * so read byte by byte.
> > > +		 */
> > > +		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> > > +			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> > > +			capacity |= (uint64_t)*byte << (i * 8);
> > > +		}
> > > +		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);  
> > 
> > I believe this '21' should be calculated rather than hard-code.
Sounds like a debug not info message.
It is meant for developer not end user.
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect
  2022-05-13 15:37           ` Stephen Hemminger
@ 2022-05-16  1:03             ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-16  1:03 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Xia, Chenbo, dev, maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Stephen,
Thanks for your reply.
I will change it to a DEBUG level log in the next version.
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Friday, May 13, 2022 11:38 PM
> To: Pei, Andy <andy.pei@intel.com>
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; dev@dpdk.org;
> maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: Re: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before
> qemu connect
> 
> On Fri, 13 May 2022 08:34:38 +0000
> "Pei, Andy" <andy.pei@intel.com> wrote:
> 
> > Hi Chenbo,
> >
> > Thanks for your reply.
> > My reply is inline.
> >
> > > -----Original Message-----
> > > From: Xia, Chenbo <chenbo.xia@intel.com>
> > > Sent: Thursday, May 12, 2022 9:53 PM
> > > To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> > > Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > > Changpeng <changpeng.liu@intel.com>
> > > Subject: RE: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch
> > > before qemu connect
> > >
> > > > -----Original Message-----
> > > > From: Pei, Andy <andy.pei@intel.com>
> > > > Sent: Wednesday, April 27, 2022 4:30 PM
> > > > To: dev@dpdk.org
> > > > Cc: Xia, Chenbo <chenbo.xia@intel.com>;
> > > > maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> > > > Changpeng <changpeng.liu@intel.com>
> > > > Subject: [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch
> > > > before qemu connect
> > >
> > > Title can be:
> > >
> > > vdpa/ifc: add log for config space of virtio blk
> > >
> > Sure.
> > > >
> > > > Add some log of virtio blk device config space information at VDPA
> > > > launch before qemu connects.
> > > >
> > > > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > > > ---
> > > >  drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++++++++++++++++++++++++++++
> > > >  1 file changed, 28 insertions(+)
> > > >
> > > > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > > b/drivers/vdpa/ifc/ifcvf_vdpa.c index ca49bc3..4060a44 100644
> > > > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > > > @@ -1363,6 +1363,9 @@ struct rte_vdpa_dev_info dev_info[] = {
> > > >  	struct rte_kvargs *kvlist = NULL;
> > > >  	int ret = 0;
> > > >  	int16_t device_id;
> > > > +	uint64_t capacity = 0;
> > > > +	uint8_t *byte;
> > > > +	uint32_t i;
> > > >
> > > >  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> > > >  		return 0;
> > > > @@ -1429,6 +1432,31 @@ struct rte_vdpa_dev_info dev_info[] = {
> > > >  		internal->features = features &
> > > >  					~(1ULL <<
> > > VIRTIO_F_IOMMU_PLATFORM);
> > > >  		internal->features |= dev_info[IFCVF_BLK].features;
> > > > +
> > > > +		/* cannot read 64-bit register in one attempt,
> > > > +		 * so read byte by byte.
> > > > +		 */
> > > > +		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> > > > +			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> > > > +			capacity |= (uint64_t)*byte << (i * 8);
> > > > +		}
> > > > +		DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
> > >
> > > I believe this '21' should be calculated rather than hard-code.
> 
> Sounds like a debug not info message.
> It is meant for developer not end user.
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct
  2022-05-13  2:55       ` Xia, Chenbo
@ 2022-05-16  3:05         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-16  3:05 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, May 13, 2022 10:55 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct
> >
> > Add is_blk flag to ifcvf_hw, and init is_blk during probe.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.h | 1 +
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
> >  2 files changed, 3 insertions(+)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.h
> > b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..8591ef1 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.h
> > +++ b/drivers/vdpa/ifc/base/ifcvf.h
> > @@ -149,6 +149,7 @@ struct ifcvf_hw {
> >  	u8     *lm_cfg;
> >  	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
> >  	u8 nr_vring;
> > +	u8 is_blk;
> >  	struct ifcvf_pci_mem_resource
> mem_resource[IFCVF_PCI_MAX_RESOURCE];
> >  };
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 0e94e1f..4923bc1 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -1536,11 +1536,13 @@ struct rte_vdpa_dev_info dev_info[] = {
> >
> >  	if (device_id == VIRTIO_ID_NET) {
> >  		internal->device_type = IFCVF_NET;
> > +		internal->hw.is_blk = IFCVF_NET;
> 
> I believe it's enough to keep only device_type. Device type can be defined in
> internal or internal->hw. Choose the way you prefer.
> 
> Thanks,
> Chenbo
> 
Yes, you are right. To keep the API unchanged, I think it is better to keep only device_type in internal->hw.
So I will have to make some changes to the other patches in this patch set.
> >  		internal->features = features &
> >  					~(1ULL <<
> VIRTIO_F_IOMMU_PLATFORM);
> >  		internal->features |= dev_info[IFCVF_NET].features;
> >  	} else if (device_id == VIRTIO_ID_BLOCK) {
> >  		internal->device_type = IFCVF_BLK;
> > +		internal->hw.is_blk = IFCVF_BLK;
> >  		internal->features = features &
> >  					~(1ULL <<
> VIRTIO_F_IOMMU_PLATFORM);
> >  		internal->features |= dev_info[IFCVF_BLK].features;
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device
  2022-05-13  2:57       ` Xia, Chenbo
@ 2022-05-16  4:19         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-16  4:19 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
HI Chenbo,
Thanks for your reply.
I will send out a new version to fix this.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, May 13, 2022 10:58 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk
> device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 16/18] vdpa/ifc/base: access correct register for
> > blk device
> >
> > 1.last_avail_idx is lower 16 bit of the register.
> > 2.address of ring_state register is different between net and blk device.
> 
> Not a good commit log. The commit log should illustrate more on what's the
> commit is doing.
> 
> Thanks,
> Chenbo
> 
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.c | 36
> > +++++++++++++++++++++++++++++-------
> >  drivers/vdpa/ifc/base/ifcvf.h |  1 +
> >  2 files changed, 30 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.c
> > b/drivers/vdpa/ifc/base/ifcvf.c index d10c1fd..4d5881a 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.c
> > +++ b/drivers/vdpa/ifc/base/ifcvf.c
> > @@ -218,10 +218,18 @@
> >  				&cfg->queue_used_hi);
> >  		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
> >
> > -		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> > -				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
> > -			(u32)hw->vring[i].last_avail_idx |
> > -			((u32)hw->vring[i].last_used_idx << 16);
> > +		if (hw->is_blk == IFCVF_BLK) {
> > +			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> > +				i * IFCVF_LM_CFG_SIZE) =
> > +				(u32)hw->vring[i].last_avail_idx |
> > +				((u32)hw->vring[i].last_used_idx << 16);
> > +		} else if (hw->is_blk == IFCVF_NET) {
> > +			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> > +				(i / 2) * IFCVF_LM_CFG_SIZE +
> > +				(i % 2) * 4) =
> > +				(u32)hw->vring[i].last_avail_idx |
> > +				((u32)hw->vring[i].last_used_idx << 16);
> > +		}
> >
> >  		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
> >  		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) == @@ -
> 254,9 +262,23
> > @@
> >  		IFCVF_WRITE_REG16(i, &cfg->queue_select);
> >  		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
> >  		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg-
> > >queue_msix_vector);
> > -		ring_state = *(u32 *)(hw->lm_cfg +
> IFCVF_LM_RING_STATE_OFFSET
> > +
> > -				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
> > -		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
> > +
> > +		if (hw->is_blk) {
> > +			ring_state = *(u32 *)(hw->lm_cfg +
> > +					IFCVF_LM_RING_STATE_OFFSET +
> > +					i * IFCVF_LM_CFG_SIZE);
> > +		} else if (hw->is_blk == IFCVF_NET) {
> > +			ring_state = *(u32 *)(hw->lm_cfg +
> > +					IFCVF_LM_RING_STATE_OFFSET +
> > +					(i / 2) * IFCVF_LM_CFG_SIZE +
> > +					(i % 2) * 4);
> > +		}
> > +
> > +		if (hw->is_blk == IFCVF_BLK)
> > +			hw->vring[i].last_avail_idx =
> > +				(u16)(ring_state & IFCVF_16_BIT_MASK);
> > +		else if (hw->is_blk == IFCVF_NET)
> > +			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
> >  		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
> >  	}
> >  }
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.h
> > b/drivers/vdpa/ifc/base/ifcvf.h index 8591ef1..ff11b12 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.h
> > +++ b/drivers/vdpa/ifc/base/ifcvf.h
> > @@ -65,6 +65,7 @@
> >  #define IFCVF_MEDIATED_VRING		0x200000000000
> >
> >  #define IFCVF_32_BIT_MASK		0xffffffff
> > +#define IFCVF_16_BIT_MASK		0xffff
> >
> >  #ifndef VHOST_USER_PROTOCOL_F_CONFIG
> >  #define VHOST_USER_PROTOCOL_F_CONFIG	9
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO
  2022-05-13  2:59       ` Xia, Chenbo
@ 2022-05-16  4:20         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-16  4:20 UTC (permalink / raw)
  To: Xia, Chenbo, dev; +Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng
Hi Chenbo,
Thanks for your reply.
This will be addressed in the next version.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, May 13, 2022 10:59 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight
> IO
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, April 27, 2022 4:30 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v7 17/18] vdpa/ifc: blk device pause without no
> > inflight IO
> >
> > When virtio blk device is pause, make sure hardware last_avail_idx and
> > last_used_idx are the same.
> 
> Patch is good. But illustrate more on avoiding in-flight packet in commit log
> 
> Thanks,
> Chenbo
> 
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++--------
> >  1 file changed, 17 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 4923bc1..def6adf 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -314,12 +314,12 @@ struct rte_vdpa_dev_info {
> > vdpa_ifcvf_stop(struct ifcvf_internal *internal)  {
> >  	struct ifcvf_hw *hw = &internal->hw;
> > -	struct rte_vhost_vring vq;
> >  	uint32_t i;
> >  	int vid;
> >  	uint64_t features = 0;
> >  	uint64_t log_base = 0, log_size = 0;
> >  	uint64_t len;
> > +	u32 ring_state = 0;
> >
> >  	vid = internal->vid;
> >
> > @@ -328,13 +328,22 @@ struct rte_vdpa_dev_info {
> >  	 */
> >  	if (internal->device_type == IFCVF_BLK) {
> >  		for (i = 0; i < hw->nr_vring; i++) {
> > -			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> > -			while (vq.avail->idx != vq.used->idx) {
> > -				ifcvf_notify_queue(hw, i);
> > -				usleep(10);
> > -			}
> > -			hw->vring[i].last_avail_idx = vq.avail->idx;
> > -			hw->vring[i].last_used_idx = vq.used->idx;
> > +			do {
> > +				if (hw->lm_cfg != NULL)
> > +					ring_state = *(u32 *)(hw->lm_cfg +
> > +
> 	IFCVF_LM_RING_STATE_OFFSET +
> > +						i * IFCVF_LM_CFG_SIZE);
> > +				hw->vring[i].last_avail_idx =
> > +					(u16)(ring_state &
> IFCVF_16_BIT_MASK);
> > +				hw->vring[i].last_used_idx =
> > +					(u16)(ring_state >> 16);
> > +				if (hw->vring[i].last_avail_idx !=
> > +					hw->vring[i].last_used_idx) {
> > +					ifcvf_notify_queue(hw, i);
> > +					usleep(10);
> > +				}
> > +			} while (hw->vring[i].last_avail_idx !=
> > +				hw->vring[i].last_used_idx);
> >  		}
> >  	}
> >
> > --
> > 1.8.3.1
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                     ` (5 preceding siblings ...)
  2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-05-18 12:13   ` Andy Pei
  2022-05-18 12:13     ` [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (12 more replies)
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  8 siblings, 13 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the two device types have a lot in common, part of the vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device ID, and
device-specific features and ops are implemented for each.
An example is added to the vdpa example application to support virtio_blk devices.
To support blk device live migration, some modifications are made to the vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL message.
v8:
 delete some redundant code.
 fix some commit log.
v7:
 Check on expected fd num in new vhost msg handler.
 Sanity check on vhost msg size.
 Fix typo.
 Add commit log to help understand code.
 Remove duplicated code.
 Add new API to get vDPA device type.
v6:
 fix some commit log.
 add vhost socket in log output to make it more user-friendly.
 when driver ops fail, just output some log, do not break message handler.
 check vhost msg size in msg handler.
v5:
 fix some coding style issues.
v4:
 add args "isblk" to vdpa example to specify a block device, fix some
 issue in example.
 Make sure code specify for block device does not affect net device.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.
Andy Pei (13):
  vdpa/ifc: add support for virtio blk device
  vhost: add vDPA ops for blk device
  vhost: add vhost msg support for get/set config
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vDPA interrupt relay for blk device
  vdpa/ifc: add block device SW live-migration
  vhost: add API to get vDPA device type
  vdpa/ifc: add get device type ops to ifc driver
  examples/vdpa: add virtio blk support
  usertools: add support for virtio blk device
  vdpa/ifc: add log for config space of virtio blk
  vdpa/ifc: add interrupt handling for config space
  vdpa/ifc/base: access correct register for blk device
 doc/guides/prog_guide/vhost_lib.rst    |   5 +
 doc/guides/rel_notes/release_22_07.rst |   4 +
 drivers/vdpa/ifc/base/ifcvf.c          |  34 ++-
 drivers/vdpa/ifc/base/ifcvf.h          |  21 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c          | 408 +++++++++++++++++++++++++++++++--
 examples/vdpa/main.c                   |  56 +++++
 examples/vdpa/vdpa_blk_compact.h       |  58 +++++
 lib/vhost/rte_vhost.h                  |  17 ++
 lib/vhost/socket.c                     |  44 ++++
 lib/vhost/vdpa_driver.h                |  11 +-
 lib/vhost/version.map                  |   1 +
 lib/vhost/vhost_user.c                 |  73 ++++++
 lib/vhost/vhost_user.h                 |  13 ++
 usertools/dpdk-devbind.py              |   5 +-
 14 files changed, 725 insertions(+), 25 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  3:41       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 02/13] vhost: add vDPA ops for " Andy Pei
                       ` (11 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are each implemented with the proper features and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 91 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 9 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..483d38b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_DEVICE_ID                 0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -126,13 +135,18 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	int device_type;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..be0efd3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -75,6 +75,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1173,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1226,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1276,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->hw.device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->hw.device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1305,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->hw.device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1313,6 +1374,20 @@ struct internal_list {
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 02/13] vhost: add vDPA ops for blk device
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-05-18 12:13     ` [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  3:46       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 03/13] vhost: add vhost msg support for get/set config Andy Pei
                       ` (10 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vDPA ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 03/13] vhost: add vhost msg support for get/set config
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-05-18 12:13     ` [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-05-18 12:13     ` [PATCH v8 02/13] vhost: add vDPA ops for " Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  3:54       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (9 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk vDPA devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 13 +++++++++
 2 files changed, 86 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 850848c..1dd1e25f 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2468,6 +2468,77 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) get_config() return error!\n",
+					 dev->ifname);
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) vhost_user_config size: %"PRIu32", should not be larger than %d\n",
+			dev->ifname, ctx->msg.payload.cfg.size,
+			VHOST_USER_MAX_CONFIG_SIZE);
+		goto out;
+	}
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+		if (ret)
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) set_config() return error!\n",
+					 dev->ifname);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+
+out:
+	return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2686,6 +2757,8 @@ static int is_vring_iotlb(struct virtio_net *dev,
 VHOST_MESSAGE_HANDLER(VHOST_USER_NET_SET_MTU, vhost_user_net_set_mtu, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_SET_SLAVE_REQ_FD, vhost_user_set_req_fd, true) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_IOTLB_MSG, vhost_user_iotlb_msg, false) \
+VHOST_MESSAGE_HANDLER(VHOST_USER_GET_CONFIG, vhost_user_get_config, false) \
+VHOST_MESSAGE_HANDLER(VHOST_USER_SET_CONFIG, vhost_user_set_config, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_ADVISE, vhost_user_set_postcopy_advise, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_LISTEN, vhost_user_set_postcopy_listen, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_END, vhost_user_postcopy_end, false) \
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index ba1c5c7..c4d091e 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -123,6 +125,16 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -146,6 +158,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 03/13] vhost: add vhost msg support for get/set config Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  4:07       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
                       ` (8 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 91 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 483d38b..244de46 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -67,6 +67,10 @@
 #define IFCVF_32_BIT_MASK		0xffffffff
 
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index be0efd3..350214a 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1087,6 +1087,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1199,6 +1203,91 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	/* The capacity is the number of 512-byte sectors.
+	 * Right shifting by 1 bit gives the size in K,
+	 * shifting by another 10 bits gives the size in M,
+	 * and shifting by 10 more bits gives the size in G.
+	 * So to show the capacity in G, we right shift 21 bits in total.
+	 */
+	DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(DEBUG, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(DEBUG, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(DEBUG, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(DEBUG, "geometry");
+	DRV_LOG(DEBUG, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(DEBUG, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(DEBUG, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(DEBUG, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(DEBUG, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = ifcvf_set_vring_state,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1211,7 +1300,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 05/13] vdpa/ifc: add vDPA interrupt relay for blk device
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  4:10       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (7 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
For the net device type, only the interrupts of rx queues need to be relayed.
But for block, since all the queues are used for both read and write
requests, the interrupts of all queues need to be relayed.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 350214a..509a1ed 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -369,6 +369,7 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
@@ -378,7 +379,13 @@ struct rte_vdpa_dev_info {
 	for (i = 0; i < nr_vring; i++) {
 		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
 		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
+		if (m_rx == true &&
+			((i & 1) == 0 || internal->hw.device_type == IFCVF_BLK)) {
+			/* For the net we only need to relay rx queue,
+			 * which will change the mem of VM.
+			 * For the blk we need to relay all the read cmd
+			 * of each queue
+			 */
 			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 			if (fd < 0) {
 				DRV_LOG(ERR, "can't setup eventfd: %s",
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  5:25       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 07/13] vhost: add API to get vDPA device type Andy Pei
                       ` (6 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add SW live-migration support to block device.
For a block device, it is critical that no request
is dropped. So when the virtio blk device is
paused, make sure hardware last_avail_idx and
last_used_idx are the same. This indicates that all
requests have received acks and there is no inflight IO.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 42 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 244de46..4fb1736 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 509a1ed..3e78c7d 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -316,8 +316,34 @@ struct rte_vdpa_dev_info {
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
+
+	/* to make sure no packet is lost for blk device
+	 * do not stop until last_avail_idx == last_used_idx
+	 */
+	if (internal->hw.device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
+		}
+	}
+
 	ifcvf_stop_hw(hw);
 
 	for (i = 0; i < hw->nr_vring; i++)
@@ -641,8 +667,10 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NET: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((internal->hw.device_type == IFCVF_NET) && (i & 1)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
@@ -692,8 +720,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->hw.device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->hw.device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -755,7 +787,9 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
+	for (qid = 0; qid < q_num; qid += 1) {
+		if ((internal->hw.device_type == IFCVF_NET) && (qid & 1))
+			continue;
 		ev.events = EPOLLIN | EPOLLPRI;
 		/* leave a flag to mark it's for interrupt */
 		ev.data.u64 = 1 | qid << 1 |
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 07/13] vhost: add API to get vDPA device type
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  7:26       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
                       ` (5 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Vhost backends of different devices have different features.
Add an API to get the vDPA device type (net device or blk device
currently), so users can set different features for different
kinds of devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 doc/guides/prog_guide/vhost_lib.rst    |  5 ++++
 doc/guides/rel_notes/release_22_07.rst |  4 ++++
 lib/vhost/rte_vhost.h                  | 17 +++++++++++++
 lib/vhost/socket.c                     | 44 ++++++++++++++++++++++++++++++++++
 lib/vhost/vdpa_driver.h                |  3 +++
 lib/vhost/version.map                  |  1 +
 6 files changed, 74 insertions(+)
diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index f287b76..0337b38 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -282,6 +282,11 @@ The following is an overview of some key Vhost API functions:
   Clear inflight packets which are submitted to DMA engine in vhost async data
   path. Completed packets are returned to applications through ``pkts``.
 
+* ``rte_vhost_driver_get_vdpa_dev_type(path, type)``
+
+  Get device type of vDPA device, such as VDPA_DEVICE_TYPE_NET,
+  VDPA_DEVICE_TYPE_BLK.
+
 Vhost-user Implementations
 --------------------------
 
diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
index e49cace..9550977 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -104,6 +104,10 @@ New Features
   * ``RTE_EVENT_QUEUE_ATTR_WEIGHT``
   * ``RTE_EVENT_QUEUE_ATTR_AFFINITY``
 
+* **Added vhost API to get the device type of a vDPA device.**
+
+  Added an API which can get the device type of vDPA device.
+
 
 Removed Items
 -------------
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index c733f85..c977a24 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -117,6 +117,9 @@
 
 #define RTE_MAX_VHOST_DEVICE	1024
 
+#define VDPA_DEVICE_TYPE_NET 0
+#define VDPA_DEVICE_TYPE_BLK 1
+
 struct rte_vdpa_device;
 
 /**
@@ -486,6 +489,20 @@ struct rte_vdpa_device *
 rte_vhost_driver_get_vdpa_device(const char *path);
 
 /**
+ * Get the device type of the vdpa device.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param type
+ *  the device type of the vdpa device
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
+
+/**
  * Set the feature bits the vhost-user driver supports.
  *
  * @param path
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index b304339..ef0f401 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -619,6 +619,50 @@ struct rte_vdpa_device *
 }
 
 int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
+{
+	struct vhost_user_socket *vsocket;
+	struct rte_vdpa_device *vdpa_dev;
+	uint32_t vdpa_type = 0;
+	int ret = 0;
+
+	pthread_mutex_lock(&vhost_user.mutex);
+	vsocket = find_vhost_user_socket(path);
+	if (!vsocket) {
+		VHOST_LOG_CONFIG(ERR,
+				 "(%s) socket file is not registered yet.\n",
+				 path);
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	vdpa_dev = vsocket->vdpa_dev;
+	if (!vdpa_dev) {
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	if (vdpa_dev->ops->get_dev_type) {
+		ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
+		if (ret) {
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) failed to get vdpa dev type for socket file.\n",
+					 path);
+			ret = -1;
+			goto unlock_exit;
+		}
+	} else {
+		vdpa_type = VDPA_DEVICE_TYPE_NET;
+	}
+
+	*type = vdpa_type;
+
+unlock_exit:
+	pthread_mutex_unlock(&vhost_user.mutex);
+	return ret;
+}
+
+int
 rte_vhost_driver_disable_features(const char *path, uint64_t features)
 {
 	struct vhost_user_socket *vsocket;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index e59a834..9cbd7cd 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
 	/** Set the device configuration space */
 	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
 		      uint32_t size, uint32_t flags);
+
+	/** get device type: net device, blk device... */
+	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
 };
 
 /**
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 5841315..583b4f3 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -90,6 +90,7 @@ EXPERIMENTAL {
 
 	# added in 22.07
 	rte_vhost_async_get_inflight_thread_unsafe;
+	rte_vhost_driver_get_vdpa_dev_type;
 
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc driver
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 07/13] vhost: add API to get vDPA device type Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  7:30       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 09/13] examples/vdpa: add add virtio blk support Andy Pei
                       ` (4 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add get device type ops to ifc driver.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3e78c7d..0acfa8c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1189,6 +1189,29 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static int
+ifcvf_get_device_type(struct rte_vdpa_device *vdev,
+	uint32_t *type)
+{
+	struct ifcvf_internal *internal;
+	struct internal_list *list;
+
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	if (internal->hw.device_type == IFCVF_BLK)
+		*type = VDPA_DEVICE_TYPE_BLK;
+	else
+		*type = VDPA_DEVICE_TYPE_NET;
+
+	return 0;
+}
+
 static struct rte_vdpa_dev_ops ifcvf_ops = {
 	.get_queue_num = ifcvf_get_queue_num,
 	.get_features = ifcvf_get_vdpa_features,
@@ -1201,6 +1224,7 @@ struct rte_vdpa_dev_info {
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
 	.get_notify_area = ifcvf_get_notify_area,
+	.get_dev_type = ifcvf_get_device_type,
 };
 
 static inline int
@@ -1327,6 +1351,7 @@ struct rte_vdpa_dev_info {
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
 	.get_notify_area = ifcvf_get_notify_area,
 	.get_config = ifcvf_blk_get_config,
+	.get_dev_type = ifcvf_get_device_type,
 };
 
 struct rte_vdpa_dev_info dev_info[] = {
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 09/13] examples/vdpa: add add virtio blk support
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  7:40       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 10/13] usertools: add support for virtio blk device Andy Pei
                       ` (3 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add virtio blk device support to vDPA example.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             | 56 ++++++++++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 58 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..62b6660 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -159,8 +160,53 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t proto_features = 0;
+	int rc;
+
+	/* Returns 0, or the return value of the first vhost call that fails. */
+	rc = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
+	if (rc != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		return rc;
+	}
+
+	rc = rte_vhost_driver_disable_features(path,
+		VHOST_BLK_DISABLED_FEATURES);
+	if (rc != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		return rc;
+	}
+
+	rc = rte_vhost_driver_get_protocol_features(path, &proto_features);
+	if (rc != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		return rc;
+	}
+
+	/* Keep the bits already set and add the blk-specific ones. */
+	proto_features |= VHOST_BLK_PROTOCOL_FEATURES;
+
+	rc = rte_vhost_driver_set_protocol_features(path, proto_features);
+	if (rc != 0)
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+
+	return rc;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
+	uint32_t device_type = 0;
 	int ret;
 	char *socket_path = vport->ifname;
 
@@ -192,6 +238,16 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type);
+	if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) {
+		RTE_LOG(NOTICE, VDPA, "%s is a blk device\n", socket_path);
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..4193561
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Virtio blk feature bits and vhost feature masks for the vDPA example
+ */
+
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1))
+
+#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX))
+
+#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \
+	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 10/13] usertools: add support for virtio blk device
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 09/13] examples/vdpa: add add virtio blk support Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  7:43       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
                       ` (2 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add virtio blk device support to devbind.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..7231be4 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -72,6 +72,9 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+                    'SVendor': None, 'SDevice': None}
+
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -82,7 +85,7 @@
 compress_devices = [cavium_zip]
 regex_devices = [cn9k_ree]
 misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev,
-                intel_ntb_skx, intel_ntb_icx]
+                intel_ntb_skx, intel_ntb_icx, virtio_blk]
 
 # global dict ethernet devices present. Dictionary indexed by PCI address.
 # Each device within this is itself a dictionary of device properties
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 11/13] vdpa/ifc: add log for config space of virtio blk
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 10/13] usertools: add support for virtio blk device Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  7:46       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
  2022-05-18 12:13     ` [PATCH v8 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add some log of virtio blk device config space information
at VDPA launch before qemu connects.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 0acfa8c..376a1af 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1382,6 +1382,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1448,6 +1451,37 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		/* The capacity is the number of 512-byte sectors.
+		 * Right shifting by 1 bit gives the size in K,
+		 * another 10 bits gives the size in M,
+		 * and 10 more bits gives the size in G.
+		 * To show the capacity in G, right shift by 21 bits in total.
+		 */
+		DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(DEBUG, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(DEBUG, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(DEBUG, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(DEBUG, "geometry");
+		DRV_LOG(DEBUG, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(DEBUG, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(DEBUG, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(DEBUG, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 12/13] vdpa/ifc: add interrupt handling for config space
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  7:54       ` Xia, Chenbo
  2022-05-18 12:13     ` [PATCH v8 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Create a thread to poll and relay config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 118 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 117 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 376a1af..8a49622 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid; /* thread for config space change interrupt relay */
 	int epfd;
+	int csc_epfd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -566,6 +568,111 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+/* Tell the vhost library that the device config space changed.
+ * A failure is logged and otherwise ignored.
+ */
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	/* NOTE(review): the 1 presumably requests a reply - confirm. */
+	if (rte_vhost_slave_config_change(internal->vid, 1))
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+/* Thread body: poll the device interrupt fd and relay config changes. */
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_epfd, csc_val = 0;
+
+	csc_epfd = epoll_create(1);
+	if (csc_epfd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_epfd, EPOLL_CTL_ADD, ev.data.fd, &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		/* Not stored in internal->csc_epfd yet, so close it here. */
+		close(csc_epfd);
+		return NULL;
+	}
+
+	internal->csc_epfd = csc_epfd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_epfd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail.");
+			return NULL;
+		}
+		if (csc_val == 0)
+			continue;
+
+		/* csc_val > 0: consume the pending data on the ready fd. */
+		nbytes = read(csc_event.data.fd, &buf, 8);
+		if (nbytes < 0) {
+			if (errno == EINTR || errno == EWOULDBLOCK ||
+			    errno == EAGAIN)
+				continue;
+			DRV_LOG(ERR, "Error reading from file descriptor %d: %s",
+				csc_event.data.fd, strerror(errno));
+			return NULL;
+		}
+		if (nbytes == 0) {
+			DRV_LOG(ERR, "Read nothing from file descriptor %d",
+				csc_event.data.fd);
+			continue;
+		}
+		virtio_interrupt_handler(internal);
+	}
+
+	return NULL;
+}
+
+/* Start the control thread that runs intr_relay(); returns 0 or -1. */
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	char name[THREAD_NAME_LEN];
+
+	snprintf(name, sizeof(name), "ifc-intr-%d", internal->vid);
+	if (rte_ctrl_thread_create(&internal->intr_tid, name, NULL,
+				   intr_relay, (void *)internal)) {
+		DRV_LOG(ERR, "failed to create interrupt relay pthread.");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Stop the interrupt relay thread and close its epoll fd; returns 0. */
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	if (internal->intr_tid) {
+		/* The relay thread loops forever, so cancel it before joining. */
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, NULL);
+		internal->intr_tid = 0;
+	}
+
+	if (internal->csc_epfd >= 0)
+		close(internal->csc_epfd);
+	internal->csc_epfd = -1;
+
+	return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -592,10 +699,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		ret = unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -812,7 +925,7 @@ struct rte_vdpa_dev_info {
 		if (nfds < 0) {
 			if (errno == EINTR)
 				continue;
-			DRV_LOG(ERR, "epoll_wait return fail\n");
+			DRV_LOG(ERR, "epoll_wait return fail.");
 			return NULL;
 		}
 
@@ -888,6 +1001,9 @@ struct rte_vdpa_dev_info {
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
 	vdpa_ifcvf_stop(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v8 13/13] vdpa/ifc/base: access correct register for blk device
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-05-18 12:13     ` [PATCH v8 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
@ 2022-05-18 12:13     ` Andy Pei
  2022-05-23  7:55       ` Xia, Chenbo
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-18 12:13 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
The register addresses differ between net and blk devices.
Most of the code is shared; where the register address
differs, check the device type so that net and blk devices
take the appropriate code path.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..dd475a7 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,17 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->device_type == IFCVF_BLK)
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		else
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +261,22 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->device_type == IFCVF_BLK)
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		else
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+
+		if (hw->device_type == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device
  2022-05-18 12:13     ` [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-05-23  3:41       ` Xia, Chenbo
  2022-05-23  4:33         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  3:41 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Andy,
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device
> 
> Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id.
> Blk and net device are implemented with proper feature and ops.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 91
> +++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 98 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 573a35f..483d38b 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -5,8 +5,17 @@
>  #ifndef _IFCVF_H_
>  #define _IFCVF_H_
> 
> +#include <linux/virtio_blk.h>
>  #include "ifcvf_osdep.h"
> 
> +#define IFCVF_NET	0
> +#define IFCVF_BLK	1
> +
> +/* for BLK */
> +#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
> +#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
> +#define IFCVF_BLK_DEVICE_ID                 0x0002
> +
>  #define IFCVF_VENDOR_ID		0x1AF4
>  #define IFCVF_DEVICE_ID		0x1041
>  #define IFCVF_SUBSYS_VENDOR_ID	0x8086
Let's rename IFCVF_DEVICE_ID to IFCVF_NET_DEVICE_ID as it's only used for net now.
> @@ -126,13 +135,18 @@ struct ifcvf_hw {
>  	u8     notify_region;
>  	u32    notify_off_multiplier;
>  	struct ifcvf_pci_common_cfg *common_cfg;
> -	struct ifcvf_net_config *dev_cfg;
> +	union {
> +		struct ifcvf_net_config *net_cfg;
> +		struct virtio_blk_config *blk_cfg;
> +		void *dev_cfg;
> +	};
>  	u8     *isr;
>  	u16    *notify_base;
>  	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
>  	u8     *lm_cfg;
>  	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
>  	u8 nr_vring;
> +	int device_type;
>  	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
>  };
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 9f05595..be0efd3 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -75,6 +75,12 @@ struct internal_list {
>  	struct ifcvf_internal *internal;
>  };
> 
> +/* vdpa device info includes device features and devcic operation. */
> +struct rte_vdpa_dev_info {
> +	uint64_t features;
> +	struct rte_vdpa_dev_ops *ops;
> +};
> +
>  TAILQ_HEAD(internal_list_head, internal_list);
>  static struct internal_list_head internal_list =
>  	TAILQ_HEAD_INITIALIZER(internal_list);
> @@ -1167,6 +1173,48 @@ struct internal_list {
>  	return 0;
>  }
> 
> +static int16_t
> +ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
> +{
> +	uint16_t pci_device_id = pci_dev->id.device_id;
> +	uint16_t device_id;
> +
> +	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
> +		DRV_LOG(ERR, "Probe device is not a virtio device\n");
> +		return -1;
> +	}
> +
> +	if (pci_device_id < 0x1040) {
> +		/* Transitional devices: use the PCI subsystem device id as
> +		 * virtio device id, same as legacy driver always did.
> +		 */
> +		device_id = pci_dev->id.subsystem_device_id;
> +	} else {
> +		/* Modern devices: simply use PCI device id,
> +		 * but start from 0x1040.
> +		 */
> +		device_id = pci_device_id - 0x1040;
> +	}
> +
> +	return device_id;
> +}
> +
> +struct rte_vdpa_dev_info dev_info[] = {
> +	{
> +		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
> +			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
> +			    (1ULL << VIRTIO_NET_F_STATUS) |
> +			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
> +			    (1ULL << VHOST_F_LOG_ALL),
> +		.ops = &ifcvf_ops,
Rename ifcvf_ops -> ifcvf_net_ops
Overall the patch LGTM. With above fixed:
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> +	},
> +	{
> +		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
> +			    (1ULL << VHOST_F_LOG_ALL),
> +		.ops = NULL,
> +	},
> +};
> +
>  static int
>  ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  		struct rte_pci_device *pci_dev)
> @@ -1178,6 +1226,7 @@ struct internal_list {
>  	int sw_fallback_lm = 0;
>  	struct rte_kvargs *kvlist = NULL;
>  	int ret = 0;
> +	int16_t device_id;
> 
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>  		return 0;
> @@ -1227,13 +1276,24 @@ struct internal_list {
>  	internal->configured = 0;
>  	internal->max_queues = IFCVF_MAX_QUEUES;
>  	features = ifcvf_get_features(&internal->hw);
> -	internal->features = (features &
> -		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
> -		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
> -		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
> -		(1ULL << VIRTIO_NET_F_STATUS) |
> -		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
> -		(1ULL << VHOST_F_LOG_ALL);
> +
> +	device_id = ifcvf_pci_get_device_type(pci_dev);
> +	if (device_id < 0) {
> +		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
> +		goto error;
> +	}
> +
> +	if (device_id == VIRTIO_ID_NET) {
> +		internal->hw.device_type = IFCVF_NET;
> +		internal->features = features &
> +					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
> +		internal->features |= dev_info[IFCVF_NET].features;
> +	} else if (device_id == VIRTIO_ID_BLOCK) {
> +		internal->hw.device_type = IFCVF_BLK;
> +		internal->features = features &
> +					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
> +		internal->features |= dev_info[IFCVF_BLK].features;
> +	}
> 
>  	list->internal = internal;
> 
> @@ -1245,7 +1305,8 @@ struct internal_list {
>  	}
>  	internal->sw_lm = sw_fallback_lm;
> 
> -	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
> &ifcvf_ops);
> +	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
> +				dev_info[internal->hw.device_type].ops);
>  	if (internal->vdev == NULL) {
>  		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
>  		goto error;
> @@ -1313,6 +1374,20 @@ struct internal_list {
>  	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
>  	},
> 
> +	{ .class_id = RTE_CLASS_ANY_ID,
> +	  .vendor_id = IFCVF_VENDOR_ID,
> +	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
> +	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
> +	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
> +	},
> +
> +	{ .class_id = RTE_CLASS_ANY_ID,
> +	  .vendor_id = IFCVF_VENDOR_ID,
> +	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
> +	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
> +	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
> +	},
> +
>  	{ .vendor_id = 0, /* sentinel */
>  	},
>  };
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 02/13] vhost: add vDPA ops for blk device
  2022-05-18 12:13     ` [PATCH v8 02/13] vhost: add vDPA ops for " Andy Pei
@ 2022-05-23  3:46       ` Xia, Chenbo
  2022-05-23  4:38         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  3:46 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 02/13] vhost: add vDPA ops for blk device
> 
> Get_config and set_config are necessary ops for blk device.
> Add get_config and set_config ops to vDPA ops.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/vhost/vdpa_driver.h | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
> index 88138be..e59a834 100644
> --- a/lib/vhost/vdpa_driver.h
> +++ b/lib/vhost/vdpa_driver.h
> @@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
>  	/** Reset statistics of the queue */
>  	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
> 
> -	/** Reserved for future extension */
> -	void *reserved[2];
> +	/** Get the device configuration space */
> +	int (*get_config)(int vid, uint8_t *config, uint32_t len);
One suggestion is renaming 'len' to 'size' to make it aligned to set_config.
With above fixed:
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> +
> +	/** Set the device configuration space */
> +	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
> +		      uint32_t size, uint32_t flags);
>  };
> 
>  /**
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 03/13] vhost: add vhost msg support for get/set config
  2022-05-18 12:13     ` [PATCH v8 03/13] vhost: add vhost msg support for get/set config Andy Pei
@ 2022-05-23  3:54       ` Xia, Chenbo
  2022-05-23  4:49         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  3:54 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 03/13] vhost: add vhost msg support for get/set config
> 
> Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> supported by virtio blk VDPA device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 73
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vhost_user.h | 13 +++++++++
>  2 files changed, 86 insertions(+)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 850848c..1dd1e25f 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -2468,6 +2468,77 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
> 
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (validate_msg_fds(dev, ctx, 0) != 0)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	if (vdpa_dev->ops->get_config) {
> +		ret = vdpa_dev->ops->get_config(dev->vid,
> +					   ctx->msg.payload.cfg.region,
> +					   ctx->msg.payload.cfg.size);
> +		if (ret != 0) {
> +			ctx->msg.size = 0;
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) get_config() return error!\n",
> +					 dev->ifname);
> +		}
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (validate_msg_fds(dev, ctx, 0) != 0)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) vhost_user_config size: %"PRIu32", should not be
> larger than %d\n",
> +			dev->ifname, ctx->msg.payload.cfg.size,
> +			VHOST_USER_MAX_CONFIG_SIZE);
> +		goto out;
> +	}
> +
> +	if (vdpa_dev->ops->set_config) {
> +		ret = vdpa_dev->ops->set_config(dev->vid,
> +			ctx->msg.payload.cfg.region,
> +			ctx->msg.payload.cfg.offset,
> +			ctx->msg.payload.cfg.size,
> +			ctx->msg.payload.cfg.flags);
> +		if (ret)
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) set_config() return error!\n",
> +					 dev->ifname);
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_OK;
> +
> +out:
> +	return RTE_VHOST_MSG_RESULT_ERR;
> +}
I missed one thing in previous reviews: vdpa_dev pointer needed to be checked (Not Null)
in both set/get config.
Thanks,
Chenbo
> +
> +static int
>  vhost_user_iotlb_msg(struct virtio_net **pdev,
>  			struct vhu_msg_context *ctx,
>  			int main_fd __rte_unused)
> @@ -2686,6 +2757,8 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  VHOST_MESSAGE_HANDLER(VHOST_USER_NET_SET_MTU, vhost_user_net_set_mtu,
> false) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_SLAVE_REQ_FD, vhost_user_set_req_fd,
> true) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_IOTLB_MSG, vhost_user_iotlb_msg, false)
> \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_GET_CONFIG, vhost_user_get_config, false)
> \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_SET_CONFIG, vhost_user_set_config, false)
> \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_ADVISE,
> vhost_user_set_postcopy_advise, false) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_LISTEN,
> vhost_user_set_postcopy_listen, false) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_END, vhost_user_postcopy_end,
> false) \
> diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
> index ba1c5c7..c4d091e 100644
> --- a/lib/vhost/vhost_user.h
> +++ b/lib/vhost/vhost_user.h
> @@ -50,6 +50,8 @@
>  	VHOST_USER_NET_SET_MTU = 20,
>  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
>  	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
>  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
>  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
>  	VHOST_USER_POSTCOPY_ADVISE = 28,
> @@ -123,6 +125,16 @@
>  	uint16_t queue_size;
>  } VhostUserInflight;
> 
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
>  typedef struct VhostUserMsg {
>  	union {
>  		uint32_t master; /* a VhostUserRequest value */
> @@ -146,6 +158,7 @@
>  		VhostUserCryptoSessionParam crypto_session;
>  		VhostUserVringArea area;
>  		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
>  	} payload;
>  	/* Nothing should be added after the payload */
>  } __rte_packed VhostUserMsg;
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device
  2022-05-18 12:13     ` [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-05-23  4:07       ` Xia, Chenbo
  2022-05-23  5:04         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  4:07 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device
> 
> For virtio blk device, re-use part of ifc driver ops.
> Implement ifcvf_blk_get_config for virtio blk device.
> Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
> blk device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.h |  4 ++
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 91
> ++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 94 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 483d38b..244de46 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -67,6 +67,10 @@
>  #define IFCVF_32_BIT_MASK		0xffffffff
> 
> 
> +#ifndef VHOST_USER_PROTOCOL_F_CONFIG
> +#define VHOST_USER_PROTOCOL_F_CONFIG	9
> +#endif
> +
>  struct ifcvf_pci_cap {
>  	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
>  	u8 cap_next;            /* Generic PCI field: next ptr. */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index be0efd3..350214a 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1087,6 +1087,10 @@ struct rte_vdpa_dev_info {
>  		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
>  		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
>  		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
> +
> +#define VDPA_BLK_PROTOCOL_FEATURES \
> +		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
> +
>  static int
>  ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t
> *features)
>  {
> @@ -1199,6 +1203,91 @@ struct rte_vdpa_dev_info {
>  	return device_id;
>  }
> 
> +static int
> +ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
> +{
> +	struct virtio_blk_config *dev_cfg;
> +	struct ifcvf_internal *internal;
> +	struct rte_vdpa_device *vdev;
> +	struct internal_list *list;
> +	uint32_t i;
> +	uint64_t capacity = 0;
> +	uint8_t *byte;
> +
> +	if (len < sizeof(struct virtio_blk_config)) {
> +		DRV_LOG(ERR, "Invalid len: %u, required: %u",
> +			len, (uint32_t)sizeof(struct virtio_blk_config));
> +		return -1;
> +	}
I believe len should not be larger than sizeof(struct virtio_blk_config) either?
One more question: should we only allow reading the whole blk config?
I guess we have this check because of the current usage in QEMU?
This is fine as it's vendor-specific logic; I'm just wondering whether
we should have this limit for the blk device.
Thanks,
Chenbo
> +
> +	vdev = rte_vhost_get_vdpa_device(vid);
> +	list = find_internal_resource_by_vdev(vdev);
> +	if (list == NULL) {
> +		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
> +		return -1;
> +	}
> +
> +	internal = list->internal;
> +
> +	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
> +		config[i] = *((u8 *)internal->hw.blk_cfg + i);
> +
> +	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
> +
> +	/* cannot read 64-bit register in one attempt, so read byte by byte.
> */
> +	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> +		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> +		capacity |= (uint64_t)*byte << (i * 8);
> +	}
> +	/* The capacity is number of sectors in 512-byte.
> +	 * So right shift 1 bit  we get in K,
> +	 * another right shift 10 bits we get in M,
> +	 * right shift 10 more bits, we get in G.
> +	 * To show capacity in G, we right shift 21 bits in total.
> +	 */
> +	DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
> +
> +	DRV_LOG(DEBUG, "size_max  : 0x%08x", dev_cfg->size_max);
> +	DRV_LOG(DEBUG, "seg_max   : 0x%08x", dev_cfg->seg_max);
> +	DRV_LOG(DEBUG, "blk_size  : 0x%08x", dev_cfg->blk_size);
> +	DRV_LOG(DEBUG, "geometry");
> +	DRV_LOG(DEBUG, "      cylinders: %u", dev_cfg->geometry.cylinders);
> +	DRV_LOG(DEBUG, "      heads    : %u", dev_cfg->geometry.heads);
> +	DRV_LOG(DEBUG, "      sectors  : %u", dev_cfg->geometry.sectors);
> +	DRV_LOG(DEBUG, "num_queues: 0x%08x", dev_cfg->num_queues);
> +
> +	DRV_LOG(DEBUG, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
> +		config[0], config[1], config[2], config[3], config[4],
> +		config[5], config[6], config[7]);
> +	return 0;
> +}
> +
> +static int
> +ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
> +	uint64_t *features)
> +{
> +	RTE_SET_USED(vdev);
> +
> +	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
> +	*features |= VDPA_BLK_PROTOCOL_FEATURES;
> +	return 0;
> +}
> +
> +static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
> +	.get_queue_num = ifcvf_get_queue_num,
> +	.get_features = ifcvf_get_vdpa_features,
> +	.set_features = ifcvf_set_features,
> +	.get_protocol_features = ifcvf_blk_get_protocol_features,
> +	.dev_conf = ifcvf_dev_config,
> +	.dev_close = ifcvf_dev_close,
> +	.set_vring_state = ifcvf_set_vring_state,
> +	.migration_done = NULL,
> +	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
> +	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> +	.get_notify_area = ifcvf_get_notify_area,
> +	.get_config = ifcvf_blk_get_config,
> +};
> +
>  struct rte_vdpa_dev_info dev_info[] = {
>  	{
>  		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
> @@ -1211,7 +1300,7 @@ struct rte_vdpa_dev_info dev_info[] = {
>  	{
>  		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
>  			    (1ULL << VHOST_F_LOG_ALL),
> -		.ops = NULL,
> +		.ops = &ifcvf_blk_ops,
>  	},
>  };
> 
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 05/13] vdpa/ifc: add vDPA interrupt relay for blk device
  2022-05-18 12:13     ` [PATCH v8 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
@ 2022-05-23  4:10       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  4:10 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 05/13] vdpa/ifc: add vDPA interrupt relay for blk
> device
> 
> For the net device type, only interrupt of rxq needed to be relayed.
> But for block, since all the queues are used for both read and write
> requests. Interrupt of all queues needed to be relayed.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 350214a..509a1ed 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -369,6 +369,7 @@ struct rte_vdpa_dev_info {
>  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>  	irq_set->start = 0;
>  	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change
> notification */
>  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>  		rte_intr_fd_get(internal->pdev->intr_handle);
> 
> @@ -378,7 +379,13 @@ struct rte_vdpa_dev_info {
>  	for (i = 0; i < nr_vring; i++) {
>  		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
>  		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> +		if (m_rx == true &&
> +			((i & 1) == 0 || internal->hw.device_type == IFCVF_BLK))
> {
> +			/* For the net we only need to relay rx queue,
> +			 * which will change the mem of VM.
> +			 * For the blk we need to relay all the read cmd
> +			 * of each queue
> +			 */
>  			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>  			if (fd < 0) {
>  				DRV_LOG(ERR, "can't setup eventfd: %s",
> --
> 1.8.3.1
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device
  2022-05-23  3:41       ` Xia, Chenbo
@ 2022-05-23  4:33         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  4:33 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Thanks for your reply, my reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 11:42 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device
> 
> Hi Andy,
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device
> >
> > Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id.
> > Blk and net device are implemented with proper feature and ops.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++-
> > drivers/vdpa/ifc/ifcvf_vdpa.c | 91
> > +++++++++++++++++++++++++++++++++++++++----
> >  2 files changed, 98 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.h
> > b/drivers/vdpa/ifc/base/ifcvf.h index 573a35f..483d38b 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.h
> > +++ b/drivers/vdpa/ifc/base/ifcvf.h
> > @@ -5,8 +5,17 @@
> >  #ifndef _IFCVF_H_
> >  #define _IFCVF_H_
> >
> > +#include <linux/virtio_blk.h>
> >  #include "ifcvf_osdep.h"
> >
> > +#define IFCVF_NET	0
> > +#define IFCVF_BLK	1
> > +
> > +/* for BLK */
> > +#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
> > +#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
> > +#define IFCVF_BLK_DEVICE_ID                 0x0002
> > +
> >  #define IFCVF_VENDOR_ID		0x1AF4
> >  #define IFCVF_DEVICE_ID		0x1041
> >  #define IFCVF_SUBSYS_VENDOR_ID	0x8086
> 
> Let's rename IFCVF_DEVICE_ID to IFCVF_NET_DEVICE_ID as it's only used for
> net now.
> 
Sure, I will do it in the next version.
> > @@ -126,13 +135,18 @@ struct ifcvf_hw {
> >  	u8     notify_region;
> >  	u32    notify_off_multiplier;
> >  	struct ifcvf_pci_common_cfg *common_cfg;
> > -	struct ifcvf_net_config *dev_cfg;
> > +	union {
> > +		struct ifcvf_net_config *net_cfg;
> > +		struct virtio_blk_config *blk_cfg;
> > +		void *dev_cfg;
> > +	};
> >  	u8     *isr;
> >  	u16    *notify_base;
> >  	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
> >  	u8     *lm_cfg;
> >  	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
> >  	u8 nr_vring;
> > +	int device_type;
> >  	struct ifcvf_pci_mem_resource
> mem_resource[IFCVF_PCI_MAX_RESOURCE];
> >  };
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 9f05595..be0efd3 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -75,6 +75,12 @@ struct internal_list {
> >  	struct ifcvf_internal *internal;
> >  };
> >
> > +/* vdpa device info includes device features and devcic operation. */
> > +struct rte_vdpa_dev_info {
> > +	uint64_t features;
> > +	struct rte_vdpa_dev_ops *ops;
> > +};
> > +
> >  TAILQ_HEAD(internal_list_head, internal_list);  static struct
> > internal_list_head internal_list =
> >  	TAILQ_HEAD_INITIALIZER(internal_list);
> > @@ -1167,6 +1173,48 @@ struct internal_list {
> >  	return 0;
> >  }
> >
> > +static int16_t
> > +ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev) {
> > +	uint16_t pci_device_id = pci_dev->id.device_id;
> > +	uint16_t device_id;
> > +
> > +	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
> > +		DRV_LOG(ERR, "Probe device is not a virtio device\n");
> > +		return -1;
> > +	}
> > +
> > +	if (pci_device_id < 0x1040) {
> > +		/* Transitional devices: use the PCI subsystem device id as
> > +		 * virtio device id, same as legacy driver always did.
> > +		 */
> > +		device_id = pci_dev->id.subsystem_device_id;
> > +	} else {
> > +		/* Modern devices: simply use PCI device id,
> > +		 * but start from 0x1040.
> > +		 */
> > +		device_id = pci_device_id - 0x1040;
> > +	}
> > +
> > +	return device_id;
> > +}
> > +
> > +struct rte_vdpa_dev_info dev_info[] = {
> > +	{
> > +		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
> > +			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
> > +			    (1ULL << VIRTIO_NET_F_STATUS) |
> > +			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
> > +			    (1ULL << VHOST_F_LOG_ALL),
> > +		.ops = &ifcvf_ops,
> 
> Rename ifcvf_ops -> ifcvf_net_ops
> 
> Overall the patch LGTM. With above fixed:
> 
> Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> 
OK, I will do this in the next version.
> > +	},
> > +	{
> > +		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)
> |
> > +			    (1ULL << VHOST_F_LOG_ALL),
> > +		.ops = NULL,
> > +	},
> > +};
> > +
> >  static int
> >  ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
> >  		struct rte_pci_device *pci_dev)
> > @@ -1178,6 +1226,7 @@ struct internal_list {
> >  	int sw_fallback_lm = 0;
> >  	struct rte_kvargs *kvlist = NULL;
> >  	int ret = 0;
> > +	int16_t device_id;
> >
> >  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> >  		return 0;
> > @@ -1227,13 +1276,24 @@ struct internal_list {
> >  	internal->configured = 0;
> >  	internal->max_queues = IFCVF_MAX_QUEUES;
> >  	features = ifcvf_get_features(&internal->hw);
> > -	internal->features = (features &
> > -		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
> > -		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
> > -		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
> > -		(1ULL << VIRTIO_NET_F_STATUS) |
> > -		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
> > -		(1ULL << VHOST_F_LOG_ALL);
> > +
> > +	device_id = ifcvf_pci_get_device_type(pci_dev);
> > +	if (device_id < 0) {
> > +		DRV_LOG(ERR, "failed to get device %s type", pci_dev-
> >name);
> > +		goto error;
> > +	}
> > +
> > +	if (device_id == VIRTIO_ID_NET) {
> > +		internal->hw.device_type = IFCVF_NET;
> > +		internal->features = features &
> > +					~(1ULL <<
> VIRTIO_F_IOMMU_PLATFORM);
> > +		internal->features |= dev_info[IFCVF_NET].features;
> > +	} else if (device_id == VIRTIO_ID_BLOCK) {
> > +		internal->hw.device_type = IFCVF_BLK;
> > +		internal->features = features &
> > +					~(1ULL <<
> VIRTIO_F_IOMMU_PLATFORM);
> > +		internal->features |= dev_info[IFCVF_BLK].features;
> > +	}
> >
> >  	list->internal = internal;
> >
> > @@ -1245,7 +1305,8 @@ struct internal_list {
> >  	}
> >  	internal->sw_lm = sw_fallback_lm;
> >
> > -	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
> > &ifcvf_ops);
> > +	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
> > +				dev_info[internal->hw.device_type].ops);
> >  	if (internal->vdev == NULL) {
> >  		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
> >  		goto error;
> > @@ -1313,6 +1374,20 @@ struct internal_list {
> >  	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
> >  	},
> >
> > +	{ .class_id = RTE_CLASS_ANY_ID,
> > +	  .vendor_id = IFCVF_VENDOR_ID,
> > +	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
> > +	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
> > +	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
> > +	},
> > +
> > +	{ .class_id = RTE_CLASS_ANY_ID,
> > +	  .vendor_id = IFCVF_VENDOR_ID,
> > +	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
> > +	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
> > +	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
> > +	},
> > +
> >  	{ .vendor_id = 0, /* sentinel */
> >  	},
> >  };
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 02/13] vhost: add vDPA ops for blk device
  2022-05-23  3:46       ` Xia, Chenbo
@ 2022-05-23  4:38         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  4:38 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Thanks for your reply, I will send out a new version to address this.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 11:46 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 02/13] vhost: add vDPA ops for blk device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 02/13] vhost: add vDPA ops for blk device
> >
> > Get_config and set_config are necessary ops for blk device.
> > Add get_config and set_config ops to vDPA ops.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---
> >  lib/vhost/vdpa_driver.h | 8 ++++++--
> >  1 file changed, 6 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h index
> > 88138be..e59a834 100644
> > --- a/lib/vhost/vdpa_driver.h
> > +++ b/lib/vhost/vdpa_driver.h
> > @@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
> >  	/** Reset statistics of the queue */
> >  	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
> >
> > -	/** Reserved for future extension */
> > -	void *reserved[2];
> > +	/** Get the device configuration space */
> > +	int (*get_config)(int vid, uint8_t *config, uint32_t len);
> 
> One suggestion is renaming 'len' to 'size' to make it aligned to set_config.
> 
> With above fixed:
> 
> Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> 
> > +
> > +	/** Set the device configuration space */
> > +	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
> > +		      uint32_t size, uint32_t flags);
> >  };
> >
> >  /**
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 03/13] vhost: add vhost msg support for get/set config
  2022-05-23  3:54       ` Xia, Chenbo
@ 2022-05-23  4:49         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  4:49 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Thanks for your reply.
I will add this check in the next version of the patch.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 11:55 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 03/13] vhost: add vhost msg support for get/set
> config
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 03/13] vhost: add vhost msg support for get/set
> > config
> >
> > Add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG.
> > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > supported by virtio blk VDPA device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  lib/vhost/vhost_user.c | 73
> > ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/vhost/vhost_user.h | 13 +++++++++
> >  2 files changed, 86 insertions(+)
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 850848c..1dd1e25f 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -2468,6 +2468,77 @@ static int is_vring_iotlb(struct virtio_net
> > *dev,  }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (validate_msg_fds(dev, ctx, 0) != 0)
> > +		return RTE_VHOST_MSG_RESULT_ERR;
> > +
> > +	if (vdpa_dev->ops->get_config) {
> > +		ret = vdpa_dev->ops->get_config(dev->vid,
> > +					   ctx->msg.payload.cfg.region,
> > +					   ctx->msg.payload.cfg.size);
> > +		if (ret != 0) {
> > +			ctx->msg.size = 0;
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) get_config() return error!\n",
> > +					 dev->ifname);
> > +		}
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> supported!\n",
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_REPLY;
> > +}
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (validate_msg_fds(dev, ctx, 0) != 0)
> > +		return RTE_VHOST_MSG_RESULT_ERR;
> > +
> > +	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +			"(%s) vhost_user_config size: %"PRIu32", should not
> be
> > larger than %d\n",
> > +			dev->ifname, ctx->msg.payload.cfg.size,
> > +			VHOST_USER_MAX_CONFIG_SIZE);
> > +		goto out;
> > +	}
> > +
> > +	if (vdpa_dev->ops->set_config) {
> > +		ret = vdpa_dev->ops->set_config(dev->vid,
> > +			ctx->msg.payload.cfg.region,
> > +			ctx->msg.payload.cfg.offset,
> > +			ctx->msg.payload.cfg.size,
> > +			ctx->msg.payload.cfg.flags);
> > +		if (ret)
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) set_config() return error!\n",
> > +					 dev->ifname);
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> supported!\n",
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_OK;
> > +
> > +out:
> > +	return RTE_VHOST_MSG_RESULT_ERR;
> > +}
> 
> I missed one thing in previous reviews: vdpa_dev pointer needed to be
> checked (Not Null) in both set/get config.
> 
> Thanks,
> Chenbo
> 
> > +
> > +static int
> >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> >  			struct vhu_msg_context *ctx,
> >  			int main_fd __rte_unused)
> > @@ -2686,6 +2757,8 @@ static int is_vring_iotlb(struct virtio_net
> > *dev,  VHOST_MESSAGE_HANDLER(VHOST_USER_NET_SET_MTU,
> > vhost_user_net_set_mtu,
> > false) \
> >  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_SLAVE_REQ_FD,
> > vhost_user_set_req_fd,
> > true) \
> >  VHOST_MESSAGE_HANDLER(VHOST_USER_IOTLB_MSG,
> vhost_user_iotlb_msg,
> > false) \
> > +VHOST_MESSAGE_HANDLER(VHOST_USER_GET_CONFIG,
> vhost_user_get_config,
> > +false)
> > \
> > +VHOST_MESSAGE_HANDLER(VHOST_USER_SET_CONFIG,
> vhost_user_set_config,
> > +false)
> > \
> >  VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_ADVISE,
> > vhost_user_set_postcopy_advise, false) \
> > VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_LISTEN,
> > vhost_user_set_postcopy_listen, false) \
> > VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_END,
> > vhost_user_postcopy_end,
> > false) \
> > diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > ba1c5c7..c4d091e 100644
> > --- a/lib/vhost/vhost_user.h
> > +++ b/lib/vhost/vhost_user.h
> > @@ -50,6 +50,8 @@
> >  	VHOST_USER_NET_SET_MTU = 20,
> >  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> >  	VHOST_USER_IOTLB_MSG = 22,
> > +	VHOST_USER_GET_CONFIG = 24,
> > +	VHOST_USER_SET_CONFIG = 25,
> >  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> >  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> >  	VHOST_USER_POSTCOPY_ADVISE = 28,
> > @@ -123,6 +125,16 @@
> >  	uint16_t queue_size;
> >  } VhostUserInflight;
> >
> > +#define VHOST_USER_MAX_CONFIG_SIZE		256
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +	uint32_t offset;
> > +	uint32_t size;
> > +	uint32_t flags;
> > +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
> > +
> >  typedef struct VhostUserMsg {
> >  	union {
> >  		uint32_t master; /* a VhostUserRequest value */ @@ -146,6
> +158,7 @@
> >  		VhostUserCryptoSessionParam crypto_session;
> >  		VhostUserVringArea area;
> >  		VhostUserInflight inflight;
> > +		struct vhost_user_config cfg;
> >  	} payload;
> >  	/* Nothing should be added after the payload */  } __rte_packed
> > VhostUserMsg;
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device
  2022-05-23  4:07       ` Xia, Chenbo
@ 2022-05-23  5:04         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  5:04 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Thanks for your reply, my reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 12:08 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device
> >
> > For virtio blk device, re-use part of ifc driver ops.
> > Implement ifcvf_blk_get_config for virtio blk device.
> > Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio blk device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.h |  4 ++  drivers/vdpa/ifc/ifcvf_vdpa.c
> > | 91
> > ++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 94 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.h
> > b/drivers/vdpa/ifc/base/ifcvf.h index 483d38b..244de46 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.h
> > +++ b/drivers/vdpa/ifc/base/ifcvf.h
> > @@ -67,6 +67,10 @@
> >  #define IFCVF_32_BIT_MASK		0xffffffff
> >
> >
> > +#ifndef VHOST_USER_PROTOCOL_F_CONFIG
> > +#define VHOST_USER_PROTOCOL_F_CONFIG	9
> > +#endif
> > +
> >  struct ifcvf_pci_cap {
> >  	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
> >  	u8 cap_next;            /* Generic PCI field: next ptr. */
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index be0efd3..350214a 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -1087,6 +1087,10 @@ struct rte_vdpa_dev_info {
> >  		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
> >  		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
> >  		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
> > +
> > +#define VDPA_BLK_PROTOCOL_FEATURES \
> > +		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
> > +
> >  static int
> >  ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t
> > *features)
> >  {
> > @@ -1199,6 +1203,91 @@ struct rte_vdpa_dev_info {
> >  	return device_id;
> >  }
> >
> > +static int
> > +ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len) {
> > +	struct virtio_blk_config *dev_cfg;
> > +	struct ifcvf_internal *internal;
> > +	struct rte_vdpa_device *vdev;
> > +	struct internal_list *list;
> > +	uint32_t i;
> > +	uint64_t capacity = 0;
> > +	uint8_t *byte;
> > +
> > +	if (len < sizeof(struct virtio_blk_config)) {
> > +		DRV_LOG(ERR, "Invalid len: %u, required: %u",
> > +			len, (uint32_t)sizeof(struct virtio_blk_config));
> > +		return -1;
> > +	}
> 
> I believe it should not be > sizeof(struct virtio_blk_config) too?
> 
I think I can change the check to: if (len != sizeof(struct virtio_blk_config))
> And one question is should we limit only reading the whole blk config.
> I guess we are having this check because of current usage of QEMU?
> Although this is fine as it's vendor-specific logic, just wondering if we should
> have this limit for blk device.
> 
I think if the user wants to read an arbitrary part of the config space,
the args should be (int vid, uint8_t *config, uint32_t offset, uint32_t len),
where offset is the offset within struct virtio_blk_config at which the requested data starts,
and len is the size of the data the user needs to read.
That would change the message between vhost and QEMU (or other software).
I think that is another story.
> Thanks,
> Chenbo
> 
> > +
> > +	vdev = rte_vhost_get_vdpa_device(vid);
> > +	list = find_internal_resource_by_vdev(vdev);
> > +	if (list == NULL) {
> > +		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
> > +		return -1;
> > +	}
> > +
> > +	internal = list->internal;
> > +
> > +	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
> > +		config[i] = *((u8 *)internal->hw.blk_cfg + i);
> > +
> > +	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
> > +
> > +	/* cannot read 64-bit register in one attempt, so read byte by byte.
> > */
> > +	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> > +		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> > +		capacity |= (uint64_t)*byte << (i * 8);
> > +	}
> > +	/* The capacity is number of sectors in 512-byte.
> > +	 * So right shift 1 bit  we get in K,
> > +	 * another right shift 10 bits we get in M,
> > +	 * right shift 10 more bits, we get in G.
> > +	 * To show capacity in G, we right shift 21 bits in total.
> > +	 */
> > +	DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
> > +
> > +	DRV_LOG(DEBUG, "size_max  : 0x%08x", dev_cfg->size_max);
> > +	DRV_LOG(DEBUG, "seg_max   : 0x%08x", dev_cfg->seg_max);
> > +	DRV_LOG(DEBUG, "blk_size  : 0x%08x", dev_cfg->blk_size);
> > +	DRV_LOG(DEBUG, "geometry");
> > +	DRV_LOG(DEBUG, "      cylinders: %u", dev_cfg->geometry.cylinders);
> > +	DRV_LOG(DEBUG, "      heads    : %u", dev_cfg->geometry.heads);
> > +	DRV_LOG(DEBUG, "      sectors  : %u", dev_cfg->geometry.sectors);
> > +	DRV_LOG(DEBUG, "num_queues: 0x%08x", dev_cfg->num_queues);
> > +
> > +	DRV_LOG(DEBUG, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
> > +		config[0], config[1], config[2], config[3], config[4],
> > +		config[5], config[6], config[7]);
> > +	return 0;
> > +}
> > +
> > +static int
> > +ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
> > +	uint64_t *features)
> > +{
> > +	RTE_SET_USED(vdev);
> > +
> > +	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
> > +	*features |= VDPA_BLK_PROTOCOL_FEATURES;
> > +	return 0;
> > +}
> > +
> > +static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
> > +	.get_queue_num = ifcvf_get_queue_num,
> > +	.get_features = ifcvf_get_vdpa_features,
> > +	.set_features = ifcvf_set_features,
> > +	.get_protocol_features = ifcvf_blk_get_protocol_features,
> > +	.dev_conf = ifcvf_dev_config,
> > +	.dev_close = ifcvf_dev_close,
> > +	.set_vring_state = ifcvf_set_vring_state,
> > +	.migration_done = NULL,
> > +	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
> > +	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> > +	.get_notify_area = ifcvf_get_notify_area,
> > +	.get_config = ifcvf_blk_get_config,
> > +};
> > +
> >  struct rte_vdpa_dev_info dev_info[] = {
> >  	{
> >  		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
> @@ -1211,7
> > +1300,7 @@ struct rte_vdpa_dev_info dev_info[] = {
> >  	{
> >  		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)
> |
> >  			    (1ULL << VHOST_F_LOG_ALL),
> > -		.ops = NULL,
> > +		.ops = &ifcvf_blk_ops,
> >  	},
> >  };
> >
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration
  2022-05-18 12:13     ` [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-05-23  5:25       ` Xia, Chenbo
  2022-05-23  5:31         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  5:25 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration
> 
> Add SW live-migration support to block device.
> For block device, it is critical that no packet
> should be dropped. So when virtio blk device is
> pause, make sure hardware last_avail_idx and
paused
> last_used_idx are the same. This indicates all
> requests have recieved acks, and no inflight IO.
received
With above fixed:
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.h |  1 +
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 42
> ++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 39 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 244de46..4fb1736 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -65,6 +65,7 @@
>  #define IFCVF_MEDIATED_VRING		0x200000000000
> 
>  #define IFCVF_32_BIT_MASK		0xffffffff
> +#define IFCVF_16_BIT_MASK		0xffff
> 
> 
>  #ifndef VHOST_USER_PROTOCOL_F_CONFIG
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 509a1ed..3e78c7d 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -316,8 +316,34 @@ struct rte_vdpa_dev_info {
>  	uint64_t features = 0;
>  	uint64_t log_base = 0, log_size = 0;
>  	uint64_t len;
> +	u32 ring_state = 0;
> 
>  	vid = internal->vid;
> +
> +	/* to make sure no packet is lost for blk device
> +	 * do not stop until last_avail_idx == last_used_idx
> +	 */
> +	if (internal->hw.device_type == IFCVF_BLK) {
> +		for (i = 0; i < hw->nr_vring; i++) {
> +			do {
> +				if (hw->lm_cfg != NULL)
> +					ring_state = *(u32 *)(hw->lm_cfg +
> +						IFCVF_LM_RING_STATE_OFFSET +
> +						i * IFCVF_LM_CFG_SIZE);
> +				hw->vring[i].last_avail_idx =
> +					(u16)(ring_state & IFCVF_16_BIT_MASK);
> +				hw->vring[i].last_used_idx =
> +					(u16)(ring_state >> 16);
> +				if (hw->vring[i].last_avail_idx !=
> +					hw->vring[i].last_used_idx) {
> +					ifcvf_notify_queue(hw, i);
> +					usleep(10);
> +				}
> +			} while (hw->vring[i].last_avail_idx !=
> +				hw->vring[i].last_used_idx);
> +		}
> +	}
> +
>  	ifcvf_stop_hw(hw);
> 
>  	for (i = 0; i < hw->nr_vring; i++)
> @@ -641,8 +667,10 @@ struct rte_vdpa_dev_info {
>  		}
>  		hw->vring[i].avail = gpa;
> 
> -		/* Direct I/O for Tx queue, relay for Rx queue */
> -		if (i & 1) {
> +		/* NET: Direct I/O for Tx queue, relay for Rx queue
> +		 * BLK: relay every queue
> +		 */
> +		if ((internal->hw.device_type == IFCVF_NET) && (i & 1)) {
>  			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
>  			if (gpa == 0) {
>  				DRV_LOG(ERR, "Fail to get GPA for used ring.");
> @@ -692,8 +720,12 @@ struct rte_vdpa_dev_info {
> 
>  	for (i = 0; i < hw->nr_vring; i++) {
>  		/* synchronize remaining new used entries if any */
> -		if ((i & 1) == 0)
> +		if (internal->hw.device_type == IFCVF_NET) {
> +			if ((i & 1) == 0)
> +				update_used_ring(internal, i);
> +		} else if (internal->hw.device_type == IFCVF_BLK) {
>  			update_used_ring(internal, i);
> +		}
> 
>  		rte_vhost_get_vhost_vring(vid, i, &vq);
>  		len = IFCVF_USED_RING_LEN(vq.size);
> @@ -755,7 +787,9 @@ struct rte_vdpa_dev_info {
>  		}
>  	}
> 
> -	for (qid = 0; qid < q_num; qid += 2) {
> +	for (qid = 0; qid < q_num; qid += 1) {
> +		if ((internal->hw.device_type == IFCVF_NET) && (qid & 1))
> +			continue;
>  		ev.events = EPOLLIN | EPOLLPRI;
>  		/* leave a flag to mark it's for interrupt */
>  		ev.data.u64 = 1 | qid << 1 |
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration
  2022-05-23  5:25       ` Xia, Chenbo
@ 2022-05-23  5:31         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  5:31 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Thanks for your reply. I will fix them in the next version.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 1:25 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration
> >
> > Add SW live-migration support to block device.
> > For block device, it is critical that no packet should be dropped. So
> > when virtio blk device is pause, make sure hardware last_avail_idx and
> 
> paused
> 
> > last_used_idx are the same. This indicates all requests have recieved
> > acks, and no inflight IO.
> 
> received
> 
> With above fixed:
> 
> Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> 
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.h |  1 +
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 42
> > ++++++++++++++++++++++++++++++++++++++----
> >  2 files changed, 39 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.h
> > b/drivers/vdpa/ifc/base/ifcvf.h index 244de46..4fb1736 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.h
> > +++ b/drivers/vdpa/ifc/base/ifcvf.h
> > @@ -65,6 +65,7 @@
> >  #define IFCVF_MEDIATED_VRING		0x200000000000
> >
> >  #define IFCVF_32_BIT_MASK		0xffffffff
> > +#define IFCVF_16_BIT_MASK		0xffff
> >
> >
> >  #ifndef VHOST_USER_PROTOCOL_F_CONFIG
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 509a1ed..3e78c7d 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -316,8 +316,34 @@ struct rte_vdpa_dev_info {
> >  	uint64_t features = 0;
> >  	uint64_t log_base = 0, log_size = 0;
> >  	uint64_t len;
> > +	u32 ring_state = 0;
> >
> >  	vid = internal->vid;
> > +
> > +	/* to make sure no packet is lost for blk device
> > +	 * do not stop until last_avail_idx == last_used_idx
> > +	 */
> > +	if (internal->hw.device_type == IFCVF_BLK) {
> > +		for (i = 0; i < hw->nr_vring; i++) {
> > +			do {
> > +				if (hw->lm_cfg != NULL)
> > +					ring_state = *(u32 *)(hw->lm_cfg +
> > +
> 	IFCVF_LM_RING_STATE_OFFSET +
> > +						i * IFCVF_LM_CFG_SIZE);
> > +				hw->vring[i].last_avail_idx =
> > +					(u16)(ring_state &
> IFCVF_16_BIT_MASK);
> > +				hw->vring[i].last_used_idx =
> > +					(u16)(ring_state >> 16);
> > +				if (hw->vring[i].last_avail_idx !=
> > +					hw->vring[i].last_used_idx) {
> > +					ifcvf_notify_queue(hw, i);
> > +					usleep(10);
> > +				}
> > +			} while (hw->vring[i].last_avail_idx !=
> > +				hw->vring[i].last_used_idx);
> > +		}
> > +	}
> > +
> >  	ifcvf_stop_hw(hw);
> >
> >  	for (i = 0; i < hw->nr_vring; i++)
> > @@ -641,8 +667,10 @@ struct rte_vdpa_dev_info {
> >  		}
> >  		hw->vring[i].avail = gpa;
> >
> > -		/* Direct I/O for Tx queue, relay for Rx queue */
> > -		if (i & 1) {
> > +		/* NET: Direct I/O for Tx queue, relay for Rx queue
> > +		 * BLK: relay every queue
> > +		 */
> > +		if ((internal->hw.device_type == IFCVF_NET) && (i & 1)) {
> >  			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
> >  			if (gpa == 0) {
> >  				DRV_LOG(ERR, "Fail to get GPA for used
> ring."); @@ -692,8 +720,12
> > @@ struct rte_vdpa_dev_info {
> >
> >  	for (i = 0; i < hw->nr_vring; i++) {
> >  		/* synchronize remaining new used entries if any */
> > -		if ((i & 1) == 0)
> > +		if (internal->hw.device_type == IFCVF_NET) {
> > +			if ((i & 1) == 0)
> > +				update_used_ring(internal, i);
> > +		} else if (internal->hw.device_type == IFCVF_BLK) {
> >  			update_used_ring(internal, i);
> > +		}
> >
> >  		rte_vhost_get_vhost_vring(vid, i, &vq);
> >  		len = IFCVF_USED_RING_LEN(vq.size); @@ -755,7 +787,9
> @@ struct
> > rte_vdpa_dev_info {
> >  		}
> >  	}
> >
> > -	for (qid = 0; qid < q_num; qid += 2) {
> > +	for (qid = 0; qid < q_num; qid += 1) {
> > +		if ((internal->hw.device_type == IFCVF_NET) && (qid & 1))
> > +			continue;
> >  		ev.events = EPOLLIN | EPOLLPRI;
> >  		/* leave a flag to mark it's for interrupt */
> >  		ev.data.u64 = 1 | qid << 1 |
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 07/13] vhost: add API to get vDPA device type
  2022-05-18 12:13     ` [PATCH v8 07/13] vhost: add API to get vDPA device type Andy Pei
@ 2022-05-23  7:26       ` Xia, Chenbo
  2022-05-23  8:23         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  7:26 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 07/13] vhost: add API to get vDPA device type
> 
> Vhost backend of different devices have different features.
> Add a API to get vDPA device type, net device or blk device
an API
> currently, so users can set different features for different
> kinds of devices.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  doc/guides/prog_guide/vhost_lib.rst    |  5 ++++
>  doc/guides/rel_notes/release_22_07.rst |  4 ++++
>  lib/vhost/rte_vhost.h                  | 17 +++++++++++++
>  lib/vhost/socket.c                     | 44
> ++++++++++++++++++++++++++++++++++
>  lib/vhost/vdpa_driver.h                |  3 +++
>  lib/vhost/version.map                  |  1 +
>  6 files changed, 74 insertions(+)
> 
> diff --git a/doc/guides/prog_guide/vhost_lib.rst
> b/doc/guides/prog_guide/vhost_lib.rst
> index f287b76..0337b38 100644
> --- a/doc/guides/prog_guide/vhost_lib.rst
> +++ b/doc/guides/prog_guide/vhost_lib.rst
> @@ -282,6 +282,11 @@ The following is an overview of some key Vhost API
> functions:
>    Clear inflight packets which are submitted to DMA engine in vhost async
> data
>    path. Completed packets are returned to applications through ``pkts``.
> 
> +* ``rte_vhost_driver_get_vdpa_dev_type(path, type)``
> +
> +  Get device type of vDPA device, such as VDPA_DEVICE_TYPE_NET,
> +  VDPA_DEVICE_TYPE_BLK.
> +
>  Vhost-user Implementations
>  --------------------------
> 
> diff --git a/doc/guides/rel_notes/release_22_07.rst
> b/doc/guides/rel_notes/release_22_07.rst
> index e49cace..9550977 100644
> --- a/doc/guides/rel_notes/release_22_07.rst
> +++ b/doc/guides/rel_notes/release_22_07.rst
> @@ -104,6 +104,10 @@ New Features
>    * ``RTE_EVENT_QUEUE_ATTR_WEIGHT``
>    * ``RTE_EVENT_QUEUE_ATTR_AFFINITY``
> 
> +* **Added vhost API to get the device type of a vDPA device.**
> +
> +  Added an API which can get the device type of vDPA device.
> +
The release notes have an ordering requirement; check the comment under
'New Features' in the release notes.
Based on that comment, you should put this entry after
'Added vhost API to get the number of in-flight packets'.
> 
>  Removed Items
>  -------------
> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
> index c733f85..c977a24 100644
> --- a/lib/vhost/rte_vhost.h
> +++ b/lib/vhost/rte_vhost.h
> @@ -117,6 +117,9 @@
> 
>  #define RTE_MAX_VHOST_DEVICE	1024
> 
> +#define VDPA_DEVICE_TYPE_NET 0
> +#define VDPA_DEVICE_TYPE_BLK 1
These will be new APIs of the vhost lib. I suggest renaming them by adding
the prefix 'RTE_VHOST_'.
Thanks,
Chenbo
> +
>  struct rte_vdpa_device;
> 
>  /**
> @@ -486,6 +489,20 @@ struct rte_vdpa_device *
>  rte_vhost_driver_get_vdpa_device(const char *path);
> 
>  /**
> + * Get the device type of the vdpa device.
> + *
> + * @param path
> + *  The vhost-user socket file path
> + * @param type
> + *  the device type of the vdpa device
> + * @return
> + *  0 on success, -1 on failure
> + */
> +__rte_experimental
> +int
> +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
> +
> +/**
>   * Set the feature bits the vhost-user driver supports.
>   *
>   * @param path
> diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
> index b304339..ef0f401 100644
> --- a/lib/vhost/socket.c
> +++ b/lib/vhost/socket.c
> @@ -619,6 +619,50 @@ struct rte_vdpa_device *
>  }
> 
>  int
> +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
> +{
> +	struct vhost_user_socket *vsocket;
> +	struct rte_vdpa_device *vdpa_dev;
> +	uint32_t vdpa_type = 0;
> +	int ret = 0;
> +
> +	pthread_mutex_lock(&vhost_user.mutex);
> +	vsocket = find_vhost_user_socket(path);
> +	if (!vsocket) {
> +		VHOST_LOG_CONFIG(ERR,
> +				 "(%s) socket file is not registered yet.\n",
> +				 path);
> +		ret = -1;
> +		goto unlock_exit;
> +	}
> +
> +	vdpa_dev = vsocket->vdpa_dev;
> +	if (!vdpa_dev) {
> +		ret = -1;
> +		goto unlock_exit;
> +	}
> +
> +	if (vdpa_dev->ops->get_dev_type) {
> +		ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
> +		if (ret) {
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) failed to get vdpa dev type for
> socket file.\n",
> +					 path);
> +			ret = -1;
> +			goto unlock_exit;
> +		}
> +	} else {
> +		vdpa_type = VDPA_DEVICE_TYPE_NET;
> +	}
> +
> +	*type = vdpa_type;
> +
> +unlock_exit:
> +	pthread_mutex_unlock(&vhost_user.mutex);
> +	return ret;
> +}
> +
> +int
>  rte_vhost_driver_disable_features(const char *path, uint64_t features)
>  {
>  	struct vhost_user_socket *vsocket;
> diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
> index e59a834..9cbd7cd 100644
> --- a/lib/vhost/vdpa_driver.h
> +++ b/lib/vhost/vdpa_driver.h
> @@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
>  	/** Set the device configuration space */
>  	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
>  		      uint32_t size, uint32_t flags);
> +
> +	/** get device type: net device, blk device... */
> +	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
>  };
> 
>  /**
> diff --git a/lib/vhost/version.map b/lib/vhost/version.map
> index 5841315..583b4f3 100644
> --- a/lib/vhost/version.map
> +++ b/lib/vhost/version.map
> @@ -90,6 +90,7 @@ EXPERIMENTAL {
> 
>  	# added in 22.07
>  	rte_vhost_async_get_inflight_thread_unsafe;
> +	rte_vhost_driver_get_vdpa_dev_type;
> 
>  };
> 
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc driver
  2022-05-18 12:13     ` [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
@ 2022-05-23  7:30       ` Xia, Chenbo
  2022-05-23  8:31         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  7:30 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc driver
> 
> Add get device type ops to ifc driver.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 3e78c7d..0acfa8c 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1189,6 +1189,29 @@ struct rte_vdpa_dev_info {
>  	return 0;
>  }
> 
> +static int
> +ifcvf_get_device_type(struct rte_vdpa_device *vdev,
> +	uint32_t *type)
> +{
> +	struct ifcvf_internal *internal;
> +	struct internal_list *list;
> +
> +	list = find_internal_resource_by_vdev(vdev);
> +	if (list == NULL) {
> +		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
> +		return -1;
> +	}
> +
> +	internal = list->internal;
> +
> +	if (internal->hw.device_type == IFCVF_BLK)
> +		*type = VDPA_DEVICE_TYPE_BLK;
> +	else
> +		*type = VDPA_DEVICE_TYPE_NET;
> +
> +	return 0;
> +}
> +
>  static struct rte_vdpa_dev_ops ifcvf_ops = {
>  	.get_queue_num = ifcvf_get_queue_num,
>  	.get_features = ifcvf_get_vdpa_features,
> @@ -1201,6 +1224,7 @@ struct rte_vdpa_dev_info {
>  	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
>  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
>  	.get_notify_area = ifcvf_get_notify_area,
> +	.get_dev_type = ifcvf_get_device_type,
>  };
> 
>  static inline int
> @@ -1327,6 +1351,7 @@ struct rte_vdpa_dev_info {
>  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
>  	.get_notify_area = ifcvf_get_notify_area,
>  	.get_config = ifcvf_blk_get_config,
> +	.get_dev_type = ifcvf_get_device_type,
>  };
> 
>  struct rte_vdpa_dev_info dev_info[] = {
> --
> 1.8.3.1
With the renaming mentioned in patch 7 fixed:
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 09/13] examples/vdpa: add add virtio blk support
  2022-05-18 12:13     ` [PATCH v8 09/13] examples/vdpa: add add virtio blk support Andy Pei
@ 2022-05-23  7:40       ` Xia, Chenbo
  2022-05-23  8:38         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  7:40 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 09/13] examples/vdpa: add add virtio blk support
There is a duplicated 'add' in the title.
> 
> Add virtio blk device support to vDPA example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  examples/vdpa/main.c             | 56
> ++++++++++++++++++++++++++++++++++++++
>  examples/vdpa/vdpa_blk_compact.h | 58
> ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 114 insertions(+)
>  create mode 100644 examples/vdpa/vdpa_blk_compact.h
> 
> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
> index 5ab0765..62b6660 100644
> --- a/examples/vdpa/main.c
> +++ b/examples/vdpa/main.c
> @@ -20,6 +20,7 @@
>  #include <cmdline_parse_string.h>
>  #include <cmdline_parse_num.h>
>  #include <cmdline.h>
> +#include "vdpa_blk_compact.h"
> 
>  #define MAX_PATH_LEN 128
>  #define MAX_VDPA_SAMPLE_PORTS 1024
> @@ -159,8 +160,53 @@ struct vdpa_port {
>  };
> 
>  static int
> +vdpa_blk_device_set_features_and_protocol(const char *path)
> +{
> +	uint64_t protocol_features = 0;
> +	int ret;
> +
> +	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_disable_features(path,
> +		VHOST_BLK_DISABLED_FEATURES);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_disable_features for %s failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	ret = rte_vhost_driver_get_protocol_features(path,
> &protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_get_protocol_features for %s
> failed.\n",
> +			path);
> +		goto out;
> +	}
> +
> +	protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
> +
> +	ret = rte_vhost_driver_set_protocol_features(path,
> protocol_features);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VDPA,
> +			"rte_vhost_driver_set_protocol_features for %s
> failed.\n",
> +			path);
> +	}
> +
> +out:
> +	return ret;
> +}
> +
> +static int
>  start_vdpa(struct vdpa_port *vport)
>  {
> +	uint32_t device_type = 0;
>  	int ret;
>  	char *socket_path = vport->ifname;
> 
> @@ -192,6 +238,16 @@ struct vdpa_port {
>  			"attach vdpa device failed: %s\n",
>  			socket_path);
> 
> +	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type);
> +	if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) {
> +		RTE_LOG(NOTICE, VDPA, "%s is a blk device\n", socket_path);
> +		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
> +		if (ret != 0)
> +			rte_exit(EXIT_FAILURE,
> +				"set vhost blk driver features and protocol
> features failed: %s\n",
> +				socket_path);
> +	}
> +
>  	if (rte_vhost_driver_start(socket_path) < 0)
>  		rte_exit(EXIT_FAILURE,
>  			"start vhost driver failed: %s\n",
> diff --git a/examples/vdpa/vdpa_blk_compact.h
> b/examples/vdpa/vdpa_blk_compact.h
> new file mode 100644
> index 0000000..4193561
> --- /dev/null
> +++ b/examples/vdpa/vdpa_blk_compact.h
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _VDPA_BLK_COMPACT_H_
> +#define _VDPA_BLK_COMPACT_H_
> +
> +/**
> + * @file
> + *
> + * Device specific vhost lib
> + */
Please describe the file in more detail, or maybe just delete this description.
> +
> +#include <rte_vhost.h>
> +
> +/* Feature bits */
> +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size
> */
> +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments
> */
> +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available
> */
> +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is
> available */
> +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> +
> +/* Legacy feature bits */
> +#ifndef VIRTIO_BLK_NO_LEGACY
Is this always true? Is it possible for VIRTIO_BLK_NO_LEGACY to be defined?
Thanks,
Chenbo
> +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru
> */
> +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in
> config */
> +#endif /* !VIRTIO_BLK_NO_LEGACY */
> +
> +#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
> +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> +	(1ULL << VIRTIO_F_VERSION_1))
> +
> +#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL <<
> VIRTIO_F_NOTIFY_ON_EMPTY) | \
> +	(1ULL << VIRTIO_RING_F_EVENT_IDX))
> +
> +#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
> +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
> \
> +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  |
> \
> +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE)
> | \
> +	(1ULL << VIRTIO_BLK_F_MQ))
> +
> +/* Not supported features */
> +#define VHOST_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES_BASE | \
> +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \
> +	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
> +
> +/* Vhost-blk support protocol features */
> +#define VHOST_BLK_PROTOCOL_FEATURES \
> +	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
> +	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
> +
> +#endif /* _VDPA_BLK_COMPACT_H_ */
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 10/13] usertools: add support for virtio blk device
  2022-05-18 12:13     ` [PATCH v8 10/13] usertools: add support for virtio blk device Andy Pei
@ 2022-05-23  7:43       ` Xia, Chenbo
  2022-05-23  8:49         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  7:43 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 10/13] usertools: add support for virtio blk device
> 
> Add virtio blk device support to devbind.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  usertools/dpdk-devbind.py | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
> index ace4627..7231be4 100755
> --- a/usertools/dpdk-devbind.py
> +++ b/usertools/dpdk-devbind.py
> @@ -72,6 +72,9 @@
>  cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
>                   'SVendor': None, 'SDevice': None}
> 
> +virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
No modern device support?
Thanks,
Chenbo
> +                    'SVendor': None, 'SDevice': None}
> +
>  network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
>  baseband_devices = [acceleration_class]
>  crypto_devices = [encryption_class, intel_processor_class]
> @@ -82,7 +85,7 @@
>  compress_devices = [cavium_zip]
>  regex_devices = [cn9k_ree]
>  misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev,
> -                intel_ntb_skx, intel_ntb_icx]
> +                intel_ntb_skx, intel_ntb_icx, virtio_blk]
> 
>  # global dict ethernet devices present. Dictionary indexed by PCI address.
>  # Each device within this is itself a dictionary of device properties
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 11/13] vdpa/ifc: add log for config space of virtio blk
  2022-05-18 12:13     ` [PATCH v8 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
@ 2022-05-23  7:46       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  7:46 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 11/13] vdpa/ifc: add log for config space of virtio blk
> 
> Add some log of virtio blk device config space information
> at VDPA launch before qemu connects.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 34 ++++++++++++++++++++++++++++++++++
>  1 file changed, 34 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 0acfa8c..376a1af 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1382,6 +1382,9 @@ struct rte_vdpa_dev_info dev_info[] = {
>  	struct rte_kvargs *kvlist = NULL;
>  	int ret = 0;
>  	int16_t device_id;
> +	uint64_t capacity = 0;
> +	uint8_t *byte;
> +	uint32_t i;
> 
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>  		return 0;
> @@ -1448,6 +1451,37 @@ struct rte_vdpa_dev_info dev_info[] = {
>  		internal->features = features &
>  					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
>  		internal->features |= dev_info[IFCVF_BLK].features;
> +
> +		/* cannot read 64-bit register in one attempt,
> +		 * so read byte by byte.
> +		 */
> +		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> +			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> +			capacity |= (uint64_t)*byte << (i * 8);
> +		}
> +		/* The capacity is number of sectors in 512-byte.
> +		 * So right shift 1 bit  we get in K,
> +		 * another right shift 10 bits we get in M,
> +		 * right shift 10 more bits, we get in G.
> +		 * To show capacity in G, we right shift 21 bits in total.
> +		 */
> +		DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
> +
> +		DRV_LOG(DEBUG, "size_max  : 0x%08x",
> +			internal->hw.blk_cfg->size_max);
> +		DRV_LOG(DEBUG, "seg_max   : 0x%08x",
> +			internal->hw.blk_cfg->seg_max);
> +		DRV_LOG(DEBUG, "blk_size  : 0x%08x",
> +			internal->hw.blk_cfg->blk_size);
> +		DRV_LOG(DEBUG, "geometry");
> +		DRV_LOG(DEBUG, "    cylinders: %u",
> +			internal->hw.blk_cfg->geometry.cylinders);
> +		DRV_LOG(DEBUG, "    heads    : %u",
> +			internal->hw.blk_cfg->geometry.heads);
> +		DRV_LOG(DEBUG, "    sectors  : %u",
> +			internal->hw.blk_cfg->geometry.sectors);
> +		DRV_LOG(DEBUG, "num_queues: 0x%08x",
> +			internal->hw.blk_cfg->num_queues);
>  	}
> 
>  	list->internal = internal;
> --
> 1.8.3.1
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 12/13] vdpa/ifc: add interrupt handling for config space
  2022-05-18 12:13     ` [PATCH v8 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
@ 2022-05-23  7:54       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  7:54 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 12/13] vdpa/ifc: add interrupt handling for config
> space
> 
> Create a thread to poll and relay config space change interrupt.
> Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 118
> +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 117 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 376a1af..8a49622 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -53,7 +53,9 @@ struct ifcvf_internal {
>  	int vfio_group_fd;
>  	int vfio_dev_fd;
>  	pthread_t tid;	/* thread for notify relay */
> +	pthread_t intr_tid; /* thread for config space change interrupt
> relay */
>  	int epfd;
> +	int csc_epfd;
>  	int vid;
>  	struct rte_vdpa_device *vdev;
>  	uint16_t max_queues;
> @@ -566,6 +568,111 @@ struct rte_vdpa_dev_info {
>  	return 0;
>  }
> 
> +static void
> +virtio_interrupt_handler(struct ifcvf_internal *internal)
> +{
> +	int vid = internal->vid;
> +	int ret;
> +
> +	ret = rte_vhost_slave_config_change(vid, 1);
> +	if (ret)
> +		DRV_LOG(ERR, "failed to notify the guest about configuration
> space change.");
> +}
> +
> +static void *
> +intr_relay(void *arg)
> +{
> +	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
> +	struct epoll_event csc_event;
> +	struct epoll_event ev;
> +	uint64_t buf;
> +	int nbytes;
> +	int csc_epfd, csc_val = 0;
> +
> +	csc_epfd = epoll_create(1);
> +	if (csc_epfd < 0) {
> +		DRV_LOG(ERR, "failed to create epoll for config space
> change.");
> +		return NULL;
> +	}
> +
> +	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
> +	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
> +	if (epoll_ctl(csc_epfd, EPOLL_CTL_ADD,
> +		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
> +		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> +		return NULL;
> +	}
> +
> +	internal->csc_epfd = csc_epfd;
> +
> +	for (;;) {
> +		csc_val = epoll_wait(csc_epfd, &csc_event, 1, -1);
> +		if (csc_val < 0) {
> +			if (errno == EINTR)
> +				continue;
> +			DRV_LOG(ERR, "epoll_wait return fail.");
> +			return NULL;
> +		} else if (csc_val == 0) {
> +			continue;
> +		} else {
> +			/* csc_val > 0 */
> +			nbytes = read(csc_event.data.fd, &buf, 8);
> +			if (nbytes < 0) {
> +				if (errno == EINTR ||
> +				    errno == EWOULDBLOCK ||
> +				    errno == EAGAIN)
> +					continue;
> +				DRV_LOG(ERR, "Error reading from file
> descriptor %d: %s\n",
> +					csc_event.data.fd,
> +					strerror(errno));
> +				return NULL;
> +			} else if (nbytes == 0) {
> +				DRV_LOG(ERR, "Read nothing from file
> descriptor %d\n",
> +					csc_event.data.fd);
> +				continue;
> +			} else {
> +				virtio_interrupt_handler(internal);
> +			}
> +		}
> +	}
> +
> +	return NULL;
> +}
I think we should not assume unset_intr_relay will help us close epfd when
an error happens, so just close it when there's some error.
> +
> +static int
> +setup_intr_relay(struct ifcvf_internal *internal)
> +{
> +	char name[THREAD_NAME_LEN];
> +	int ret;
> +
> +	snprintf(name, sizeof(name), "ifc-intr-%d", internal->vid);
> +	ret = rte_ctrl_thread_create(&internal->intr_tid, name, NULL,
> +				     intr_relay, (void *)internal);
> +	if (ret) {
> +		DRV_LOG(ERR, "failed to create notify relay pthread.");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static int
> +unset_intr_relay(struct ifcvf_internal *internal)
> +{
> +	void *status;
> +
> +	if (internal->intr_tid) {
> +		pthread_cancel(internal->intr_tid);
> +		pthread_join(internal->intr_tid, &status);
> +	}
> +	internal->intr_tid = 0;
> +
> +	if (internal->csc_epfd >= 0)
> +		close(internal->csc_epfd);
> +	internal->csc_epfd = -1;
> +
> +	return 0;
> +}
It will always return 0, so return type should be void
> +
>  static int
>  update_datapath(struct ifcvf_internal *internal)
>  {
> @@ -592,10 +699,16 @@ struct rte_vdpa_dev_info {
>  		if (ret)
>  			goto err;
> 
> +		ret = setup_intr_relay(internal);
> +		if (ret)
> +			goto err;
> +
>  		rte_atomic32_set(&internal->running, 1);
>  	} else if (rte_atomic32_read(&internal->running) &&
>  		   (!rte_atomic32_read(&internal->started) ||
>  		    !rte_atomic32_read(&internal->dev_attached))) {
> +		ret = unset_intr_relay(internal);
This will be changed accordingly.
Thanks,
Chenbo
> +
>  		ret = unset_notify_relay(internal);
>  		if (ret)
>  			goto err;
> @@ -812,7 +925,7 @@ struct rte_vdpa_dev_info {
>  		if (nfds < 0) {
>  			if (errno == EINTR)
>  				continue;
> -			DRV_LOG(ERR, "epoll_wait return fail\n");
> +			DRV_LOG(ERR, "epoll_wait return fail.");
>  			return NULL;
>  		}
> 
> @@ -888,6 +1001,9 @@ struct rte_vdpa_dev_info {
>  	/* stop the direct IO data path */
>  	unset_notify_relay(internal);
>  	vdpa_ifcvf_stop(internal);
> +
> +	unset_intr_relay(internal);
> +
>  	vdpa_disable_vfio_intr(internal);
> 
>  	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 13/13] vdpa/ifc/base: access correct register for blk device
  2022-05-18 12:13     ` [PATCH v8 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
@ 2022-05-23  7:55       ` Xia, Chenbo
  2022-05-23  9:03         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-23  7:55 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Wednesday, May 18, 2022 8:14 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v8 13/13] vdpa/ifc/base: access correct register for blk
> device
> 
> Register address is different between net and blk device.
> We are re-use most of the code, when register address is
Re-using
With this fixed:
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> different, we have to check net and blk device go through
> different code.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.c | 34 +++++++++++++++++++++++++++-------
>  1 file changed, 27 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
> index d10c1fd..dd475a7 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -218,10 +218,17 @@
>  				&cfg->queue_used_hi);
>  		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
> 
> -		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> -				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
> -			(u32)hw->vring[i].last_avail_idx |
> -			((u32)hw->vring[i].last_used_idx << 16);
> +		if (hw->device_type == IFCVF_BLK)
> +			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> +				i * IFCVF_LM_CFG_SIZE) =
> +				(u32)hw->vring[i].last_avail_idx |
> +				((u32)hw->vring[i].last_used_idx << 16);
> +		else
> +			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> +				(i / 2) * IFCVF_LM_CFG_SIZE +
> +				(i % 2) * 4) =
> +				(u32)hw->vring[i].last_avail_idx |
> +				((u32)hw->vring[i].last_used_idx << 16);
> 
>  		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
>  		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
> @@ -254,9 +261,22 @@
>  		IFCVF_WRITE_REG16(i, &cfg->queue_select);
>  		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
>  		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg-
> >queue_msix_vector);
> -		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET
> +
> -				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
> -		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
> +
> +		if (hw->device_type == IFCVF_BLK)
> +			ring_state = *(u32 *)(hw->lm_cfg +
> +					IFCVF_LM_RING_STATE_OFFSET +
> +					i * IFCVF_LM_CFG_SIZE);
> +		else
> +			ring_state = *(u32 *)(hw->lm_cfg +
> +					IFCVF_LM_RING_STATE_OFFSET +
> +					(i / 2) * IFCVF_LM_CFG_SIZE +
> +					(i % 2) * 4);
> +
> +		if (hw->device_type == IFCVF_BLK)
> +			hw->vring[i].last_avail_idx =
> +				(u16)(ring_state & IFCVF_16_BIT_MASK);
> +		else
> +			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
>  		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
>  	}
>  }
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 07/13] vhost: add API to get vDPA device type
  2022-05-23  7:26       ` Xia, Chenbo
@ 2022-05-23  8:23         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  8:23 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Thanks for your reply.
I will send a new version to address these.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 3:26 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 07/13] vhost: add API to get vDPA device type
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 07/13] vhost: add API to get vDPA device type
> >
> > Vhost backend of different devices have different features.
> > Add a API to get vDPA device type, net device or blk device
> 
> an API
> 
> > currently, so users can set different features for different kinds of
> > devices.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  doc/guides/prog_guide/vhost_lib.rst    |  5 ++++
> >  doc/guides/rel_notes/release_22_07.rst |  4 ++++
> >  lib/vhost/rte_vhost.h                  | 17 +++++++++++++
> >  lib/vhost/socket.c                     | 44
> > ++++++++++++++++++++++++++++++++++
> >  lib/vhost/vdpa_driver.h                |  3 +++
> >  lib/vhost/version.map                  |  1 +
> >  6 files changed, 74 insertions(+)
> >
> > diff --git a/doc/guides/prog_guide/vhost_lib.rst
> > b/doc/guides/prog_guide/vhost_lib.rst
> > index f287b76..0337b38 100644
> > --- a/doc/guides/prog_guide/vhost_lib.rst
> > +++ b/doc/guides/prog_guide/vhost_lib.rst
> > @@ -282,6 +282,11 @@ The following is an overview of some key Vhost
> > API
> > functions:
> >    Clear inflight packets which are submitted to DMA engine in vhost
> > async data
> >    path. Completed packets are returned to applications through ``pkts``.
> >
> > +* ``rte_vhost_driver_get_vdpa_dev_type(path, type)``
> > +
> > +  Get device type of vDPA device, such as VDPA_DEVICE_TYPE_NET,
> > + VDPA_DEVICE_TYPE_BLK.
> > +
> >  Vhost-user Implementations
> >  --------------------------
> >
> > diff --git a/doc/guides/rel_notes/release_22_07.rst
> > b/doc/guides/rel_notes/release_22_07.rst
> > index e49cace..9550977 100644
> > --- a/doc/guides/rel_notes/release_22_07.rst
> > +++ b/doc/guides/rel_notes/release_22_07.rst
> > @@ -104,6 +104,10 @@ New Features
> >    * ``RTE_EVENT_QUEUE_ATTR_WEIGHT``
> >    * ``RTE_EVENT_QUEUE_ATTR_AFFINITY``
> >
> > +* **Added vhost API to get the device type of a vDPA device.**
> > +
> > +  Added an API which can get the device type of vDPA device.
> > +
> 
> Release note has a requirement of order, check comment under 'New
> features'
> in release notes.
> 
> Based on these comments, you should put this after 'Added vhost API to get
> the number of in-flight packets'
> 
> >
> >  Removed Items
> >  -------------
> > diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> > c733f85..c977a24 100644
> > --- a/lib/vhost/rte_vhost.h
> > +++ b/lib/vhost/rte_vhost.h
> > @@ -117,6 +117,9 @@
> >
> >  #define RTE_MAX_VHOST_DEVICE	1024
> >
> > +#define VDPA_DEVICE_TYPE_NET 0
> > +#define VDPA_DEVICE_TYPE_BLK 1
> 
> These will be new APIs of vhost lib. I suggest to rename them by adding
> prefix 'RTE_VHOST_'
> 
> Thanks,
> Chenbo
> 
> > +
> >  struct rte_vdpa_device;
> >
> >  /**
> > @@ -486,6 +489,20 @@ struct rte_vdpa_device *
> > rte_vhost_driver_get_vdpa_device(const char *path);
> >
> >  /**
> > + * Get the device type of the vdpa device.
> > + *
> > + * @param path
> > + *  The vhost-user socket file path
> > + * @param type
> > + *  the device type of the vdpa device
> > + * @return
> > + *  0 on success, -1 on failure
> > + */
> > +__rte_experimental
> > +int
> > +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
> > +
> > +/**
> >   * Set the feature bits the vhost-user driver supports.
> >   *
> >   * @param path
> > diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c index
> > b304339..ef0f401 100644
> > --- a/lib/vhost/socket.c
> > +++ b/lib/vhost/socket.c
> > @@ -619,6 +619,50 @@ struct rte_vdpa_device *  }
> >
> >  int
> > +rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
> > +{
> > +	struct vhost_user_socket *vsocket;
> > +	struct rte_vdpa_device *vdpa_dev;
> > +	uint32_t vdpa_type = 0;
> > +	int ret = 0;
> > +
> > +	pthread_mutex_lock(&vhost_user.mutex);
> > +	vsocket = find_vhost_user_socket(path);
> > +	if (!vsocket) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +				 "(%s) socket file is not registered yet.\n",
> > +				 path);
> > +		ret = -1;
> > +		goto unlock_exit;
> > +	}
> > +
> > +	vdpa_dev = vsocket->vdpa_dev;
> > +	if (!vdpa_dev) {
> > +		ret = -1;
> > +		goto unlock_exit;
> > +	}
> > +
> > +	if (vdpa_dev->ops->get_dev_type) {
> > +		ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
> > +		if (ret) {
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) failed to get vdpa dev type for
> > socket file.\n",
> > +					 path);
> > +			ret = -1;
> > +			goto unlock_exit;
> > +		}
> > +	} else {
> > +		vdpa_type = VDPA_DEVICE_TYPE_NET;
> > +	}
> > +
> > +	*type = vdpa_type;
> > +
> > +unlock_exit:
> > +	pthread_mutex_unlock(&vhost_user.mutex);
> > +	return ret;
> > +}
> > +
> > +int
> >  rte_vhost_driver_disable_features(const char *path, uint64_t
> > features)  {
> >  	struct vhost_user_socket *vsocket;
> > diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h index
> > e59a834..9cbd7cd 100644
> > --- a/lib/vhost/vdpa_driver.h
> > +++ b/lib/vhost/vdpa_driver.h
> > @@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
> >  	/** Set the device configuration space */
> >  	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
> >  		      uint32_t size, uint32_t flags);
> > +
> > +	/** get device type: net device, blk device... */
> > +	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
> >  };
> >
> >  /**
> > diff --git a/lib/vhost/version.map b/lib/vhost/version.map index
> > 5841315..583b4f3 100644
> > --- a/lib/vhost/version.map
> > +++ b/lib/vhost/version.map
> > @@ -90,6 +90,7 @@ EXPERIMENTAL {
> >
> >  	# added in 22.07
> >  	rte_vhost_async_get_inflight_thread_unsafe;
> > +	rte_vhost_driver_get_vdpa_dev_type;
> >
> >  };
> >
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc driver
  2022-05-23  7:30       ` Xia, Chenbo
@ 2022-05-23  8:31         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  8:31 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Sure
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 3:30 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc driver
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc
> > driver
> >
> > Add get device type ops to ifc driver.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++++++++++
> >  1 file changed, 25 insertions(+)
> >
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 3e78c7d..0acfa8c 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -1189,6 +1189,29 @@ struct rte_vdpa_dev_info {
> >  	return 0;
> >  }
> >
> > +static int
> > +ifcvf_get_device_type(struct rte_vdpa_device *vdev,
> > +	uint32_t *type)
> > +{
> > +	struct ifcvf_internal *internal;
> > +	struct internal_list *list;
> > +
> > +	list = find_internal_resource_by_vdev(vdev);
> > +	if (list == NULL) {
> > +		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
> > +		return -1;
> > +	}
> > +
> > +	internal = list->internal;
> > +
> > +	if (internal->hw.device_type == IFCVF_BLK)
> > +		*type = VDPA_DEVICE_TYPE_BLK;
> > +	else
> > +		*type = VDPA_DEVICE_TYPE_NET;
> > +
> > +	return 0;
> > +}
> > +
> >  static struct rte_vdpa_dev_ops ifcvf_ops = {
> >  	.get_queue_num = ifcvf_get_queue_num,
> >  	.get_features = ifcvf_get_vdpa_features, @@ -1201,6 +1224,7 @@
> > struct rte_vdpa_dev_info {
> >  	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
> >  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> >  	.get_notify_area = ifcvf_get_notify_area,
> > +	.get_dev_type = ifcvf_get_device_type,
> >  };
> >
> >  static inline int
> > @@ -1327,6 +1351,7 @@ struct rte_vdpa_dev_info {
> >  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> >  	.get_notify_area = ifcvf_get_notify_area,
> >  	.get_config = ifcvf_blk_get_config,
> > +	.get_dev_type = ifcvf_get_device_type,
> >  };
> >
> >  struct rte_vdpa_dev_info dev_info[] = {
> > --
> > 1.8.3.1
> 
> With renaming fixed mentioned in patch 7:
> 
> Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 09/13] examples/vdpa: add add virtio blk support
  2022-05-23  7:40       ` Xia, Chenbo
@ 2022-05-23  8:38         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  8:38 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Thanks for your reply, my reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 3:40 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 09/13] examples/vdpa: add add virtio blk support
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 09/13] examples/vdpa: add add virtio blk support
> 
> Double add in the title
> 
I will fix this in next version.
> >
> > Add virtio blk device support to vDPA example.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  examples/vdpa/main.c             | 56
> > ++++++++++++++++++++++++++++++++++++++
> >  examples/vdpa/vdpa_blk_compact.h | 58
> > ++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 114 insertions(+)
> >  create mode 100644 examples/vdpa/vdpa_blk_compact.h
> >
> > diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index
> > 5ab0765..62b6660 100644
> > --- a/examples/vdpa/main.c
> > +++ b/examples/vdpa/main.c
> > @@ -20,6 +20,7 @@
> >  #include <cmdline_parse_string.h>
> >  #include <cmdline_parse_num.h>
> >  #include <cmdline.h>
> > +#include "vdpa_blk_compact.h"
> >
> >  #define MAX_PATH_LEN 128
> >  #define MAX_VDPA_SAMPLE_PORTS 1024
> > @@ -159,8 +160,53 @@ struct vdpa_port {  };
> >
> >  static int
> > +vdpa_blk_device_set_features_and_protocol(const char *path) {
> > +	uint64_t protocol_features = 0;
> > +	int ret;
> > +
> > +	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_set_features for %s failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	ret = rte_vhost_driver_disable_features(path,
> > +		VHOST_BLK_DISABLED_FEATURES);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_disable_features for %s failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	ret = rte_vhost_driver_get_protocol_features(path,
> > &protocol_features);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_get_protocol_features for %s
> > failed.\n",
> > +			path);
> > +		goto out;
> > +	}
> > +
> > +	protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
> > +
> > +	ret = rte_vhost_driver_set_protocol_features(path,
> > protocol_features);
> > +	if (ret != 0) {
> > +		RTE_LOG(ERR, VDPA,
> > +			"rte_vhost_driver_set_protocol_features for %s
> > failed.\n",
> > +			path);
> > +	}
> > +
> > +out:
> > +	return ret;
> > +}
> > +
> > +static int
> >  start_vdpa(struct vdpa_port *vport)
> >  {
> > +	uint32_t device_type = 0;
> >  	int ret;
> >  	char *socket_path = vport->ifname;
> >
> > @@ -192,6 +238,16 @@ struct vdpa_port {
> >  			"attach vdpa device failed: %s\n",
> >  			socket_path);
> >
> > +	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path,
> &device_type);
> > +	if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) {
> > +		RTE_LOG(NOTICE, VDPA, "%s is a blk device\n", socket_path);
> > +		ret =
> vdpa_blk_device_set_features_and_protocol(socket_path);
> > +		if (ret != 0)
> > +			rte_exit(EXIT_FAILURE,
> > +				"set vhost blk driver features and protocol
> > features failed: %s\n",
> > +				socket_path);
> > +	}
> > +
> >  	if (rte_vhost_driver_start(socket_path) < 0)
> >  		rte_exit(EXIT_FAILURE,
> >  			"start vhost driver failed: %s\n", diff --git
> > a/examples/vdpa/vdpa_blk_compact.h
> > b/examples/vdpa/vdpa_blk_compact.h
> > new file mode 100644
> > index 0000000..4193561
> > --- /dev/null
> > +++ b/examples/vdpa/vdpa_blk_compact.h
> > @@ -0,0 +1,58 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Intel Corporation  */
> > +
> > +#ifndef _VDPA_BLK_COMPACT_H_
> > +#define _VDPA_BLK_COMPACT_H_
> > +
> > +/**
> > + * @file
> > + *
> > + * Device specific vhost lib
> > + */
> 
> You describe the file with more details, or maybe just delete it.
> 
I will delete it in next version.
> > +
> > +#include <rte_vhost.h>
> > +
> > +/* Feature bits */
> > +#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment
> size
> > */
> > +#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of
> segments
> > */
> > +#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
> > +#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available
> > */
> > +#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is
> > available */
> > +#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
> > +
> > +/* Legacy feature bits */
> > +#ifndef VIRTIO_BLK_NO_LEGACY
> 
> This is always true? Is it possible VIRTIO_BLK_NO_LEGACY get defined?
> 
Then I will delete this condition.
> Thanks,
> Chenbo
> 
> > +#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
> > +#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru
> > */
> > +#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in
> > config */
> > +#endif /* !VIRTIO_BLK_NO_LEGACY */
> > +
> > +#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
> > +	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
> > +	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
> > +	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
> > +	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> > +	(1ULL << VIRTIO_F_VERSION_1))
> > +
> > +#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL <<
> > VIRTIO_F_NOTIFY_ON_EMPTY) | \
> > +	(1ULL << VIRTIO_RING_F_EVENT_IDX))
> > +
> > +#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
> > +	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL <<
> VIRTIO_BLK_F_SEG_MAX) | \
> > +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL <<
> VIRTIO_BLK_F_BLK_SIZE) |
> > \
> > +	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL <<
> VIRTIO_BLK_F_BARRIER)  |
> > \
> > +	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL <<
> VIRTIO_BLK_F_CONFIG_WCE)
> > | \
> > +	(1ULL << VIRTIO_BLK_F_MQ))
> > +
> > +/* Not supported features */
> > +#define VHOST_BLK_DISABLED_FEATURES
> (VHOST_BLK_DISABLED_FEATURES_BASE | \
> > +	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL <<
> VIRTIO_BLK_F_BARRIER) | \
> > +	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL <<
> VIRTIO_BLK_F_CONFIG_WCE))
> > +
> > +/* Vhost-blk support protocol features */ #define
> > +VHOST_BLK_PROTOCOL_FEATURES \
> > +	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
> > +	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
> > +
> > +#endif /* _VDPA_BLK_COMPACT_H_ */
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 10/13] usertools: add support for virtio blk device
  2022-05-23  7:43       ` Xia, Chenbo
@ 2022-05-23  8:49         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  8:49 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 3:43 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 10/13] usertools: add support for virtio blk device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 10/13] usertools: add support for virtio blk device
> >
> > Add virtio blk device support to devbind.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  usertools/dpdk-devbind.py | 5 ++++-
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
> > index ace4627..7231be4 100755
> > --- a/usertools/dpdk-devbind.py
> > +++ b/usertools/dpdk-devbind.py
> > @@ -72,6 +72,9 @@
> >  cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
> >                   'SVendor': None, 'SDevice': None}
> >
> > +virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
> 
> No modern device support?
> 
It should be supported. I will add modern device in next version.
> Thanks,
> Chenbo
> 
> > +                    'SVendor': None, 'SDevice': None}
> > +
> >  network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
> > baseband_devices = [acceleration_class]  crypto_devices =
> > [encryption_class, intel_processor_class] @@ -82,7 +85,7 @@
> > compress_devices = [cavium_zip]  regex_devices = [cn9k_ree]
> > misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev,
> > -                intel_ntb_skx, intel_ntb_icx]
> > +                intel_ntb_skx, intel_ntb_icx, virtio_blk]
> >
> >  # global dict ethernet devices present. Dictionary indexed by PCI address.
> >  # Each device within this is itself a dictionary of device properties
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v8 13/13] vdpa/ifc/base: access correct register for blk device
  2022-05-23  7:55       ` Xia, Chenbo
@ 2022-05-23  9:03         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-23  9:03 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Thanks for your reply.
I will address this in the next version.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, May 23, 2022 3:56 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v8 13/13] vdpa/ifc/base: access correct register for blk
> device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Wednesday, May 18, 2022 8:14 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v8 13/13] vdpa/ifc/base: access correct register for
> > blk device
> >
> > Register address is different between net and blk device.
> > We are re-use most of the code, when register address is
> 
> Re-using
> 
> With this fixed:
> 
> Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> 
> > different, we have to check net and blk device go through different
> > code.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.c | 34 +++++++++++++++++++++++++++-------
> >  1 file changed, 27 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.c
> > b/drivers/vdpa/ifc/base/ifcvf.c index d10c1fd..dd475a7 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.c
> > +++ b/drivers/vdpa/ifc/base/ifcvf.c
> > @@ -218,10 +218,17 @@
> >  				&cfg->queue_used_hi);
> >  		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
> >
> > -		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> > -				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
> > -			(u32)hw->vring[i].last_avail_idx |
> > -			((u32)hw->vring[i].last_used_idx << 16);
> > +		if (hw->device_type == IFCVF_BLK)
> > +			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> > +				i * IFCVF_LM_CFG_SIZE) =
> > +				(u32)hw->vring[i].last_avail_idx |
> > +				((u32)hw->vring[i].last_used_idx << 16);
> > +		else
> > +			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
> > +				(i / 2) * IFCVF_LM_CFG_SIZE +
> > +				(i % 2) * 4) =
> > +				(u32)hw->vring[i].last_avail_idx |
> > +				((u32)hw->vring[i].last_used_idx << 16);
> >
> >  		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
> >  		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) == @@ -
> 254,9 +261,22
> > @@
> >  		IFCVF_WRITE_REG16(i, &cfg->queue_select);
> >  		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
> >  		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg-
> > >queue_msix_vector);
> > -		ring_state = *(u32 *)(hw->lm_cfg +
> IFCVF_LM_RING_STATE_OFFSET
> > +
> > -				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
> > -		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
> > +
> > +		if (hw->device_type == IFCVF_BLK)
> > +			ring_state = *(u32 *)(hw->lm_cfg +
> > +					IFCVF_LM_RING_STATE_OFFSET +
> > +					i * IFCVF_LM_CFG_SIZE);
> > +		else
> > +			ring_state = *(u32 *)(hw->lm_cfg +
> > +					IFCVF_LM_RING_STATE_OFFSET +
> > +					(i / 2) * IFCVF_LM_CFG_SIZE +
> > +					(i % 2) * 4);
> > +
> > +		if (hw->device_type == IFCVF_BLK)
> > +			hw->vring[i].last_avail_idx =
> > +				(u16)(ring_state & IFCVF_16_BIT_MASK);
> > +		else
> > +			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
> >  		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
> >  	}
> >  }
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                     ` (6 preceding siblings ...)
  2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-05-23  9:35   ` Andy Pei
  2022-05-23  9:35     ` [PATCH v9 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (12 more replies)
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  8 siblings, 13 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because net and blk devices have a lot in common, part of the vdpa/ifc
driver is re-used. The virtio net and blk devices are distinguished by
device id, and each gets its own specific features and ops.
The vdpa example is extended to support virtio_blk devices.
To support blk device live migration, some modifications are made to the vhost lib.
The dev_conf op is performed only under the VHOST_USER_SET_VRING_CALL msg.
v9:
 fix some naming issues.
 add more NULL pointer checks.
 close open files when errors occur.
v8:
 delete some redundant code.
 fix some commit log.
v7:
 Check on expected fd num in new vhost msg handler.
 Sanity check on vhost msg size.
 Fix typo.
 Add commit log to help understand code.
 Remove duplicated code.
 Add new API to get vDPA device type.
v6:
 fix some commit log.
 add vhost socket in log output to make it more user-friendly.
 when driver ops fail, just output some log, do not break message handler.
 check vhost msg size in msg handler.
v5:
 fix some coding style issues.
v4:
 add the "isblk" argument to the vdpa example to specify a block device,
 fix some issues in the example.
 Make sure code specific to block devices does not affect net devices.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.
Andy Pei (13):
  vdpa/ifc: add support for virtio blk device
  vhost: add vDPA ops for blk device
  vhost: add vhost msg support for get/set config
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vDPA interrupt relay for blk device
  vdpa/ifc: add block device SW live-migration
  vhost: add API to get vDPA device type
  vdpa/ifc: add get device type ops to ifc driver
  examples/vdpa: add virtio blk support
  usertools: add support for virtio blk device
  vdpa/ifc: add log for config space of virtio blk
  vdpa/ifc: add interrupt handling for config space
  vdpa/ifc/base: access correct register for blk device
 doc/guides/prog_guide/vhost_lib.rst    |   5 +
 doc/guides/rel_notes/release_22_07.rst |   4 +
 drivers/vdpa/ifc/base/ifcvf.c          |  34 ++-
 drivers/vdpa/ifc/base/ifcvf.h          |  27 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c          | 415 +++++++++++++++++++++++++++++++--
 examples/vdpa/main.c                   |  56 +++++
 examples/vdpa/vdpa_blk_compact.h       |  50 ++++
 lib/vhost/rte_vhost.h                  |  17 ++
 lib/vhost/socket.c                     |  44 ++++
 lib/vhost/vdpa_driver.h                |  11 +-
 lib/vhost/version.map                  |   1 +
 lib/vhost/vhost_user.c                 |  88 +++++++
 lib/vhost/vhost_user.h                 |  13 ++
 usertools/dpdk-devbind.py              |   5 +-
 14 files changed, 739 insertions(+), 31 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 01/13] vdpa/ifc: add support for virtio blk device
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  2:37       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 02/13] vhost: add vDPA ops for " Andy Pei
                       ` (11 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Re-use the vdpa/ifc code and distinguish blk and net devices by PCI device ID.
Blk and net devices are each implemented with their own features and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 22 +++++++---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 95 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 102 insertions(+), 15 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..a761d49 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,12 +5,19 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
-#define IFCVF_VENDOR_ID		0x1AF4
-#define IFCVF_DEVICE_ID		0x1041
-#define IFCVF_SUBSYS_VENDOR_ID	0x8086
-#define IFCVF_SUBSYS_DEVICE_ID	0x001A
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+#define IFCVF_VENDOR_ID                     0x1AF4
+#define IFCVF_NET_DEVICE_ID                 0x1041
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_SUBSYS_VENDOR_ID              0x8086
+#define IFCVF_SUBSYS_DEVICE_ID              0x001A
+#define IFCVF_BLK_DEVICE_ID                 0x0002
 
 #define IFCVF_MAX_QUEUES		1
 
@@ -126,13 +133,18 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	int device_type;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..1eed90b 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -75,6 +75,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1138,7 +1144,7 @@ struct internal_list {
 	return 0;
 }
 
-static struct rte_vdpa_dev_ops ifcvf_ops = {
+static struct rte_vdpa_dev_ops ifcvf_net_ops = {
 	.get_queue_num = ifcvf_get_queue_num,
 	.get_features = ifcvf_get_vdpa_features,
 	.get_protocol_features = ifcvf_get_protocol_features,
@@ -1167,6 +1173,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_net_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1226,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1276,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->hw.device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->hw.device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1305,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->hw.device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1308,11 +1369,25 @@ struct internal_list {
 static const struct rte_pci_id pci_id_ifcvf_map[] = {
 	{ .class_id = RTE_CLASS_ANY_ID,
 	  .vendor_id = IFCVF_VENDOR_ID,
-	  .device_id = IFCVF_DEVICE_ID,
+	  .device_id = IFCVF_NET_DEVICE_ID,
 	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 02/13] vhost: add vDPA ops for blk device
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-05-23  9:35     ` [PATCH v9 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  2:38       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 03/13] vhost: add vhost msg support for get/set config Andy Pei
                       ` (10 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vDPA ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..c4233a6 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t size);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 03/13] vhost: add vhost msg support for get/set config
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-05-23  9:35     ` [PATCH v9 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-05-23  9:35     ` [PATCH v9 02/13] vhost: add vDPA ops for " Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  2:52       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (9 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk vDPA devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 13 ++++++++
 2 files changed, 101 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 850848c..b37d814 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2468,6 +2468,92 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (!vdpa_dev) {
+		VHOST_LOG_CONFIG(ERR,
+				 "(%s) is not vDPA device!\n",
+				 dev->ifname);
+		goto out;
+	}
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) get_config() return error!\n",
+					 dev->ifname);
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
+				 dev->ifname);
+	}
+
+out:
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) vhost_user_config size: %"PRIu32", should not be larger than %d\n",
+			dev->ifname, ctx->msg.payload.cfg.size,
+			VHOST_USER_MAX_CONFIG_SIZE);
+		goto out;
+	}
+
+	if (!vdpa_dev) {
+		VHOST_LOG_CONFIG(ERR,
+				 "(%s) is not vDPA device!\n",
+				 dev->ifname);
+		goto out;
+	}
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+		if (ret)
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) set_config() return error!\n",
+					 dev->ifname);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+
+out:
+	return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2686,6 +2772,8 @@ static int is_vring_iotlb(struct virtio_net *dev,
 VHOST_MESSAGE_HANDLER(VHOST_USER_NET_SET_MTU, vhost_user_net_set_mtu, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_SET_SLAVE_REQ_FD, vhost_user_set_req_fd, true) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_IOTLB_MSG, vhost_user_iotlb_msg, false) \
+VHOST_MESSAGE_HANDLER(VHOST_USER_GET_CONFIG, vhost_user_get_config, false) \
+VHOST_MESSAGE_HANDLER(VHOST_USER_SET_CONFIG, vhost_user_set_config, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_ADVISE, vhost_user_set_postcopy_advise, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_LISTEN, vhost_user_set_postcopy_listen, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_END, vhost_user_postcopy_end, false) \
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index ba1c5c7..c4d091e 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -123,6 +125,16 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -146,6 +158,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 03/13] vhost: add vhost msg support for get/set config Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  2:55       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
                       ` (8 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 91 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index a761d49..b6fdfdb 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,10 @@
 #define IFCVF_32_BIT_MASK		0xffffffff
 
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 1eed90b..c1fc1d7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1087,6 +1087,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1199,6 +1203,91 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t size)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (size != sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			size, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	/* The capacity is a count of 512-byte sectors.
+	 * Shifting right by 1 bit converts it to KiB,
+	 * by 10 more bits to MiB,
+	 * and by a further 10 bits to GiB.
+	 * So shift right by 21 bits in total to log the capacity in GiB.
+	 */
+	DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(DEBUG, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(DEBUG, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(DEBUG, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(DEBUG, "geometry");
+	DRV_LOG(DEBUG, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(DEBUG, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(DEBUG, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(DEBUG, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(DEBUG, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = ifcvf_set_vring_state,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1211,7 +1300,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 05/13] vdpa/ifc: add vDPA interrupt relay for blk device
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  2:58       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (7 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
For the net device type, only the interrupts of the rx queues need to be
relayed. For the block device type, all queues are used for both read and
write requests, so the interrupts of all queues need to be relayed.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index c1fc1d7..1d05529 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -369,6 +369,7 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
@@ -378,7 +379,13 @@ struct rte_vdpa_dev_info {
 	for (i = 0; i < nr_vring; i++) {
 		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
 		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
+		if (m_rx == true &&
+			((i & 1) == 0 || internal->hw.device_type == IFCVF_BLK)) {
+			/* For net, only the rx queues need to be relayed,
+			 * since they are the ones that change VM memory.
+			 * For blk, every queue must be relayed because
+			 * each queue can carry read commands.
+			 */
 			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 			if (fd < 0) {
 				DRV_LOG(ERR, "can't setup eventfd: %s",
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 06/13] vdpa/ifc: add block device SW live-migration
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  3:00       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 07/13] vhost: add API to get vDPA device type Andy Pei
                       ` (6 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add SW live-migration support for block devices.
For a block device, it is critical that no request
is dropped. So when a virtio blk device is paused,
make sure the hardware last_avail_idx and
last_used_idx are the same. This indicates that all
requests have been acknowledged and no I/O is in flight.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 42 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index b6fdfdb..9d95aac 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -63,6 +63,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 1d05529..7a0bdb4 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -316,8 +316,34 @@ struct rte_vdpa_dev_info {
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
+
+	/* to make sure no packet is lost for blk device
+	 * do not stop until last_avail_idx == last_used_idx
+	 */
+	if (internal->hw.device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
+		}
+	}
+
 	ifcvf_stop_hw(hw);
 
 	for (i = 0; i < hw->nr_vring; i++)
@@ -641,8 +667,10 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NET: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((internal->hw.device_type == IFCVF_NET) && (i & 1)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
@@ -692,8 +720,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->hw.device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->hw.device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -755,7 +787,9 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
+	for (qid = 0; qid < q_num; qid += 1) {
+		if ((internal->hw.device_type == IFCVF_NET) && (qid & 1))
+			continue;
 		ev.events = EPOLLIN | EPOLLPRI;
 		/* leave a flag to mark it's for interrupt */
 		ev.data.u64 = 1 | qid << 1 |
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 07/13] vhost: add API to get vDPA device type
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  3:01       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
                       ` (5 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Vhost backends of different device types have different features.
Add an API to get the vDPA device type (currently net or blk),
so that users can set different features for different
kinds of devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 doc/guides/prog_guide/vhost_lib.rst    |  5 ++++
 doc/guides/rel_notes/release_22_07.rst |  4 ++++
 lib/vhost/rte_vhost.h                  | 17 +++++++++++++
 lib/vhost/socket.c                     | 44 ++++++++++++++++++++++++++++++++++
 lib/vhost/vdpa_driver.h                |  3 +++
 lib/vhost/version.map                  |  1 +
 6 files changed, 74 insertions(+)
diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index f287b76..0337b38 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -282,6 +282,11 @@ The following is an overview of some key Vhost API functions:
   Clear inflight packets which are submitted to DMA engine in vhost async data
   path. Completed packets are returned to applications through ``pkts``.
 
+* ``rte_vhost_driver_get_vdpa_dev_type(path, type)``
+
+  Get device type of vDPA device, such as RTE_VHOST_VDPA_DEVICE_TYPE_NET,
+  RTE_VHOST_VDPA_DEVICE_TYPE_BLK.
+
 Vhost-user Implementations
 --------------------------
 
diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
index e49cace..63875b7 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -60,6 +60,10 @@ New Features
   Added an API which can get the number of in-flight packets in
   vhost async data path without using lock.
 
+* **Added vhost API to get the device type of a vDPA device.**
+
+  Added an API which can get the device type of vDPA device.
+
 * **Updated Intel iavf driver.**
 
   * Added Tx QoS queue rate limitation support.
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index c733f85..2f130ec 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -117,6 +117,9 @@
 
 #define RTE_MAX_VHOST_DEVICE	1024
 
+#define RTE_VHOST_VDPA_DEVICE_TYPE_NET 0
+#define RTE_VHOST_VDPA_DEVICE_TYPE_BLK 1
+
 struct rte_vdpa_device;
 
 /**
@@ -486,6 +489,20 @@ struct rte_vdpa_device *
 rte_vhost_driver_get_vdpa_device(const char *path);
 
 /**
+ * Get the device type of the vdpa device.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param type
+ *  the device type of the vdpa device
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
+
+/**
  * Set the feature bits the vhost-user driver supports.
  *
  * @param path
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index b304339..baef4d2 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -619,6 +619,50 @@ struct rte_vdpa_device *
 }
 
 int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
+{
+	struct vhost_user_socket *vsocket;
+	struct rte_vdpa_device *vdpa_dev;
+	uint32_t vdpa_type = 0;
+	int ret = 0;
+
+	pthread_mutex_lock(&vhost_user.mutex);
+	vsocket = find_vhost_user_socket(path);
+	if (!vsocket) {
+		VHOST_LOG_CONFIG(ERR,
+				 "(%s) socket file is not registered yet.\n",
+				 path);
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	vdpa_dev = vsocket->vdpa_dev;
+	if (!vdpa_dev) {
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	if (vdpa_dev->ops->get_dev_type) {
+		ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
+		if (ret) {
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) failed to get vdpa dev type for socket file.\n",
+					 path);
+			ret = -1;
+			goto unlock_exit;
+		}
+	} else {
+		vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+	}
+
+	*type = vdpa_type;
+
+unlock_exit:
+	pthread_mutex_unlock(&vhost_user.mutex);
+	return ret;
+}
+
+int
 rte_vhost_driver_disable_features(const char *path, uint64_t features)
 {
 	struct vhost_user_socket *vsocket;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index c4233a6..8b88a53 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
 	/** Set the device configuration space */
 	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
 		      uint32_t size, uint32_t flags);
+
+	/** get device type: net device, blk device... */
+	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
 };
 
 /**
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 5841315..583b4f3 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -90,6 +90,7 @@ EXPERIMENTAL {
 
 	# added in 22.07
 	rte_vhost_async_get_inflight_thread_unsafe;
+	rte_vhost_driver_get_vdpa_dev_type;
 
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 08/13] vdpa/ifc: add get device type ops to ifc driver
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 07/13] vhost: add API to get vDPA device type Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  3:02       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 09/13] examples/vdpa: add virtio blk support Andy Pei
                       ` (4 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add get device type ops to ifc driver.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 7a0bdb4..1c5746a 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1189,6 +1189,29 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static int
+ifcvf_get_device_type(struct rte_vdpa_device *vdev,
+	uint32_t *type)
+{
+	struct ifcvf_internal *internal;
+	struct internal_list *list;
+
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	if (internal->hw.device_type == IFCVF_BLK)
+		*type = RTE_VHOST_VDPA_DEVICE_TYPE_BLK;
+	else
+		*type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+
+	return 0;
+}
+
 static struct rte_vdpa_dev_ops ifcvf_net_ops = {
 	.get_queue_num = ifcvf_get_queue_num,
 	.get_features = ifcvf_get_vdpa_features,
@@ -1201,6 +1224,7 @@ struct rte_vdpa_dev_info {
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
 	.get_notify_area = ifcvf_get_notify_area,
+	.get_dev_type = ifcvf_get_device_type,
 };
 
 static inline int
@@ -1327,6 +1351,7 @@ struct rte_vdpa_dev_info {
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
 	.get_notify_area = ifcvf_get_notify_area,
 	.get_config = ifcvf_blk_get_config,
+	.get_dev_type = ifcvf_get_device_type,
 };
 
 struct rte_vdpa_dev_info dev_info[] = {
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 09/13] examples/vdpa: add virtio blk support
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  3:03       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 10/13] usertools: add support for virtio blk device Andy Pei
                       ` (3 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add virtio blk device support to vDPA example.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 examples/vdpa/main.c             | 56 ++++++++++++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 50 +++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..7e11ef4 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -159,8 +160,53 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
+	uint32_t device_type = 0;
 	int ret;
 	char *socket_path = vport->ifname;
 
@@ -192,6 +238,16 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type);
+	if (ret == 0 && device_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+		RTE_LOG(NOTICE, VDPA, "%s is a blk device\n", socket_path);
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..143548e
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+
+/* Legacy feature bits */
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1))
+
+#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX))
+
+#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \
+	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 10/13] usertools: add support for virtio blk device
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 09/13] examples/vdpa: add virtio blk support Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  3:05       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
                       ` (2 subsequent siblings)
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add virtio blk device support to devbind.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 usertools/dpdk-devbind.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..18c7d67 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -72,6 +72,9 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001,1042',
+                    'SVendor': None, 'SDevice': None}
+
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -82,7 +85,7 @@
 compress_devices = [cavium_zip]
 regex_devices = [cn9k_ree]
 misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev,
-                intel_ntb_skx, intel_ntb_icx]
+                intel_ntb_skx, intel_ntb_icx, virtio_blk]
 
 # global dict ethernet devices present. Dictionary indexed by PCI address.
 # Each device within this is itself a dictionary of device properties
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 11/13] vdpa/ifc: add log for config space of virtio blk
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 10/13] usertools: add support for virtio blk device Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  3:06       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
  2022-05-23  9:35     ` [PATCH v9 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add some log of virtio blk device config space information
at VDPA launch before qemu connects.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 1c5746a..80d6261 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1382,6 +1382,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1448,6 +1451,37 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		/* The capacity is the number of 512-byte sectors.
+		 * Right-shifting by 1 bit gives the size in K,
+		 * right-shifting by 10 more bits gives the size in M,
+		 * and right-shifting by another 10 bits gives the size in G.
+		 * To show the capacity in G, we right-shift by 21 bits in total.
+		 */
+		DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(DEBUG, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(DEBUG, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(DEBUG, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(DEBUG, "geometry");
+		DRV_LOG(DEBUG, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(DEBUG, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(DEBUG, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(DEBUG, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 12/13] vdpa/ifc: add interrupt handling for config space
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  3:09       ` Xia, Chenbo
  2022-05-23  9:35     ` [PATCH v9 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Create a thread to poll and relay config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 121 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 120 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 80d6261..af1f192 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid; /* thread for config space change interrupt relay */
 	int epfd;
+	int csc_epfd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -566,6 +568,114 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_epfd, csc_val = 0;
+
+	csc_epfd = epoll_create(1);
+	if (csc_epfd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_epfd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		goto out;
+	}
+
+	internal->csc_epfd = csc_epfd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_epfd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail.");
+			goto out;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR ||
+				    errno == EWOULDBLOCK ||
+				    errno == EAGAIN)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				goto out;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+
+out:
+	if (csc_epfd >= 0)
+		close(csc_epfd);
+	internal->csc_epfd = -1;
+
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	char name[THREAD_NAME_LEN];
+	int ret;
+
+	snprintf(name, sizeof(name), "ifc-intr-%d", internal->vid);
+	ret = rte_ctrl_thread_create(&internal->intr_tid, name, NULL,
+				     intr_relay, (void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static void
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_epfd >= 0)
+		close(internal->csc_epfd);
+	internal->csc_epfd = -1;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -592,10 +702,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -812,7 +928,7 @@ struct rte_vdpa_dev_info {
 		if (nfds < 0) {
 			if (errno == EINTR)
 				continue;
-			DRV_LOG(ERR, "epoll_wait return fail\n");
+			DRV_LOG(ERR, "epoll_wait return fail.");
 			return NULL;
 		}
 
@@ -888,6 +1004,9 @@ struct rte_vdpa_dev_info {
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
 	vdpa_ifcvf_stop(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v9 13/13] vdpa/ifc/base: access correct register for blk device
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-05-23  9:35     ` [PATCH v9 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
@ 2022-05-23  9:35     ` Andy Pei
  2022-05-24  3:10       ` Xia, Chenbo
  12 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-23  9:35 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Register addresses differ between net and blk devices.
We are re-using most of the code; where a register address is
different, we have to check the device type so that net and blk
devices go through different code.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..dd475a7 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,17 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->device_type == IFCVF_BLK)
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		else
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +261,22 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->device_type == IFCVF_BLK)
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		else
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+
+		if (hw->device_type == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 01/13] vdpa/ifc: add support for virtio blk device
  2022-05-23  9:35     ` [PATCH v9 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-05-24  2:37       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  2:37 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 01/13] vdpa/ifc: add support for virtio blk device
> 
> Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id.
> Blk and net device are implemented with proper feature and ops.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.h | 22 +++++++---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 95
> ++++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 102 insertions(+), 15 deletions(-)
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 02/13] vhost: add vDPA ops for blk device
  2022-05-23  9:35     ` [PATCH v9 02/13] vhost: add vDPA ops for " Andy Pei
@ 2022-05-24  2:38       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  2:38 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 02/13] vhost: add vDPA ops for blk device
> 
> Get_config and set_config are necessary ops for blk device.
> Add get_config and set_config ops to vDPA ops.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/vhost/vdpa_driver.h | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
> index 88138be..c4233a6 100644
> --- a/lib/vhost/vdpa_driver.h
> +++ b/lib/vhost/vdpa_driver.h
> @@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
>  	/** Reset statistics of the queue */
>  	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
> 
> -	/** Reserved for future extension */
> -	void *reserved[2];
> +	/** Get the device configuration space */
> +	int (*get_config)(int vid, uint8_t *config, uint32_t size);
> +
> +	/** Set the device configuration space */
> +	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
> +		      uint32_t size, uint32_t flags);
>  };
> 
>  /**
> --
> 1.8.3.1
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc
  2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
                     ` (7 preceding siblings ...)
  2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-05-24  2:48   ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
                       ` (13 more replies)
  8 siblings, 14 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Since the devices have a lot in common, I re-use part of the vdpa/ifc driver.
Distinguish the virtio net and blk devices by device id, and implement
specific features and ops for each.
Add virtio_blk device support to the vdpa example.
To support blk device live migration, some modifications are made to the vhost lib.
Perform dev_conf op only under VHOST_USER_SET_VRING_CALL msg.
v10:
 fix typo.
 add more NULL pointer check.
 fix an error handler.
v9:
 fix some naming issue.
 add more NULL pointer check.
 close open file when errors occur.
v8:
 delete some redundant code.
 fix some commit log.
v7:
 Check on expected fd num in new vhost msg handler.
 Sanity check on vhost msg size.
 Fix typo.
 Add commit log to help understand code.
 Remove duplicated code.
 Add new API to get vDPA device type.
v6:
 fix some commit log.
 add vhost socket in log output to make it more user-friendly.
 when driver ops fail, just output some log, do not break message handler.
 check vhost msg size in msg handler.
v5:
 fix some coding style issues.
v4:
 add args "isblk" to vdpa example to specify a block device, fix some
 issue in example.
 Make sure code specify for block device does not affect net device.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.
Andy Pei (13):
  vdpa/ifc: add support for virtio blk device
  vhost: add vDPA ops for blk device
  vhost: add vhost msg support for get/set config
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vDPA interrupt relay for blk device
  vdpa/ifc: add block device SW live-migration
  vhost: add API to get vDPA device type
  vdpa/ifc: add get device type ops to ifc driver
  examples/vdpa: add virtio blk support
  usertools: add support for virtio blk device
  vdpa/ifc: add log for config space of virtio blk
  vdpa/ifc: add interrupt handling for config space
  vdpa/ifc/base: access correct register for blk device
 doc/guides/prog_guide/vhost_lib.rst    |   5 +
 doc/guides/rel_notes/release_22_07.rst |   4 +
 drivers/vdpa/ifc/base/ifcvf.c          |  34 ++-
 drivers/vdpa/ifc/base/ifcvf.h          |  27 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c          | 420 +++++++++++++++++++++++++++++++--
 examples/vdpa/main.c                   |  56 +++++
 examples/vdpa/vdpa_blk_compact.h       |  50 ++++
 lib/vhost/rte_vhost.h                  |  17 ++
 lib/vhost/socket.c                     |  44 ++++
 lib/vhost/vdpa_driver.h                |  11 +-
 lib/vhost/version.map                  |   1 +
 lib/vhost/vhost_user.c                 |  85 +++++++
 lib/vhost/vhost_user.h                 |  13 +
 usertools/dpdk-devbind.py              |   5 +-
 14 files changed, 741 insertions(+), 31 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 01/13] vdpa/ifc: add support for virtio blk device
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 02/13] vhost: add vDPA ops for " Andy Pei
                       ` (12 subsequent siblings)
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id.
Blk and net device are implemented with proper feature and ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h | 22 +++++++---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 95 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 102 insertions(+), 15 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..a761d49 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,12 +5,19 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include <linux/virtio_blk.h>
 #include "ifcvf_osdep.h"
 
-#define IFCVF_VENDOR_ID		0x1AF4
-#define IFCVF_DEVICE_ID		0x1041
-#define IFCVF_SUBSYS_VENDOR_ID	0x8086
-#define IFCVF_SUBSYS_DEVICE_ID	0x001A
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+#define IFCVF_VENDOR_ID                     0x1AF4
+#define IFCVF_NET_DEVICE_ID                 0x1041
+#define IFCVF_BLK_MODERN_DEVICE_ID          0x1042
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID    0x1001
+#define IFCVF_SUBSYS_VENDOR_ID              0x8086
+#define IFCVF_SUBSYS_DEVICE_ID              0x001A
+#define IFCVF_BLK_DEVICE_ID                 0x0002
 
 #define IFCVF_MAX_QUEUES		1
 
@@ -126,13 +133,18 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
 	u8     *lm_cfg;
 	struct vring_info vring[IFCVF_MAX_QUEUES * 2];
 	u8 nr_vring;
+	int device_type;
 	struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..1eed90b 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -75,6 +75,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1138,7 +1144,7 @@ struct internal_list {
 	return 0;
 }
 
-static struct rte_vdpa_dev_ops ifcvf_ops = {
+static struct rte_vdpa_dev_ops ifcvf_net_ops = {
 	.get_queue_num = ifcvf_get_queue_num,
 	.get_features = ifcvf_get_vdpa_features,
 	.get_protocol_features = ifcvf_get_protocol_features,
@@ -1167,6 +1173,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_net_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1226,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1276,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_device_type(pci_dev);
+	if (device_id < 0) {
+		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
+		goto error;
+	}
+
+	if (device_id == VIRTIO_ID_NET) {
+		internal->hw.device_type = IFCVF_NET;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_NET].features;
+	} else if (device_id == VIRTIO_ID_BLOCK) {
+		internal->hw.device_type = IFCVF_BLK;
+		internal->features = features &
+					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+		internal->features |= dev_info[IFCVF_BLK].features;
+	}
 
 	list->internal = internal;
 
@@ -1245,7 +1305,8 @@ struct internal_list {
 	}
 	internal->sw_lm = sw_fallback_lm;
 
-	internal->vdev = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
+	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
+				dev_info[internal->hw.device_type].ops);
 	if (internal->vdev == NULL) {
 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
 		goto error;
@@ -1308,11 +1369,25 @@ struct internal_list {
 static const struct rte_pci_id pci_id_ifcvf_map[] = {
 	{ .class_id = RTE_CLASS_ANY_ID,
 	  .vendor_id = IFCVF_VENDOR_ID,
-	  .device_id = IFCVF_DEVICE_ID,
+	  .device_id = IFCVF_NET_DEVICE_ID,
 	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
 	},
 
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
+	{ .class_id = RTE_CLASS_ANY_ID,
+	  .vendor_id = IFCVF_VENDOR_ID,
+	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
+	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+	  .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+	},
+
 	{ .vendor_id = 0, /* sentinel */
 	},
 };
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 02/13] vhost: add vDPA ops for blk device
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-05-24  2:48     ` [PATCH v10 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 03/13] vhost: add vhost msg support for get/set config Andy Pei
                       ` (11 subsequent siblings)
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vDPA ops.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..c4233a6 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t size);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 03/13] vhost: add vhost msg support for get/set config
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
  2022-05-24  2:48     ` [PATCH v10 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
  2022-05-24  2:48     ` [PATCH v10 02/13] vhost: add vDPA ops for " Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  3:49       ` Xia, Chenbo
  2022-05-24  2:48     ` [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
                       ` (10 subsequent siblings)
  13 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk vDPA devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 lib/vhost/vhost_user.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 13 ++++++++
 2 files changed, 98 insertions(+)
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 850848c..4f2a777 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2468,6 +2468,89 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (!vdpa_dev) {
+		VHOST_LOG_CONFIG(ERR, "(%s) is not vDPA device!\n",
+				 dev->ifname);
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) get_config() return error!\n",
+					 dev->ifname);
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+			struct vhu_msg_context *ctx,
+			int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) vhost_user_config size: %"PRIu32", should not be larger than %d\n",
+			dev->ifname, ctx->msg.payload.cfg.size,
+			VHOST_USER_MAX_CONFIG_SIZE);
+		goto out;
+	}
+
+	if (!vdpa_dev) {
+		VHOST_LOG_CONFIG(ERR, "(%s) is not vDPA device!\n",
+				 dev->ifname);
+		goto out;
+	}
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+		if (ret)
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) set_config() return error!\n",
+					 dev->ifname);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+
+out:
+	return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2686,6 +2769,8 @@ static int is_vring_iotlb(struct virtio_net *dev,
 VHOST_MESSAGE_HANDLER(VHOST_USER_NET_SET_MTU, vhost_user_net_set_mtu, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_SET_SLAVE_REQ_FD, vhost_user_set_req_fd, true) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_IOTLB_MSG, vhost_user_iotlb_msg, false) \
+VHOST_MESSAGE_HANDLER(VHOST_USER_GET_CONFIG, vhost_user_get_config, false) \
+VHOST_MESSAGE_HANDLER(VHOST_USER_SET_CONFIG, vhost_user_set_config, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_ADVISE, vhost_user_set_postcopy_advise, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_LISTEN, vhost_user_set_postcopy_listen, false) \
 VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_END, vhost_user_postcopy_end, false) \
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index ba1c5c7..c4d091e 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
 	VHOST_USER_NET_SET_MTU = 20,
 	VHOST_USER_SET_SLAVE_REQ_FD = 21,
 	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
 	VHOST_USER_POSTCOPY_ADVISE = 28,
@@ -123,6 +125,16 @@
 	uint16_t queue_size;
 } VhostUserInflight;
 
+#define VHOST_USER_MAX_CONFIG_SIZE		256
+
+/** Get/set config msg payload */
+struct vhost_user_config {
+	uint32_t offset;
+	uint32_t size;
+	uint32_t flags;
+	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+};
+
 typedef struct VhostUserMsg {
 	union {
 		uint32_t master; /* a VhostUserRequest value */
@@ -146,6 +158,7 @@
 		VhostUserCryptoSessionParam crypto_session;
 		VhostUserVringArea area;
 		VhostUserInflight inflight;
+		struct vhost_user_config cfg;
 	} payload;
 	/* Nothing should be added after the payload */
 } __rte_packed VhostUserMsg;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (2 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 03/13] vhost: add vhost msg support for get/set config Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  6:28       ` Xia, Chenbo
  2022-05-24  2:48     ` [PATCH v10 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
                       ` (9 subsequent siblings)
  13 siblings, 1 reply; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 96 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 99 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index a761d49..b6fdfdb 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,10 @@
 #define IFCVF_32_BIT_MASK		0xffffffff
 
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;            /* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 1eed90b..10a4a25 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1087,6 +1087,10 @@ struct rte_vdpa_dev_info {
 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1199,6 +1203,96 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t size)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (size != sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			size, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	if (vdev == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device vid: %d", vid);
+		return -1;
+	}
+
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte. */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	/* The capacity is number of sectors in 512-byte.
+	 * So right shift 1 bit  we get in K,
+	 * another right shift 10 bits we get in M,
+	 * right shift 10 more bits, we get in G.
+	 * To show capacity in G, we right shift 21 bits in total.
+	 */
+	DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(DEBUG, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(DEBUG, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(DEBUG, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(DEBUG, "geometry");
+	DRV_LOG(DEBUG, "      cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(DEBUG, "      heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(DEBUG, "      sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(DEBUG, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(DEBUG, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = ifcvf_set_vring_state,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1211,7 +1305,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 05/13] vdpa/ifc: add vDPA interrupt relay for blk device
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (3 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
                       ` (8 subsequent siblings)
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
For the net device type, only the interrupts of the rx queues need to be
relayed. For the block device type, all queues are used for both read and
write requests, so the interrupts of all queues need to be relayed.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 10a4a25..61fb427 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -369,6 +369,7 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
@@ -378,7 +379,13 @@ struct rte_vdpa_dev_info {
 	for (i = 0; i < nr_vring; i++) {
 		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
 		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
+		if (m_rx == true &&
+			((i & 1) == 0 || internal->hw.device_type == IFCVF_BLK)) {
+			/* For the net we only need to relay rx queue,
+			 * which will change the mem of VM.
+			 * For the blk we need to relay all the read cmd
+			 * of each queue
+			 */
 			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 			if (fd < 0) {
 				DRV_LOG(ERR, "can't setup eventfd: %s",
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 06/13] vdpa/ifc: add block device SW live-migration
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (4 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 07/13] vhost: add API to get vDPA device type Andy Pei
                       ` (7 subsequent siblings)
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add SW live-migration support for block devices.
For a block device, it is critical that no packet
is dropped. So when a virtio blk device is
paused, make sure the hardware last_avail_idx and
last_used_idx are the same. This indicates that all
requests have received acks and there is no inflight IO.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 42 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index b6fdfdb..9d95aac 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -63,6 +63,7 @@
 #define IFCVF_MEDIATED_VRING		0x200000000000
 
 #define IFCVF_32_BIT_MASK		0xffffffff
+#define IFCVF_16_BIT_MASK		0xffff
 
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 61fb427..0f9db8a 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -316,8 +316,34 @@ struct rte_vdpa_dev_info {
 	uint64_t features = 0;
 	uint64_t log_base = 0, log_size = 0;
 	uint64_t len;
+	u32 ring_state = 0;
 
 	vid = internal->vid;
+
+	/* to make sure no packet is lost for blk device
+	 * do not stop until last_avail_idx == last_used_idx
+	 */
+	if (internal->hw.device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			do {
+				if (hw->lm_cfg != NULL)
+					ring_state = *(u32 *)(hw->lm_cfg +
+						IFCVF_LM_RING_STATE_OFFSET +
+						i * IFCVF_LM_CFG_SIZE);
+				hw->vring[i].last_avail_idx =
+					(u16)(ring_state & IFCVF_16_BIT_MASK);
+				hw->vring[i].last_used_idx =
+					(u16)(ring_state >> 16);
+				if (hw->vring[i].last_avail_idx !=
+					hw->vring[i].last_used_idx) {
+					ifcvf_notify_queue(hw, i);
+					usleep(10);
+				}
+			} while (hw->vring[i].last_avail_idx !=
+				hw->vring[i].last_used_idx);
+		}
+	}
+
 	ifcvf_stop_hw(hw);
 
 	for (i = 0; i < hw->nr_vring; i++)
@@ -641,8 +667,10 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NET: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((internal->hw.device_type == IFCVF_NET) && (i & 1)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
@@ -692,8 +720,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->hw.device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->hw.device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -755,7 +787,9 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
+	for (qid = 0; qid < q_num; qid += 1) {
+		if ((internal->hw.device_type == IFCVF_NET) && (qid & 1))
+			continue;
 		ev.events = EPOLLIN | EPOLLPRI;
 		/* leave a flag to mark it's for interrupt */
 		ev.data.u64 = 1 | qid << 1 |
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 07/13] vhost: add API to get vDPA device type
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (5 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
                       ` (6 subsequent siblings)
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Vhost backends of different devices have different features.
Add an API to get the vDPA device type (currently net device
or blk device), so users can set different features for
different kinds of devices.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 doc/guides/prog_guide/vhost_lib.rst    |  5 ++++
 doc/guides/rel_notes/release_22_07.rst |  4 ++++
 lib/vhost/rte_vhost.h                  | 17 +++++++++++++
 lib/vhost/socket.c                     | 44 ++++++++++++++++++++++++++++++++++
 lib/vhost/vdpa_driver.h                |  3 +++
 lib/vhost/version.map                  |  1 +
 6 files changed, 74 insertions(+)
diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index f287b76..0337b38 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -282,6 +282,11 @@ The following is an overview of some key Vhost API functions:
   Clear inflight packets which are submitted to DMA engine in vhost async data
   path. Completed packets are returned to applications through ``pkts``.
 
+* ``rte_vhost_driver_get_vdpa_dev_type(path, type)``
+
+  Get the device type of a vDPA device, such as
+  ``RTE_VHOST_VDPA_DEVICE_TYPE_NET`` or ``RTE_VHOST_VDPA_DEVICE_TYPE_BLK``.
+
 Vhost-user Implementations
 --------------------------
 
diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
index e49cace..63875b7 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -60,6 +60,10 @@ New Features
   Added an API which can get the number of in-flight packets in
   vhost async data path without using lock.
 
+* **Added vhost API to get the device type of a vDPA device.**
+
+  Added an API which can get the device type of a vDPA device.
+
 * **Updated Intel iavf driver.**
 
   * Added Tx QoS queue rate limitation support.
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index c733f85..2f130ec 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -117,6 +117,9 @@
 
 #define RTE_MAX_VHOST_DEVICE	1024
 
+#define RTE_VHOST_VDPA_DEVICE_TYPE_NET 0
+#define RTE_VHOST_VDPA_DEVICE_TYPE_BLK 1
+
 struct rte_vdpa_device;
 
 /**
@@ -486,6 +489,20 @@ struct rte_vdpa_device *
 rte_vhost_driver_get_vdpa_device(const char *path);
 
 /**
+ * Get the device type of the vdpa device.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param type
+ *  the device type of the vdpa device
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
+
+/**
  * Set the feature bits the vhost-user driver supports.
  *
  * @param path
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index b304339..baef4d2 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -619,6 +619,50 @@ struct rte_vdpa_device *
 }
 
 int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
+{
+	struct vhost_user_socket *vsocket;
+	struct rte_vdpa_device *vdpa_dev;
+	uint32_t vdpa_type = 0;
+	int ret = 0;
+
+	pthread_mutex_lock(&vhost_user.mutex);
+	vsocket = find_vhost_user_socket(path);
+	if (!vsocket) {
+		VHOST_LOG_CONFIG(ERR,
+				 "(%s) socket file is not registered yet.\n",
+				 path);
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	vdpa_dev = vsocket->vdpa_dev;
+	if (!vdpa_dev) {
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	if (vdpa_dev->ops->get_dev_type) {
+		ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
+		if (ret) {
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) failed to get vdpa dev type for socket file.\n",
+					 path);
+			ret = -1;
+			goto unlock_exit;
+		}
+	} else {
+		vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+	}
+
+	*type = vdpa_type;
+
+unlock_exit:
+	pthread_mutex_unlock(&vhost_user.mutex);
+	return ret;
+}
+
+int
 rte_vhost_driver_disable_features(const char *path, uint64_t features)
 {
 	struct vhost_user_socket *vsocket;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index c4233a6..8b88a53 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
 	/** Set the device configuration space */
 	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
 		      uint32_t size, uint32_t flags);
+
+	/** get device type: net device, blk device... */
+	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
 };
 
 /**
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 5841315..583b4f3 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -90,6 +90,7 @@ EXPERIMENTAL {
 
 	# added in 22.07
 	rte_vhost_async_get_inflight_thread_unsafe;
+	rte_vhost_driver_get_vdpa_dev_type;
 
 };
 
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 08/13] vdpa/ifc: add get device type ops to ifc driver
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (6 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 07/13] vhost: add API to get vDPA device type Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 09/13] examples/vdpa: add virtio blk support Andy Pei
                       ` (5 subsequent siblings)
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add get device type ops to ifc driver.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 0f9db8a..f4c6198 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1189,6 +1189,29 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static int
+ifcvf_get_device_type(struct rte_vdpa_device *vdev,
+	uint32_t *type)
+{
+	struct ifcvf_internal *internal;
+	struct internal_list *list;
+
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	if (internal->hw.device_type == IFCVF_BLK)
+		*type = RTE_VHOST_VDPA_DEVICE_TYPE_BLK;
+	else
+		*type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+
+	return 0;
+}
+
 static struct rte_vdpa_dev_ops ifcvf_net_ops = {
 	.get_queue_num = ifcvf_get_queue_num,
 	.get_features = ifcvf_get_vdpa_features,
@@ -1201,6 +1224,7 @@ struct rte_vdpa_dev_info {
 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
 	.get_notify_area = ifcvf_get_notify_area,
+	.get_dev_type = ifcvf_get_device_type,
 };
 
 static inline int
@@ -1332,6 +1356,7 @@ struct rte_vdpa_dev_info {
 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
 	.get_notify_area = ifcvf_get_notify_area,
 	.get_config = ifcvf_blk_get_config,
+	.get_dev_type = ifcvf_get_device_type,
 };
 
 struct rte_vdpa_dev_info dev_info[] = {
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 09/13] examples/vdpa: add virtio blk support
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (7 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 10/13] usertools: add support for virtio blk device Andy Pei
                       ` (4 subsequent siblings)
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add virtio blk device support to vDPA example.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 examples/vdpa/main.c             | 56 ++++++++++++++++++++++++++++++++++++++++
 examples/vdpa/vdpa_blk_compact.h | 50 +++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..7e11ef4 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include <cmdline_parse_string.h>
 #include <cmdline_parse_num.h>
 #include <cmdline.h>
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -159,8 +160,53 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+	uint64_t protocol_features = 0;
+	int ret;
+
+	ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_disable_features(path,
+		VHOST_BLK_DISABLED_FEATURES);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_disable_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_get_protocol_features for %s failed.\n",
+			path);
+		goto out;
+	}
+
+	protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
+
+	ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+	if (ret != 0) {
+		RTE_LOG(ERR, VDPA,
+			"rte_vhost_driver_set_protocol_features for %s failed.\n",
+			path);
+	}
+
+out:
+	return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
+	uint32_t device_type = 0;
 	int ret;
 	char *socket_path = vport->ifname;
 
@@ -192,6 +238,16 @@ struct vdpa_port {
 			"attach vdpa device failed: %s\n",
 			socket_path);
 
+	ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type);
+	if (ret == 0 && device_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+		RTE_LOG(NOTICE, VDPA, "%s is a blk device\n", socket_path);
+		ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"set vhost blk driver features and protocol features failed: %s\n",
+				socket_path);
+	}
+
 	if (rte_vhost_driver_start(socket_path) < 0)
 		rte_exit(EXIT_FAILURE,
 			"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 0000000..143548e
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+#include <rte_vhost.h>
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX     1    /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX      2    /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY     4    /* Legacy geometry available  */
+#define VIRTIO_BLK_F_BLK_SIZE     6    /* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY     10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ           12   /* support more than one vq */
+
+/* Legacy feature bits */
+#define VIRTIO_BLK_F_BARRIER      0    /* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI         7    /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+
+#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
+	(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+	(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+	(1ULL << VIRTIO_F_VERSION_1))
+
+#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+	(1ULL << VIRTIO_RING_F_EVENT_IDX))
+
+#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+	(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
+	(1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
+	(1ULL << VIRTIO_BLK_F_MQ))
+
+/* Not supported features */
+#define VHOST_BLK_DISABLED_FEATURES (VHOST_BLK_DISABLED_FEATURES_BASE | \
+	(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \
+	(1ULL << VIRTIO_BLK_F_SCSI)  | (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
+
+/* Vhost-blk support protocol features */
+#define VHOST_BLK_PROTOCOL_FEATURES \
+	((1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
+	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))
+
+#endif /* _VDPA_BLK_COMPACT_H_ */
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 10/13] usertools: add support for virtio blk device
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (8 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 09/13] examples/vdpa: add virtio blk support Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
                       ` (3 subsequent siblings)
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Add virtio blk device support to devbind.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Acked-by: Chenbo Xia <chenbo.xia@intel.com>
---
 usertools/dpdk-devbind.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..18c7d67 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -72,6 +72,9 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
                  'SVendor': None, 'SDevice': None}
 
+virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001,1042',
+                    'SVendor': None, 'SDevice': None}
+
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -82,7 +85,7 @@
 compress_devices = [cavium_zip]
 regex_devices = [cn9k_ree]
 misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev,
-                intel_ntb_skx, intel_ntb_icx]
+                intel_ntb_skx, intel_ntb_icx, virtio_blk]
 
 # global dict ethernet devices present. Dictionary indexed by PCI address.
 # Each device within this is itself a dictionary of device properties
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 11/13] vdpa/ifc: add log for config space of virtio blk
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (9 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 10/13] usertools: add support for virtio blk device Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
                       ` (2 subsequent siblings)
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Log the virtio blk device config space information
at vDPA launch, before QEMU connects.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index f4c6198..30850b0 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1387,6 +1387,9 @@ struct rte_vdpa_dev_info dev_info[] = {
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
 	int16_t device_id;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+	uint32_t i;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1453,6 +1456,37 @@ struct rte_vdpa_dev_info dev_info[] = {
 		internal->features = features &
 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
 		internal->features |= dev_info[IFCVF_BLK].features;
+
+		/* cannot read 64-bit register in one attempt,
+		 * so read byte by byte.
+		 */
+		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+			capacity |= (uint64_t)*byte << (i * 8);
+		}
+		/* The capacity is number of sectors in 512-byte.
+		 * So right shift 1 bit  we get in K,
+		 * another right shift 10 bits we get in M,
+		 * right shift 10 more bits, we get in G.
+		 * To show capacity in G, we right shift 21 bits in total.
+		 */
+		DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
+
+		DRV_LOG(DEBUG, "size_max  : 0x%08x",
+			internal->hw.blk_cfg->size_max);
+		DRV_LOG(DEBUG, "seg_max   : 0x%08x",
+			internal->hw.blk_cfg->seg_max);
+		DRV_LOG(DEBUG, "blk_size  : 0x%08x",
+			internal->hw.blk_cfg->blk_size);
+		DRV_LOG(DEBUG, "geometry");
+		DRV_LOG(DEBUG, "    cylinders: %u",
+			internal->hw.blk_cfg->geometry.cylinders);
+		DRV_LOG(DEBUG, "    heads    : %u",
+			internal->hw.blk_cfg->geometry.heads);
+		DRV_LOG(DEBUG, "    sectors  : %u",
+			internal->hw.blk_cfg->geometry.sectors);
+		DRV_LOG(DEBUG, "num_queues: 0x%08x",
+			internal->hw.blk_cfg->num_queues);
 	}
 
 	list->internal = internal;
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 12/13] vdpa/ifc: add interrupt handling for config space
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (10 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-05-24  2:48     ` [PATCH v10 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
  2022-06-01 10:10     ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Maxime Coquelin
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Create a thread to poll and relay config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 121 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 120 insertions(+), 1 deletion(-)
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 30850b0..40a18b2 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
 	int vfio_group_fd;
 	int vfio_dev_fd;
 	pthread_t tid;	/* thread for notify relay */
+	pthread_t intr_tid; /* thread for config space change interrupt relay */
 	int epfd;
+	int csc_epfd;
 	int vid;
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
@@ -566,6 +568,114 @@ struct rte_vdpa_dev_info {
 	return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+	int vid = internal->vid;
+	int ret;
+
+	ret = rte_vhost_slave_config_change(vid, 1);
+	if (ret)
+		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+	struct epoll_event csc_event;
+	struct epoll_event ev;
+	uint64_t buf;
+	int nbytes;
+	int csc_epfd, csc_val = 0;
+
+	csc_epfd = epoll_create(1);
+	if (csc_epfd < 0) {
+		DRV_LOG(ERR, "failed to create epoll for config space change.");
+		return NULL;
+	}
+
+	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+	if (epoll_ctl(csc_epfd, EPOLL_CTL_ADD,
+		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+		goto out;
+	}
+
+	internal->csc_epfd = csc_epfd;
+
+	for (;;) {
+		csc_val = epoll_wait(csc_epfd, &csc_event, 1, -1);
+		if (csc_val < 0) {
+			if (errno == EINTR)
+				continue;
+			DRV_LOG(ERR, "epoll_wait return fail.");
+			goto out;
+		} else if (csc_val == 0) {
+			continue;
+		} else {
+			/* csc_val > 0 */
+			nbytes = read(csc_event.data.fd, &buf, 8);
+			if (nbytes < 0) {
+				if (errno == EINTR ||
+				    errno == EWOULDBLOCK ||
+				    errno == EAGAIN)
+					continue;
+				DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n",
+					csc_event.data.fd,
+					strerror(errno));
+				goto out;
+			} else if (nbytes == 0) {
+				DRV_LOG(ERR, "Read nothing from file descriptor %d\n",
+					csc_event.data.fd);
+				continue;
+			} else {
+				virtio_interrupt_handler(internal);
+			}
+		}
+	}
+
+out:
+	if (csc_epfd >= 0)
+		close(csc_epfd);
+	internal->csc_epfd = -1;
+
+	return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+	char name[THREAD_NAME_LEN];
+	int ret;
+
+	snprintf(name, sizeof(name), "ifc-intr-%d", internal->vid);
+	ret = rte_ctrl_thread_create(&internal->intr_tid, name, NULL,
+				     intr_relay, (void *)internal);
+	if (ret) {
+		DRV_LOG(ERR, "failed to create notify relay pthread.");
+		return -1;
+	}
+	return 0;
+}
+
+static void
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+	void *status;
+
+	if (internal->intr_tid) {
+		pthread_cancel(internal->intr_tid);
+		pthread_join(internal->intr_tid, &status);
+	}
+	internal->intr_tid = 0;
+
+	if (internal->csc_epfd >= 0)
+		close(internal->csc_epfd);
+	internal->csc_epfd = -1;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -592,10 +702,16 @@ struct rte_vdpa_dev_info {
 		if (ret)
 			goto err;
 
+		ret = setup_intr_relay(internal);
+		if (ret)
+			goto err;
+
 		rte_atomic32_set(&internal->running, 1);
 	} else if (rte_atomic32_read(&internal->running) &&
 		   (!rte_atomic32_read(&internal->started) ||
 		    !rte_atomic32_read(&internal->dev_attached))) {
+		unset_intr_relay(internal);
+
 		ret = unset_notify_relay(internal);
 		if (ret)
 			goto err;
@@ -812,7 +928,7 @@ struct rte_vdpa_dev_info {
 		if (nfds < 0) {
 			if (errno == EINTR)
 				continue;
-			DRV_LOG(ERR, "epoll_wait return fail\n");
+			DRV_LOG(ERR, "epoll_wait return fail.");
 			return NULL;
 		}
 
@@ -888,6 +1004,9 @@ struct rte_vdpa_dev_info {
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
 	vdpa_ifcvf_stop(internal);
+
+	unset_intr_relay(internal);
+
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* [PATCH v10 13/13] vdpa/ifc/base: access correct register for blk device
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (11 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
@ 2022-05-24  2:48     ` Andy Pei
  2022-06-01 10:10     ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Maxime Coquelin
  13 siblings, 0 replies; 263+ messages in thread
From: Andy Pei @ 2022-05-24  2:48 UTC (permalink / raw)
  To: dev
  Cc: chenbo.xia, maxime.coquelin, gang.cao, changpeng.liu, rosen.xu,
	qimaix.xiao
Register addresses differ between net and blk devices.
Most of the code is shared, so wherever a register address
differs we have to check the device type and let net and blk
devices go through different code.
Signed-off-by: Andy Pei <andy.pei@intel.com>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)
diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..dd475a7 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,17 @@
 				&cfg->queue_used_hi);
 		IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-		*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-			(u32)hw->vring[i].last_avail_idx |
-			((u32)hw->vring[i].last_used_idx << 16);
+		if (hw->device_type == IFCVF_BLK)
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				i * IFCVF_LM_CFG_SIZE) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
+		else
+			*(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+				(i / 2) * IFCVF_LM_CFG_SIZE +
+				(i % 2) * 4) =
+				(u32)hw->vring[i].last_avail_idx |
+				((u32)hw->vring[i].last_used_idx << 16);
 
 		IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
 		if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +261,22 @@
 		IFCVF_WRITE_REG16(i, &cfg->queue_select);
 		IFCVF_WRITE_REG16(0, &cfg->queue_enable);
 		IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-		ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-				(i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-		hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+		if (hw->device_type == IFCVF_BLK)
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					i * IFCVF_LM_CFG_SIZE);
+		else
+			ring_state = *(u32 *)(hw->lm_cfg +
+					IFCVF_LM_RING_STATE_OFFSET +
+					(i / 2) * IFCVF_LM_CFG_SIZE +
+					(i % 2) * 4);
+
+		if (hw->device_type == IFCVF_BLK)
+			hw->vring[i].last_avail_idx =
+				(u16)(ring_state & IFCVF_16_BIT_MASK);
+		else
+			hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
 		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
 	}
 }
-- 
1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 03/13] vhost: add vhost msg support for get/set config
  2022-05-23  9:35     ` [PATCH v9 03/13] vhost: add vhost msg support for get/set config Andy Pei
@ 2022-05-24  2:52       ` Xia, Chenbo
  2022-05-24  3:24         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  2:52 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 03/13] vhost: add vhost msg support for get/set config
> 
> Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
> supported by virtio blk vDPA devices.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 88
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vhost_user.h | 13 ++++++++
>  2 files changed, 101 insertions(+)
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 850848c..b37d814 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -2468,6 +2468,92 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  }
> 
>  static int
> +vhost_user_get_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (validate_msg_fds(dev, ctx, 0) != 0)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	if (!vdpa_dev) {
> +		VHOST_LOG_CONFIG(ERR,
> +				 "(%s) is not vDPA device!\n",
> +				 dev->ifname);
> +		goto out;
> +	}
In my understanding, this message is only targeted at vDPA devices.
A traditional SW back-end will not use it. So if vdpa_dev is not attached,
you can just return RTE_VHOST_MSG_RESULT_ERR here. Another way is
setting msg size to 0 and replying, but I think in this case we should just
break the msg handling.
Besides, two lines are enough for the log above:
+		VHOST_LOG_CONFIG(ERR, "(%s) is not vDPA device!\n",
+				 dev->ifname);
Thanks,
Chenbo
> +
> +	if (vdpa_dev->ops->get_config) {
> +		ret = vdpa_dev->ops->get_config(dev->vid,
> +					   ctx->msg.payload.cfg.region,
> +					   ctx->msg.payload.cfg.size);
> +		if (ret != 0) {
> +			ctx->msg.size = 0;
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) get_config() return error!\n",
> +					 dev->ifname);
> +		}
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
> +				 dev->ifname);
> +	}
> +
> +out:
> +	return RTE_VHOST_MSG_RESULT_REPLY;
> +}
> +
> +static int
> +vhost_user_set_config(struct virtio_net **pdev,
> +			struct vhu_msg_context *ctx,
> +			int main_fd __rte_unused)
> +{
> +	struct virtio_net *dev = *pdev;
> +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> +	int ret = 0;
> +
> +	if (validate_msg_fds(dev, ctx, 0) != 0)
> +		return RTE_VHOST_MSG_RESULT_ERR;
> +
> +	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
> +		VHOST_LOG_CONFIG(ERR,
> +			"(%s) vhost_user_config size: %"PRIu32", should not be
> larger than %d\n",
> +			dev->ifname, ctx->msg.payload.cfg.size,
> +			VHOST_USER_MAX_CONFIG_SIZE);
> +		goto out;
> +	}
> +
> +	if (!vdpa_dev) {
> +		VHOST_LOG_CONFIG(ERR,
> +				 "(%s) is not vDPA device!\n",
> +				 dev->ifname);
> +		goto out;
> +	}
> +
> +	if (vdpa_dev->ops->set_config) {
> +		ret = vdpa_dev->ops->set_config(dev->vid,
> +			ctx->msg.payload.cfg.region,
> +			ctx->msg.payload.cfg.offset,
> +			ctx->msg.payload.cfg.size,
> +			ctx->msg.payload.cfg.flags);
> +		if (ret)
> +			VHOST_LOG_CONFIG(ERR,
> +					 "(%s) set_config() return error!\n",
> +					 dev->ifname);
> +	} else {
> +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
> +				 dev->ifname);
> +	}
> +
> +	return RTE_VHOST_MSG_RESULT_OK;
> +
> +out:
> +	return RTE_VHOST_MSG_RESULT_ERR;
> +}
> +
> +static int
>  vhost_user_iotlb_msg(struct virtio_net **pdev,
>  			struct vhu_msg_context *ctx,
>  			int main_fd __rte_unused)
> @@ -2686,6 +2772,8 @@ static int is_vring_iotlb(struct virtio_net *dev,
>  VHOST_MESSAGE_HANDLER(VHOST_USER_NET_SET_MTU, vhost_user_net_set_mtu,
> false) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_SLAVE_REQ_FD, vhost_user_set_req_fd,
> true) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_IOTLB_MSG, vhost_user_iotlb_msg, false)
> \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_GET_CONFIG, vhost_user_get_config, false)
> \
> +VHOST_MESSAGE_HANDLER(VHOST_USER_SET_CONFIG, vhost_user_set_config, false)
> \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_ADVISE,
> vhost_user_set_postcopy_advise, false) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_LISTEN,
> vhost_user_set_postcopy_listen, false) \
>  VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_END, vhost_user_postcopy_end,
> false) \
> diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
> index ba1c5c7..c4d091e 100644
> --- a/lib/vhost/vhost_user.h
> +++ b/lib/vhost/vhost_user.h
> @@ -50,6 +50,8 @@
>  	VHOST_USER_NET_SET_MTU = 20,
>  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
>  	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
>  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
>  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
>  	VHOST_USER_POSTCOPY_ADVISE = 28,
> @@ -123,6 +125,16 @@
>  	uint16_t queue_size;
>  } VhostUserInflight;
> 
> +#define VHOST_USER_MAX_CONFIG_SIZE		256
> +
> +/** Get/set config msg payload */
> +struct vhost_user_config {
> +	uint32_t offset;
> +	uint32_t size;
> +	uint32_t flags;
> +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> +};
> +
>  typedef struct VhostUserMsg {
>  	union {
>  		uint32_t master; /* a VhostUserRequest value */
> @@ -146,6 +158,7 @@
>  		VhostUserCryptoSessionParam crypto_session;
>  		VhostUserVringArea area;
>  		VhostUserInflight inflight;
> +		struct vhost_user_config cfg;
>  	} payload;
>  	/* Nothing should be added after the payload */
>  } __rte_packed VhostUserMsg;
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device
  2022-05-23  9:35     ` [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-05-24  2:55       ` Xia, Chenbo
  2022-05-24  3:27         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  2:55 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device
> 
> For virtio blk device, re-use part of ifc driver ops.
> Implement ifcvf_blk_get_config for virtio blk device.
> Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
> blk device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.h |  4 ++
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 91
> ++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 94 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index a761d49..b6fdfdb 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -65,6 +65,10 @@
>  #define IFCVF_32_BIT_MASK		0xffffffff
> 
> 
> +#ifndef VHOST_USER_PROTOCOL_F_CONFIG
> +#define VHOST_USER_PROTOCOL_F_CONFIG	9
> +#endif
> +
>  struct ifcvf_pci_cap {
>  	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
>  	u8 cap_next;            /* Generic PCI field: next ptr. */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 1eed90b..c1fc1d7 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1087,6 +1087,10 @@ struct rte_vdpa_dev_info {
>  		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
>  		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
>  		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
> +
> +#define VDPA_BLK_PROTOCOL_FEATURES \
> +		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
> +
>  static int
>  ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t
> *features)
>  {
> @@ -1199,6 +1203,91 @@ struct rte_vdpa_dev_info {
>  	return device_id;
>  }
> 
> +static int
> +ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t size)
> +{
> +	struct virtio_blk_config *dev_cfg;
> +	struct ifcvf_internal *internal;
> +	struct rte_vdpa_device *vdev;
> +	struct internal_list *list;
> +	uint32_t i;
> +	uint64_t capacity = 0;
> +	uint8_t *byte;
> +
> +	if (size != sizeof(struct virtio_blk_config)) {
> +		DRV_LOG(ERR, "Invalid len: %u, required: %u",
> +			size, (uint32_t)sizeof(struct virtio_blk_config));
> +		return -1;
> +	}
> +
> +	vdev = rte_vhost_get_vdpa_device(vid);
Check vdev is not NULL here.
Thanks
Chenbo
> +	list = find_internal_resource_by_vdev(vdev);
> +	if (list == NULL) {
> +		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
> +		return -1;
> +	}
> +
> +	internal = list->internal;
> +
> +	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
> +		config[i] = *((u8 *)internal->hw.blk_cfg + i);
> +
> +	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
> +
> +	/* cannot read 64-bit register in one attempt, so read byte by byte.
> */
> +	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> +		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> +		capacity |= (uint64_t)*byte << (i * 8);
> +	}
> +	/* The capacity is number of sectors in 512-byte.
> +	 * So right shift 1 bit  we get in K,
> +	 * another right shift 10 bits we get in M,
> +	 * right shift 10 more bits, we get in G.
> +	 * To show capacity in G, we right shift 21 bits in total.
> +	 */
> +	DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
> +
> +	DRV_LOG(DEBUG, "size_max  : 0x%08x", dev_cfg->size_max);
> +	DRV_LOG(DEBUG, "seg_max   : 0x%08x", dev_cfg->seg_max);
> +	DRV_LOG(DEBUG, "blk_size  : 0x%08x", dev_cfg->blk_size);
> +	DRV_LOG(DEBUG, "geometry");
> +	DRV_LOG(DEBUG, "      cylinders: %u", dev_cfg->geometry.cylinders);
> +	DRV_LOG(DEBUG, "      heads    : %u", dev_cfg->geometry.heads);
> +	DRV_LOG(DEBUG, "      sectors  : %u", dev_cfg->geometry.sectors);
> +	DRV_LOG(DEBUG, "num_queues: 0x%08x", dev_cfg->num_queues);
> +
> +	DRV_LOG(DEBUG, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
> +		config[0], config[1], config[2], config[3], config[4],
> +		config[5], config[6], config[7]);
> +	return 0;
> +}
> +
> +static int
> +ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
> +	uint64_t *features)
> +{
> +	RTE_SET_USED(vdev);
> +
> +	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
> +	*features |= VDPA_BLK_PROTOCOL_FEATURES;
> +	return 0;
> +}
> +
> +static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
> +	.get_queue_num = ifcvf_get_queue_num,
> +	.get_features = ifcvf_get_vdpa_features,
> +	.set_features = ifcvf_set_features,
> +	.get_protocol_features = ifcvf_blk_get_protocol_features,
> +	.dev_conf = ifcvf_dev_config,
> +	.dev_close = ifcvf_dev_close,
> +	.set_vring_state = ifcvf_set_vring_state,
> +	.migration_done = NULL,
> +	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
> +	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> +	.get_notify_area = ifcvf_get_notify_area,
> +	.get_config = ifcvf_blk_get_config,
> +};
> +
>  struct rte_vdpa_dev_info dev_info[] = {
>  	{
>  		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
> @@ -1211,7 +1300,7 @@ struct rte_vdpa_dev_info dev_info[] = {
>  	{
>  		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
>  			    (1ULL << VHOST_F_LOG_ALL),
> -		.ops = NULL,
> +		.ops = &ifcvf_blk_ops,
>  	},
>  };
> 
> --
> 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 05/13] vdpa/ifc: add vDPA interrupt relay for blk device
  2022-05-23  9:35     ` [PATCH v9 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
@ 2022-05-24  2:58       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  2:58 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 05/13] vdpa/ifc: add vDPA interrupt relay for blk
> device
> 
> For the net device type, only the rxq interrupts need to be relayed.
> But for the block device type, all queues are used for both read and
> write requests, so the interrupts of all queues need to be relayed.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index c1fc1d7..1d05529 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -369,6 +369,7 @@ struct rte_vdpa_dev_info {
>  	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
>  	irq_set->start = 0;
>  	fd_ptr = (int *)&irq_set->data;
> +	/* The first interrupt is for the configure space change
> notification */
>  	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
>  		rte_intr_fd_get(internal->pdev->intr_handle);
> 
> @@ -378,7 +379,13 @@ struct rte_vdpa_dev_info {
>  	for (i = 0; i < nr_vring; i++) {
>  		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
>  		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> -		if ((i & 1) == 0 && m_rx == true) {
> +		if (m_rx == true &&
> +			((i & 1) == 0 || internal->hw.device_type == IFCVF_BLK))
> {
> +			/* For the net we only need to relay rx queue,
> +			 * which will change the mem of VM.
> +			 * For the blk we need to relay all the read cmd
> +			 * of each queue
> +			 */
>  			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>  			if (fd < 0) {
>  				DRV_LOG(ERR, "can't setup eventfd: %s",
> --
> 1.8.3.1
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
Btw: please add my Reviewed-by tags in the next version; I see they are missing in this one.
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 06/13] vdpa/ifc: add block device SW live-migration
  2022-05-23  9:35     ` [PATCH v9 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
@ 2022-05-24  3:00       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  3:00 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 06/13] vdpa/ifc: add block device SW live-migration
> 
> Add SW live-migration support to block device.
> For block device, it is critical that no packet
> should be dropped. So when virtio blk device is
> paused, make sure hardware last_avail_idx and
> last_used_idx are the same. This indicates all
> requests have received acks, and no inflight IO.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.h |  1 +
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 42
> ++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 39 insertions(+), 4 deletions(-)
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 07/13] vhost: add API to get vDPA device type
  2022-05-23  9:35     ` [PATCH v9 07/13] vhost: add API to get vDPA device type Andy Pei
@ 2022-05-24  3:01       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  3:01 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 07/13] vhost: add API to get vDPA device type
> 
> Vhost backend of different devices have different features.
> Add an API to get vDPA device type, net device or blk device
> currently, so users can set different features for different
> kinds of devices.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  doc/guides/prog_guide/vhost_lib.rst    |  5 ++++
>  doc/guides/rel_notes/release_22_07.rst |  4 ++++
>  lib/vhost/rte_vhost.h                  | 17 +++++++++++++
>  lib/vhost/socket.c                     | 44
> ++++++++++++++++++++++++++++++++++
>  lib/vhost/vdpa_driver.h                |  3 +++
>  lib/vhost/version.map                  |  1 +
>  6 files changed, 74 insertions(+)
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 08/13] vdpa/ifc: add get device type ops to ifc driver
  2022-05-23  9:35     ` [PATCH v9 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
@ 2022-05-24  3:02       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  3:02 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 08/13] vdpa/ifc: add get device type ops to ifc driver
> 
> Add get device type ops to ifc driver.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 7a0bdb4..1c5746a 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1189,6 +1189,29 @@ struct rte_vdpa_dev_info {
>  	return 0;
>  }
> 
> +static int
> +ifcvf_get_device_type(struct rte_vdpa_device *vdev,
> +	uint32_t *type)
> +{
> +	struct ifcvf_internal *internal;
> +	struct internal_list *list;
> +
> +	list = find_internal_resource_by_vdev(vdev);
> +	if (list == NULL) {
> +		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
> +		return -1;
> +	}
> +
> +	internal = list->internal;
> +
> +	if (internal->hw.device_type == IFCVF_BLK)
> +		*type = RTE_VHOST_VDPA_DEVICE_TYPE_BLK;
> +	else
> +		*type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
> +
> +	return 0;
> +}
> +
>  static struct rte_vdpa_dev_ops ifcvf_net_ops = {
>  	.get_queue_num = ifcvf_get_queue_num,
>  	.get_features = ifcvf_get_vdpa_features,
> @@ -1201,6 +1224,7 @@ struct rte_vdpa_dev_info {
>  	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
>  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
>  	.get_notify_area = ifcvf_get_notify_area,
> +	.get_dev_type = ifcvf_get_device_type,
>  };
> 
>  static inline int
> @@ -1327,6 +1351,7 @@ struct rte_vdpa_dev_info {
>  	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
>  	.get_notify_area = ifcvf_get_notify_area,
>  	.get_config = ifcvf_blk_get_config,
> +	.get_dev_type = ifcvf_get_device_type,
>  };
> 
>  struct rte_vdpa_dev_info dev_info[] = {
> --
> 1.8.3.1
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 09/13] examples/vdpa: add virtio blk support
  2022-05-23  9:35     ` [PATCH v9 09/13] examples/vdpa: add virtio blk support Andy Pei
@ 2022-05-24  3:03       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  3:03 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 09/13] examples/vdpa: add virtio blk support
> 
> Add virtio blk device support to vDPA example.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  examples/vdpa/main.c             | 56
> ++++++++++++++++++++++++++++++++++++++++
>  examples/vdpa/vdpa_blk_compact.h | 50 +++++++++++++++++++++++++++++++++++
>  2 files changed, 106 insertions(+)
>  create mode 100644 examples/vdpa/vdpa_blk_compact.h
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 10/13] usertools: add support for virtio blk device
  2022-05-23  9:35     ` [PATCH v9 10/13] usertools: add support for virtio blk device Andy Pei
@ 2022-05-24  3:05       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  3:05 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 10/13] usertools: add support for virtio blk device
> 
> Add virtio blk device support to devbind.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  usertools/dpdk-devbind.py | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
> index ace4627..18c7d67 100755
> --- a/usertools/dpdk-devbind.py
> +++ b/usertools/dpdk-devbind.py
> @@ -72,6 +72,9 @@
>  cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
>                   'SVendor': None, 'SDevice': None}
> 
> +virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001,1042',
> +                    'SVendor': None, 'SDevice': None}
> +
>  network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
>  baseband_devices = [acceleration_class]
>  crypto_devices = [encryption_class, intel_processor_class]
> @@ -82,7 +85,7 @@
>  compress_devices = [cavium_zip]
>  regex_devices = [cn9k_ree]
>  misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev,
> -                intel_ntb_skx, intel_ntb_icx]
> +                intel_ntb_skx, intel_ntb_icx, virtio_blk]
> 
>  # global dict ethernet devices present. Dictionary indexed by PCI address.
>  # Each device within this is itself a dictionary of device properties
> --
> 1.8.3.1
Acked-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 11/13] vdpa/ifc: add log for config space of virtio blk
  2022-05-23  9:35     ` [PATCH v9 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
@ 2022-05-24  3:06       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  3:06 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 11/13] vdpa/ifc: add log for config space of virtio blk
> 
> Add some log of virtio blk device config space information
> at VDPA launch before qemu connects.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 34 ++++++++++++++++++++++++++++++++++
>  1 file changed, 34 insertions(+)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 1c5746a..80d6261 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -1382,6 +1382,9 @@ struct rte_vdpa_dev_info dev_info[] = {
>  	struct rte_kvargs *kvlist = NULL;
>  	int ret = 0;
>  	int16_t device_id;
> +	uint64_t capacity = 0;
> +	uint8_t *byte;
> +	uint32_t i;
> 
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>  		return 0;
> @@ -1448,6 +1451,37 @@ struct rte_vdpa_dev_info dev_info[] = {
>  		internal->features = features &
>  					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
>  		internal->features |= dev_info[IFCVF_BLK].features;
> +
> +		/* cannot read 64-bit register in one attempt,
> +		 * so read byte by byte.
> +		 */
> +		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> +			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> +			capacity |= (uint64_t)*byte << (i * 8);
> +		}
> +		/* The capacity is number of sectors in 512-byte.
> +		 * So right shift 1 bit  we get in K,
> +		 * another right shift 10 bits we get in M,
> +		 * right shift 10 more bits, we get in G.
> +		 * To show capacity in G, we right shift 21 bits in total.
> +		 */
> +		DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
> +
> +		DRV_LOG(DEBUG, "size_max  : 0x%08x",
> +			internal->hw.blk_cfg->size_max);
> +		DRV_LOG(DEBUG, "seg_max   : 0x%08x",
> +			internal->hw.blk_cfg->seg_max);
> +		DRV_LOG(DEBUG, "blk_size  : 0x%08x",
> +			internal->hw.blk_cfg->blk_size);
> +		DRV_LOG(DEBUG, "geometry");
> +		DRV_LOG(DEBUG, "    cylinders: %u",
> +			internal->hw.blk_cfg->geometry.cylinders);
> +		DRV_LOG(DEBUG, "    heads    : %u",
> +			internal->hw.blk_cfg->geometry.heads);
> +		DRV_LOG(DEBUG, "    sectors  : %u",
> +			internal->hw.blk_cfg->geometry.sectors);
> +		DRV_LOG(DEBUG, "num_queues: 0x%08x",
> +			internal->hw.blk_cfg->num_queues);
>  	}
> 
>  	list->internal = internal;
> --
> 1.8.3.1
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 12/13] vdpa/ifc: add interrupt handling for config space
  2022-05-23  9:35     ` [PATCH v9 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
@ 2022-05-24  3:09       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  3:09 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 12/13] vdpa/ifc: add interrupt handling for config
> space
> 
> Create a thread to poll and relay config space change interrupt.
> Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 121
> +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 120 insertions(+), 1 deletion(-)
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 13/13] vdpa/ifc/base: access correct register for blk device
  2022-05-23  9:35     ` [PATCH v9 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
@ 2022-05-24  3:10       ` Xia, Chenbo
  2022-05-24  3:11         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  3:10 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Monday, May 23, 2022 5:35 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v9 13/13] vdpa/ifc/base: access correct register for blk
> device
> 
> Register address is different between net and blk device.
> We are re-useing most of the code, when register address is
re-using
With this fixed:
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> different, we have to check net and blk device go through
> different code.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.c | 34 +++++++++++++++++++++++++++-------
>  1 file changed, 27 insertions(+), 7 deletions(-)
> 
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 13/13] vdpa/ifc/base: access correct register for blk device
  2022-05-24  3:10       ` Xia, Chenbo
@ 2022-05-24  3:11         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-24  3:11 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Sure.
Thanks for your review.
I will address it in V10.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Tuesday, May 24, 2022 11:10 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v9 13/13] vdpa/ifc/base: access correct register for blk
> device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Monday, May 23, 2022 5:35 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v9 13/13] vdpa/ifc/base: access correct register for
> > blk device
> >
> > Register address is different between net and blk device.
> > We are re-useing most of the code, when register address is
> 
> re-using
> 
> With this fixed:
> 
> Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> 
> > different, we have to check net and blk device go through different
> > code.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.c | 34 +++++++++++++++++++++++++++-------
> >  1 file changed, 27 insertions(+), 7 deletions(-)
> >
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 03/13] vhost: add vhost msg support for get/set config
  2022-05-24  2:52       ` Xia, Chenbo
@ 2022-05-24  3:24         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-24  3:24 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Tuesday, May 24, 2022 10:52 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v9 03/13] vhost: add vhost msg support for get/set
> config
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Monday, May 23, 2022 5:35 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v9 03/13] vhost: add vhost msg support for get/set
> > config
> >
> > Add support for VHOST_USER_GET_CONFIG and
> VHOST_USER_SET_CONFIG.
> > VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> > supported by virtio blk VDPA device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  lib/vhost/vhost_user.c | 88
> > ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/vhost/vhost_user.h | 13 ++++++++
> >  2 files changed, 101 insertions(+)
> >
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 850848c..b37d814 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -2468,6 +2468,92 @@ static int is_vring_iotlb(struct virtio_net
> > *dev,  }
> >
> >  static int
> > +vhost_user_get_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (validate_msg_fds(dev, ctx, 0) != 0)
> > +		return RTE_VHOST_MSG_RESULT_ERR;
> > +
> > +	if (!vdpa_dev) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +				 "(%s) is not vDPA device!\n",
> > +				 dev->ifname);
> > +		goto out;
> > +	}
> 
> In my understanding, this message is only targeted at vDPA devices.
> A traditional SW back-end will not use it. So if vdpa_dev is not attached, you
> can just return RTE_VHOST_MSG_RESULT_ERR here. Another way is setting the
> msg size to 0 and replying, but I think in this case we should just break the
> msg handling.
> 
OK. I will just return RTE_VHOST_MSG_RESULT_ERR here.
> Besides, two lines are enough for the log above:
> 
> +		VHOST_LOG_CONFIG(ERR, "(%s) is not vDPA device!\n",
> +				 dev->ifname);
> 
Sure.
> Thanks,
> Chenbo
> 
> > +
> > +	if (vdpa_dev->ops->get_config) {
> > +		ret = vdpa_dev->ops->get_config(dev->vid,
> > +					   ctx->msg.payload.cfg.region,
> > +					   ctx->msg.payload.cfg.size);
> > +		if (ret != 0) {
> > +			ctx->msg.size = 0;
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) get_config() return error!\n",
> > +					 dev->ifname);
> > +		}
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not
> supported!\n",
> > +				 dev->ifname);
> > +	}
> > +
> > +out:
> > +	return RTE_VHOST_MSG_RESULT_REPLY;
> > +}
> > +
> > +static int
> > +vhost_user_set_config(struct virtio_net **pdev,
> > +			struct vhu_msg_context *ctx,
> > +			int main_fd __rte_unused)
> > +{
> > +	struct virtio_net *dev = *pdev;
> > +	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
> > +	int ret = 0;
> > +
> > +	if (validate_msg_fds(dev, ctx, 0) != 0)
> > +		return RTE_VHOST_MSG_RESULT_ERR;
> > +
> > +	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +			"(%s) vhost_user_config size: %"PRIu32", should not
> be
> > larger than %d\n",
> > +			dev->ifname, ctx->msg.payload.cfg.size,
> > +			VHOST_USER_MAX_CONFIG_SIZE);
> > +		goto out;
> > +	}
> > +
> > +	if (!vdpa_dev) {
> > +		VHOST_LOG_CONFIG(ERR,
> > +				 "(%s) is not vDPA device!\n",
> > +				 dev->ifname);
> > +		goto out;
> > +	}
> > +
> > +	if (vdpa_dev->ops->set_config) {
> > +		ret = vdpa_dev->ops->set_config(dev->vid,
> > +			ctx->msg.payload.cfg.region,
> > +			ctx->msg.payload.cfg.offset,
> > +			ctx->msg.payload.cfg.size,
> > +			ctx->msg.payload.cfg.flags);
> > +		if (ret)
> > +			VHOST_LOG_CONFIG(ERR,
> > +					 "(%s) set_config() return error!\n",
> > +					 dev->ifname);
> > +	} else {
> > +		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not
> supported!\n",
> > +				 dev->ifname);
> > +	}
> > +
> > +	return RTE_VHOST_MSG_RESULT_OK;
> > +
> > +out:
> > +	return RTE_VHOST_MSG_RESULT_ERR;
> > +}
> > +
> > +static int
> >  vhost_user_iotlb_msg(struct virtio_net **pdev,
> >  			struct vhu_msg_context *ctx,
> >  			int main_fd __rte_unused)
> > @@ -2686,6 +2772,8 @@ static int is_vring_iotlb(struct virtio_net
> > *dev,  VHOST_MESSAGE_HANDLER(VHOST_USER_NET_SET_MTU,
> > vhost_user_net_set_mtu,
> > false) \
> >  VHOST_MESSAGE_HANDLER(VHOST_USER_SET_SLAVE_REQ_FD,
> > vhost_user_set_req_fd,
> > true) \
> >  VHOST_MESSAGE_HANDLER(VHOST_USER_IOTLB_MSG,
> vhost_user_iotlb_msg,
> > false) \
> > +VHOST_MESSAGE_HANDLER(VHOST_USER_GET_CONFIG,
> vhost_user_get_config,
> > +false)
> > \
> > +VHOST_MESSAGE_HANDLER(VHOST_USER_SET_CONFIG,
> vhost_user_set_config,
> > +false)
> > \
> >  VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_ADVISE,
> > vhost_user_set_postcopy_advise, false) \
> > VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_LISTEN,
> > vhost_user_set_postcopy_listen, false) \
> > VHOST_MESSAGE_HANDLER(VHOST_USER_POSTCOPY_END,
> > vhost_user_postcopy_end,
> > false) \
> > diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h index
> > ba1c5c7..c4d091e 100644
> > --- a/lib/vhost/vhost_user.h
> > +++ b/lib/vhost/vhost_user.h
> > @@ -50,6 +50,8 @@
> >  	VHOST_USER_NET_SET_MTU = 20,
> >  	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> >  	VHOST_USER_IOTLB_MSG = 22,
> > +	VHOST_USER_GET_CONFIG = 24,
> > +	VHOST_USER_SET_CONFIG = 25,
> >  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
> >  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> >  	VHOST_USER_POSTCOPY_ADVISE = 28,
> > @@ -123,6 +125,16 @@
> >  	uint16_t queue_size;
> >  } VhostUserInflight;
> >
> > +#define VHOST_USER_MAX_CONFIG_SIZE		256
> > +
> > +/** Get/set config msg payload */
> > +struct vhost_user_config {
> > +	uint32_t offset;
> > +	uint32_t size;
> > +	uint32_t flags;
> > +	uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
> > +};
> > +
> >  typedef struct VhostUserMsg {
> >  	union {
> >  		uint32_t master; /* a VhostUserRequest value */ @@ -146,6
> +158,7 @@
> >  		VhostUserCryptoSessionParam crypto_session;
> >  		VhostUserVringArea area;
> >  		VhostUserInflight inflight;
> > +		struct vhost_user_config cfg;
> >  	} payload;
> >  	/* Nothing should be added after the payload */  } __rte_packed
> > VhostUserMsg;
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device
  2022-05-24  2:55       ` Xia, Chenbo
@ 2022-05-24  3:27         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-24  3:27 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Tuesday, May 24, 2022 10:55 AM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Monday, May 23, 2022 5:35 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device
> >
> > For virtio blk device, re-use part of ifc driver ops.
> > Implement ifcvf_blk_get_config for virtio blk device.
> > Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio blk device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.h |  4 ++  drivers/vdpa/ifc/ifcvf_vdpa.c
> > | 91
> > ++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 94 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.h
> > b/drivers/vdpa/ifc/base/ifcvf.h index a761d49..b6fdfdb 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.h
> > +++ b/drivers/vdpa/ifc/base/ifcvf.h
> > @@ -65,6 +65,10 @@
> >  #define IFCVF_32_BIT_MASK		0xffffffff
> >
> >
> > +#ifndef VHOST_USER_PROTOCOL_F_CONFIG
> > +#define VHOST_USER_PROTOCOL_F_CONFIG	9
> > +#endif
> > +
> >  struct ifcvf_pci_cap {
> >  	u8 cap_vndr;            /* Generic PCI field: PCI_CAP_ID_VNDR */
> >  	u8 cap_next;            /* Generic PCI field: next ptr. */
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 1eed90b..c1fc1d7 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -1087,6 +1087,10 @@ struct rte_vdpa_dev_info {
> >  		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
> >  		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
> >  		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
> > +
> > +#define VDPA_BLK_PROTOCOL_FEATURES \
> > +		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
> > +
> >  static int
> >  ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t
> > *features)
> >  {
> > @@ -1199,6 +1203,91 @@ struct rte_vdpa_dev_info {
> >  	return device_id;
> >  }
> >
> > +static int
> > +ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t size) {
> > +	struct virtio_blk_config *dev_cfg;
> > +	struct ifcvf_internal *internal;
> > +	struct rte_vdpa_device *vdev;
> > +	struct internal_list *list;
> > +	uint32_t i;
> > +	uint64_t capacity = 0;
> > +	uint8_t *byte;
> > +
> > +	if (size != sizeof(struct virtio_blk_config)) {
> > +		DRV_LOG(ERR, "Invalid len: %u, required: %u",
> > +			size, (uint32_t)sizeof(struct virtio_blk_config));
> > +		return -1;
> > +	}
> > +
> > +	vdev = rte_vhost_get_vdpa_device(vid);
> 
> Check vdev is not NULL here.
> 
Sure, I will fix it in next version.
> Thanks
> Chenbo
> 
> > +	list = find_internal_resource_by_vdev(vdev);
> > +	if (list == NULL) {
> > +		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
> > +		return -1;
> > +	}
> > +
> > +	internal = list->internal;
> > +
> > +	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
> > +		config[i] = *((u8 *)internal->hw.blk_cfg + i);
> > +
> > +	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
> > +
> > +	/* cannot read 64-bit register in one attempt, so read byte by byte.
> > */
> > +	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
> > +		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
> > +		capacity |= (uint64_t)*byte << (i * 8);
> > +	}
> > +	/* The capacity is number of sectors in 512-byte.
> > +	 * So right shift 1 bit  we get in K,
> > +	 * another right shift 10 bits we get in M,
> > +	 * right shift 10 more bits, we get in G.
> > +	 * To show capacity in G, we right shift 21 bits in total.
> > +	 */
> > +	DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
> > +
> > +	DRV_LOG(DEBUG, "size_max  : 0x%08x", dev_cfg->size_max);
> > +	DRV_LOG(DEBUG, "seg_max   : 0x%08x", dev_cfg->seg_max);
> > +	DRV_LOG(DEBUG, "blk_size  : 0x%08x", dev_cfg->blk_size);
> > +	DRV_LOG(DEBUG, "geometry");
> > +	DRV_LOG(DEBUG, "      cylinders: %u", dev_cfg->geometry.cylinders);
> > +	DRV_LOG(DEBUG, "      heads    : %u", dev_cfg->geometry.heads);
> > +	DRV_LOG(DEBUG, "      sectors  : %u", dev_cfg->geometry.sectors);
> > +	DRV_LOG(DEBUG, "num_queues: 0x%08x", dev_cfg->num_queues);
> > +
> > +	DRV_LOG(DEBUG, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
> > +		config[0], config[1], config[2], config[3], config[4],
> > +		config[5], config[6], config[7]);
> > +	return 0;
> > +}
> > +
> > +static int
> > +ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
> > +	uint64_t *features)
> > +{
> > +	RTE_SET_USED(vdev);
> > +
> > +	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
> > +	*features |= VDPA_BLK_PROTOCOL_FEATURES;
> > +	return 0;
> > +}
> > +
> > +static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
> > +	.get_queue_num = ifcvf_get_queue_num,
> > +	.get_features = ifcvf_get_vdpa_features,
> > +	.set_features = ifcvf_set_features,
> > +	.get_protocol_features = ifcvf_blk_get_protocol_features,
> > +	.dev_conf = ifcvf_dev_config,
> > +	.dev_close = ifcvf_dev_close,
> > +	.set_vring_state = ifcvf_set_vring_state,
> > +	.migration_done = NULL,
> > +	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
> > +	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
> > +	.get_notify_area = ifcvf_get_notify_area,
> > +	.get_config = ifcvf_blk_get_config,
> > +};
> > +
> >  struct rte_vdpa_dev_info dev_info[] = {
> >  	{
> >  		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
> @@ -1211,7
> > +1300,7 @@ struct rte_vdpa_dev_info dev_info[] = {
> >  	{
> >  		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)
> |
> >  			    (1ULL << VHOST_F_LOG_ALL),
> > -		.ops = NULL,
> > +		.ops = &ifcvf_blk_ops,
> >  	},
> >  };
> >
> > --
> > 1.8.3.1
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v10 03/13] vhost: add vhost msg support for get/set config
  2022-05-24  2:48     ` [PATCH v10 03/13] vhost: add vhost msg support for get/set config Andy Pei
@ 2022-05-24  3:49       ` Xia, Chenbo
  0 siblings, 0 replies; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  3:49 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Tuesday, May 24, 2022 10:48 AM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v10 03/13] vhost: add vhost msg support for get/set config
> 
> Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
> VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG message is only
> supported by virtio blk VDPA device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  lib/vhost/vhost_user.c | 85
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/vhost/vhost_user.h | 13 ++++++++
>  2 files changed, 98 insertions(+)
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device
  2022-05-24  2:48     ` [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
@ 2022-05-24  6:28       ` Xia, Chenbo
  2022-05-24  6:30         ` Pei, Andy
  0 siblings, 1 reply; 263+ messages in thread
From: Xia, Chenbo @ 2022-05-24  6:28 UTC (permalink / raw)
  To: Pei, Andy, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Tuesday, May 24, 2022 10:48 AM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>; Xu,
> Rosen <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device
> 
> For virtio blk device, re-use part of ifc driver ops.
> Implement ifcvf_blk_get_config for virtio blk device.
> Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
> blk device.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
>  drivers/vdpa/ifc/base/ifcvf.h |  4 ++
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 96
> ++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 99 insertions(+), 1 deletion(-)
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device
  2022-05-24  6:28       ` Xia, Chenbo
@ 2022-05-24  6:30         ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-05-24  6:30 UTC (permalink / raw)
  To: Xia, Chenbo, dev
  Cc: maxime.coquelin, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Hi Chenbo,
Thanks for your effort.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Tuesday, May 24, 2022 2:28 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>;
> Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: RE: [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device
> 
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Tuesday, May 24, 2022 10:48 AM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>; Xu, Rosen <rosen.xu@intel.com>; Xiao,
> > QimaiX <qimaix.xiao@intel.com>
> > Subject: [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device
> >
> > For virtio blk device, re-use part of ifc driver ops.
> > Implement ifcvf_blk_get_config for virtio blk device.
> > Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio blk device.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> >  drivers/vdpa/ifc/base/ifcvf.h |  4 ++  drivers/vdpa/ifc/ifcvf_vdpa.c
> > | 96
> > ++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 99 insertions(+), 1 deletion(-)
> 
> 
> Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
^ permalink raw reply	[flat|nested] 263+ messages in thread
* Re: [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc
  2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
                       ` (12 preceding siblings ...)
  2022-05-24  2:48     ` [PATCH v10 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
@ 2022-06-01 10:10     ` Maxime Coquelin
  2022-06-01 13:57       ` Pei, Andy
  13 siblings, 1 reply; 263+ messages in thread
From: Maxime Coquelin @ 2022-06-01 10:10 UTC (permalink / raw)
  To: Andy Pei, dev; +Cc: chenbo.xia, gang.cao, changpeng.liu, rosen.xu, qimaix.xiao
On 5/24/22 04:48, Andy Pei wrote:
> This patch set adds virtio_blk device support to the vdpa/ifc driver.
> With a lot of similarities, I re-use part of the vdpa/ifc driver.
> Distinguish the virtio net and blk devices by device id, and implement
> specific features and ops.
> Add an example to vdpa to support the virtio_blk device.
> To support blk device live migration, some modifications are made to the vhost lib.
> Perform dev_conf op only under VHOST_USER_SET_VRING_CALL msg.
> 
> 
> v10:
>   fix typo.
>   add more NULL pointer check.
>   fix an error handler.
> 
> v9:
>   fix some naming issue.
>   add more NULL pointer check.
>   close open file when errors occur.
> 
> v8:
>   delete some redundant code.
>   fix some commit log.
> 
> v7:
>   Check on expected fd num in new vhost msg handler.
>   Sanity check on vhost msg size.
>   Fix typo.
>   Add commit log to help understand code.
>   Remove duplicated code.
>   Add new API to get vDPA device type.
> 
> v6:
>   fix some commit log.
>   add vhost socket in log output to make it more user-friendly.
>   when driver ops fail, just output some log, do not break message handler.
>   check vhost msg size in msg handler.
> v5:
>   fix some coding style issues.
> v4:
>   add args "isblk" to vdpa example to specify a block device, fix some
>   issue in example.
>   Make sure code specific to the block device does not affect the net device.
> v3:
>   Fix some compile issues.
> v2:
>   Fix some coding style issues.
> 
> Andy Pei (13):
>    vdpa/ifc: add support for virtio blk device
>    vhost: add vDPA ops for blk device
>    vhost: add vhost msg support for get/set config
>    vdpa/ifc: add blk ops for ifc device
>    vdpa/ifc: add vDPA interrupt relay for blk device
>    vdpa/ifc: add block device SW live-migration
>    vhost: add API to get vDPA device type
>    vdpa/ifc: add get device type ops to ifc driver
>    examples/vdpa: add virtio blk support
>    usertools: add support for virtio blk device
>    vdpa/ifc: add log for config space of virtio blk
>    vdpa/ifc: add interrupt handling for config space
>    vdpa/ifc/base: access correct register for blk device
> 
>   doc/guides/prog_guide/vhost_lib.rst    |   5 +
>   doc/guides/rel_notes/release_22_07.rst |   4 +
>   drivers/vdpa/ifc/base/ifcvf.c          |  34 ++-
>   drivers/vdpa/ifc/base/ifcvf.h          |  27 ++-
>   drivers/vdpa/ifc/ifcvf_vdpa.c          | 420 +++++++++++++++++++++++++++++++--
>   examples/vdpa/main.c                   |  56 +++++
>   examples/vdpa/vdpa_blk_compact.h       |  50 ++++
>   lib/vhost/rte_vhost.h                  |  17 ++
>   lib/vhost/socket.c                     |  44 ++++
>   lib/vhost/vdpa_driver.h                |  11 +-
>   lib/vhost/version.map                  |   1 +
>   lib/vhost/vhost_user.c                 |  85 +++++++
>   lib/vhost/vhost_user.h                 |  13 +
>   usertools/dpdk-devbind.py              |   5 +-
>   14 files changed, 741 insertions(+), 31 deletions(-)
>   create mode 100644 examples/vdpa/vdpa_blk_compact.h
> 
Applied to dpdk-next-virtio/main.
Thanks,
Maxime
^ permalink raw reply	[flat|nested] 263+ messages in thread
* RE: [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc
  2022-06-01 10:10     ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Maxime Coquelin
@ 2022-06-01 13:57       ` Pei, Andy
  0 siblings, 0 replies; 263+ messages in thread
From: Pei, Andy @ 2022-06-01 13:57 UTC (permalink / raw)
  To: Maxime Coquelin, dev
  Cc: Xia, Chenbo, Cao, Gang, Liu, Changpeng, Xu, Rosen, Xiao, QimaiX
Thanks Maxime
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, June 1, 2022 6:11 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: Xia, Chenbo <Chenbo.Xia@intel.com>; Cao, Gang <gang.cao@intel.com>;
> Liu, Changpeng <changpeng.liu@intel.com>; Xu, Rosen
> <rosen.xu@intel.com>; Xiao, QimaiX <qimaix.xiao@intel.com>
> Subject: Re: [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc
> 
> 
> 
> On 5/24/22 04:48, Andy Pei wrote:
> > This patch set adds virtio_blk device support to the vdpa/ifc driver.
> > With a lot of similarities, I re-use part of the vdpa/ifc driver.
> > Distinguish the virtio net and blk devices by device id, and implement
> > specific features and ops.
> > Add an example to vdpa to support the virtio_blk device.
> > To support blk device live migration, some modifications are made to the vhost lib.
> > Perform dev_conf op only under VHOST_USER_SET_VRING_CALL msg.
> >
> >
> > v10:
> >   fix typo.
> >   add more NULL pointer check.
> >   fix an error handler.
> >
> > v9:
> >   fix some naming issue.
> >   add more NULL pointer check.
> >   close open file when errors occur.
> >
> > v8:
> >   delete some redundant code.
> >   fix some commit log.
> >
> > v7:
> >   Check on expected fd num in new vhost msg handler.
> >   Sanity check on vhost msg size.
> >   Fix typo.
> >   Add commit log to help understand code.
> >   Remove duplicated code.
> >   Add new API to get vDPA device type.
> >
> > v6:
> >   fix some commit log.
> >   add vhost socket in log output to make it more user-friendly.
> >   when driver ops fail, just output some log, do not break message handler.
> >   check vhost msg size in msg handler.
> > v5:
> >   fix some coding style issues.
> > v4:
> >   add args "isblk" to vdpa example to specify a block device, fix some
> >   issue in example.
> >   Make sure code specific to the block device does not affect the net device.
> > v3:
> >   Fix some compile issues.
> > v2:
> >   Fix some coding style issues.
> >
> > Andy Pei (13):
> >    vdpa/ifc: add support for virtio blk device
> >    vhost: add vDPA ops for blk device
> >    vhost: add vhost msg support for get/set config
> >    vdpa/ifc: add blk ops for ifc device
> >    vdpa/ifc: add vDPA interrupt relay for blk device
> >    vdpa/ifc: add block device SW live-migration
> >    vhost: add API to get vDPA device type
> >    vdpa/ifc: add get device type ops to ifc driver
> >    examples/vdpa: add virtio blk support
> >    usertools: add support for virtio blk device
> >    vdpa/ifc: add log for config space of virtio blk
> >    vdpa/ifc: add interrupt handling for config space
> >    vdpa/ifc/base: access correct register for blk device
> >
> >   doc/guides/prog_guide/vhost_lib.rst    |   5 +
> >   doc/guides/rel_notes/release_22_07.rst |   4 +
> >   drivers/vdpa/ifc/base/ifcvf.c          |  34 ++-
> >   drivers/vdpa/ifc/base/ifcvf.h          |  27 ++-
> >   drivers/vdpa/ifc/ifcvf_vdpa.c          | 420
> +++++++++++++++++++++++++++++++--
> >   examples/vdpa/main.c                   |  56 +++++
> >   examples/vdpa/vdpa_blk_compact.h       |  50 ++++
> >   lib/vhost/rte_vhost.h                  |  17 ++
> >   lib/vhost/socket.c                     |  44 ++++
> >   lib/vhost/vdpa_driver.h                |  11 +-
> >   lib/vhost/version.map                  |   1 +
> >   lib/vhost/vhost_user.c                 |  85 +++++++
> >   lib/vhost/vhost_user.h                 |  13 +
> >   usertools/dpdk-devbind.py              |   5 +-
> >   14 files changed, 741 insertions(+), 31 deletions(-)
> >   create mode 100644 examples/vdpa/vdpa_blk_compact.h
> >
> 
> Applied to dpdk-next-virtio/main.
> 
> Thanks,
> Maxime
^ permalink raw reply	[flat|nested] 263+ messages in thread
end of thread, other threads:[~2022-06-01 13:58 UTC | newest]
Thread overview: 263+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-25  6:47 [PATCH 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
2022-01-25  6:47 ` [PATCH 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
2022-01-25  9:37   ` [PATCH v2 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
2022-01-25  9:37     ` [PATCH v2 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
2022-01-25  9:37     ` [PATCH v2 02/15] vhost: add vdpa ops for " Andy Pei
2022-01-25  9:37     ` [PATCH v2 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-01-25  9:37     ` [PATCH v2 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
2022-01-25  9:37     ` [PATCH v2 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
2022-01-25  9:37     ` [PATCH v2 06/15] example/vdpa:add vdpa blk support in example Andy Pei
2022-01-25  9:37     ` [PATCH v2 07/15] usertools: add support for virtio blk device Andy Pei
2022-01-25  9:37     ` [PATCH v2 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
2022-01-25  9:37     ` [PATCH v2 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
2022-01-25  9:37     ` [PATCH v2 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
2022-01-25  9:37     ` [PATCH v2 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
2022-01-25  9:37     ` [PATCH v2 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
2022-01-25  9:37     ` [PATCH v2 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
2022-01-25  9:37     ` [PATCH v2 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
2022-01-25  9:37     ` [PATCH v2 15/15] vhost: make sure each queue callfd is configured Andy Pei
2022-01-27  7:13       ` Xia, Chenbo
2022-01-29  3:11         ` Pei, Andy
2022-01-29  3:03   ` [PATCH v3 00/15] add virtio_blk device support to vdpa/ifc Andy Pei
2022-01-29  3:03     ` [PATCH v3 01/15] vdpa/ifc: add support for virtio blk device Andy Pei
2022-03-22  8:57       ` Maxime Coquelin
2022-01-29  3:03     ` [PATCH v3 02/15] vhost: add vdpa ops for " Andy Pei
2022-03-22  9:12       ` Maxime Coquelin
2022-01-29  3:03     ` [PATCH v3 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-03-22  9:58       ` Maxime Coquelin
2022-01-29  3:03     ` [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
2022-03-22 10:04       ` Maxime Coquelin
2022-03-23  7:07         ` Pei, Andy
2022-03-23  7:42           ` Pei, Andy
2022-01-29  3:03     ` [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
2022-03-22 11:10       ` Maxime Coquelin
2022-03-23  9:08         ` Pei, Andy
2022-01-29  3:03     ` [PATCH v3 06/15] example/vdpa:add vdpa blk support in example Andy Pei
2022-03-22 11:29       ` Maxime Coquelin
2022-03-23  9:31         ` Pei, Andy
2022-01-29  3:03     ` [PATCH v3 07/15] usertools: add support for virtio blk device Andy Pei
2022-01-29  3:03     ` [PATCH v3 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
2022-01-29  3:03     ` [PATCH v3 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
2022-01-29  3:03     ` [PATCH v3 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
2022-01-29  3:03     ` [PATCH v3 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
2022-01-29  3:03     ` [PATCH v3 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
2022-01-29  3:03     ` [PATCH v3 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
2022-01-29  3:03     ` [PATCH v3 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pause Andy Pei
2022-01-29  3:03     ` [PATCH v3 15/15] vhost: make sure each queue callfd is configured Andy Pei
2022-03-27 14:51   ` [PATCH v4 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
2022-03-27 14:51     ` [PATCH v4 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
2022-03-27 14:51     ` [PATCH v4 02/16] vhost: add vdpa ops for " Andy Pei
2022-03-27 14:51     ` [PATCH v4 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
2022-03-27 14:51     ` [PATCH v4 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-03-27 14:51     ` [PATCH v4 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
2022-03-27 14:51     ` [PATCH v4 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
2022-03-27 14:51     ` [PATCH v4 07/16] example/vdpa:add vdpa blk support in example Andy Pei
2022-03-27 14:51     ` [PATCH v4 08/16] usertools: add support for virtio blk device Andy Pei
2022-03-27 20:01       ` Stephen Hemminger
2022-03-27 14:51     ` [PATCH v4 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
2022-03-27 14:51     ` [PATCH v4 10/16] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
2022-03-27 14:51     ` [PATCH v4 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
2022-03-27 14:51     ` [PATCH v4 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
2022-03-27 14:51     ` [PATCH v4 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
2022-03-27 14:51     ` [PATCH v4 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
2022-03-27 14:51     ` [PATCH v4 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
2022-03-27 14:51     ` [PATCH v4 16/16] vhost: make sure each queue callfd is configured Andy Pei
2022-03-28  7:17   ` [PATCH v5 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
2022-03-28  7:17     ` [PATCH v5 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
2022-03-28  7:17     ` [PATCH v5 02/16] vhost: add vdpa ops for " Andy Pei
2022-03-28  7:17     ` [PATCH v5 03/16] vhost: add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG Andy Pei
2022-04-20 13:53       ` Xia, Chenbo
2022-04-21  8:05         ` Pei, Andy
2022-03-28  7:17     ` [PATCH v5 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-03-28  7:17     ` [PATCH v5 05/16] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
2022-03-28  7:17     ` [PATCH v5 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
2022-03-28  7:17     ` [PATCH v5 07/16] example/vdpa:add vdpa blk support in example Andy Pei
2022-03-28  7:17     ` [PATCH v5 08/16] usertools: add support for virtio blk device Andy Pei
2022-03-28  7:17     ` [PATCH v5 09/16] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
2022-03-28  7:17     ` [PATCH v5 10/16] vdpa/ifc: add some log at VDPA launch before qemu connect Andy Pei
2022-03-28  7:17     ` [PATCH v5 11/16] vdpa/ifc: read virtio max_queues from hardware Andy Pei
2022-03-28  7:17     ` [PATCH v5 12/16] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
2022-03-28  7:17     ` [PATCH v5 13/16] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
2022-03-28  7:17     ` [PATCH v5 14/16] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
2022-03-28  7:17     ` [PATCH v5 15/16] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
2022-03-28  7:17     ` [PATCH v5 16/16] vhost: make sure each queue callfd is configured Andy Pei
2022-04-21  8:33   ` [PATCH v6 00/16] add virtio_blk device support to vdpa/ifc Andy Pei
2022-04-21  8:33     ` [PATCH v6 01/16] vdpa/ifc: add support for virtio blk device Andy Pei
2022-04-21  8:33     ` [PATCH v6 02/16] vhost: add vDPA ops for " Andy Pei
2022-04-21  8:33     ` [PATCH v6 03/16] vhost: add vhost msg support Andy Pei
2022-04-25 12:42       ` Xia, Chenbo
2022-04-26  8:55         ` Pei, Andy
2022-04-26  9:17           ` Xia, Chenbo
2022-04-27  4:12             ` Pei, Andy
2022-04-25 13:04       ` David Marchand
2022-04-26  8:08         ` Pei, Andy
2022-04-21  8:33     ` [PATCH v6 04/16] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-04-21  8:33     ` [PATCH v6 05/16] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
2022-04-25 12:58       ` Xia, Chenbo
2022-04-26  9:56         ` Pei, Andy
2022-04-21  8:33     ` [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration Andy Pei
2022-04-25 13:10       ` Xia, Chenbo
2022-04-26 10:07         ` Pei, Andy
2022-04-21  8:33     ` [PATCH v6 07/16] examples/vdpa: add vDPA blk support in example Andy Pei
2022-04-25 13:38       ` Xia, Chenbo
2022-04-27  4:11         ` Pei, Andy
2022-04-21  8:33     ` [PATCH v6 08/16] usertools: add support for virtio blk device Andy Pei
2022-04-25 13:53       ` Xia, Chenbo
2022-04-26  4:13         ` Pei, Andy
2022-04-21  8:33     ` [PATCH v6 09/16] vdpa/ifc: add set vring state for " Andy Pei
2022-04-21  8:33     ` [PATCH v6 10/16] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
2022-04-21  8:33     ` [PATCH v6 11/16] vdpa/ifc: read virtio max queues from hardware Andy Pei
2022-04-21  8:33     ` [PATCH v6 12/16] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
2022-04-21  8:33     ` [PATCH v6 13/16] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
2022-04-21  8:33     ` [PATCH v6 14/16] vdpa/ifc/base: access correct register for blk device Andy Pei
2022-04-21  8:33     ` [PATCH v6 15/16] vdpa/ifc: blk device pause without no inflight IO Andy Pei
2022-04-21  8:33     ` [PATCH v6 16/16] vhost: make sure each queue callfd is configured Andy Pei
2022-04-27  8:29   ` [PATCH v7 00/18] add virtio_blk device support to vdpa/ifc Andy Pei
2022-04-27  8:29     ` [PATCH v7 01/18] vdpa/ifc: add support for virtio blk device Andy Pei
2022-04-27  8:29     ` [PATCH v7 02/18] vhost: add vDPA ops for " Andy Pei
2022-04-27  8:29     ` [PATCH v7 03/18] vhost: add vhost msg support Andy Pei
2022-05-11 14:24       ` Xia, Chenbo
2022-05-12  3:50         ` Pei, Andy
2022-04-27  8:29     ` [PATCH v7 04/18] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-04-27  8:29     ` [PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device Andy Pei
2022-05-11 14:35       ` Xia, Chenbo
2022-05-12  3:49         ` Pei, Andy
2022-04-27  8:29     ` [PATCH v7 06/18] vdpa/ifc: add block device SW live-migration Andy Pei
2022-05-12 12:55       ` Xia, Chenbo
2022-05-13  3:32         ` Pei, Andy
2022-04-27  8:29     ` [PATCH v7 07/18] vhost: add API to get vDPA device type Andy Pei
2022-05-12 13:14       ` Xia, Chenbo
2022-05-13  4:15         ` Pei, Andy
2022-04-27  8:29     ` [PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver Andy Pei
2022-05-12 13:21       ` Xia, Chenbo
2022-05-12 13:40         ` Xia, Chenbo
2022-05-13  7:38           ` Pei, Andy
2022-04-27  8:29     ` [PATCH v7 09/18] examples/vdpa: add vDPA blk support in example Andy Pei
2022-05-12 13:34       ` Xia, Chenbo
2022-05-13  8:16         ` Pei, Andy
2022-04-27  8:29     ` [PATCH v7 10/18] usertools: add support for virtio blk device Andy Pei
2022-04-27  8:29     ` [PATCH v7 11/18] vdpa/ifc: add set vring state for " Andy Pei
2022-05-12 13:44       ` Xia, Chenbo
2022-04-27  8:30     ` [PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect Andy Pei
2022-05-12 13:53       ` Xia, Chenbo
2022-05-13  8:34         ` Pei, Andy
2022-05-13  8:40           ` Xia, Chenbo
2022-05-13 15:37           ` Stephen Hemminger
2022-05-16  1:03             ` Pei, Andy
2022-04-27  8:30     ` [PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware Andy Pei
2022-05-12 13:55       ` Xia, Chenbo
2022-05-13  8:58         ` Pei, Andy
2022-04-27  8:30     ` [PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk Andy Pei
2022-05-13  2:52       ` Xia, Chenbo
2022-05-13 10:10         ` Pei, Andy
2022-04-27  8:30     ` [PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct Andy Pei
2022-05-13  2:55       ` Xia, Chenbo
2022-05-16  3:05         ` Pei, Andy
2022-04-27  8:30     ` [PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device Andy Pei
2022-05-13  2:57       ` Xia, Chenbo
2022-05-16  4:19         ` Pei, Andy
2022-04-27  8:30     ` [PATCH v7 17/18] vdpa/ifc: blk device pause without no inflight IO Andy Pei
2022-05-13  2:59       ` Xia, Chenbo
2022-05-16  4:20         ` Pei, Andy
2022-04-27  8:30     ` [PATCH v7 18/18] vhost: make sure each queue callfd is configured Andy Pei
2022-05-13  3:10       ` Xia, Chenbo
2022-05-18 12:13   ` [PATCH v8 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
2022-05-18 12:13     ` [PATCH v8 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
2022-05-23  3:41       ` Xia, Chenbo
2022-05-23  4:33         ` Pei, Andy
2022-05-18 12:13     ` [PATCH v8 02/13] vhost: add vDPA ops for " Andy Pei
2022-05-23  3:46       ` Xia, Chenbo
2022-05-23  4:38         ` Pei, Andy
2022-05-18 12:13     ` [PATCH v8 03/13] vhost: add vhost msg support for get/set config Andy Pei
2022-05-23  3:54       ` Xia, Chenbo
2022-05-23  4:49         ` Pei, Andy
2022-05-18 12:13     ` [PATCH v8 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-05-23  4:07       ` Xia, Chenbo
2022-05-23  5:04         ` Pei, Andy
2022-05-18 12:13     ` [PATCH v8 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
2022-05-23  4:10       ` Xia, Chenbo
2022-05-18 12:13     ` [PATCH v8 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
2022-05-23  5:25       ` Xia, Chenbo
2022-05-23  5:31         ` Pei, Andy
2022-05-18 12:13     ` [PATCH v8 07/13] vhost: add API to get vDPA device type Andy Pei
2022-05-23  7:26       ` Xia, Chenbo
2022-05-23  8:23         ` Pei, Andy
2022-05-18 12:13     ` [PATCH v8 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
2022-05-23  7:30       ` Xia, Chenbo
2022-05-23  8:31         ` Pei, Andy
2022-05-18 12:13     ` [PATCH v8 09/13] examples/vdpa: add add virtio blk support Andy Pei
2022-05-23  7:40       ` Xia, Chenbo
2022-05-23  8:38         ` Pei, Andy
2022-05-18 12:13     ` [PATCH v8 10/13] usertools: add support for virtio blk device Andy Pei
2022-05-23  7:43       ` Xia, Chenbo
2022-05-23  8:49         ` Pei, Andy
2022-05-18 12:13     ` [PATCH v8 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
2022-05-23  7:46       ` Xia, Chenbo
2022-05-18 12:13     ` [PATCH v8 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
2022-05-23  7:54       ` Xia, Chenbo
2022-05-18 12:13     ` [PATCH v8 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
2022-05-23  7:55       ` Xia, Chenbo
2022-05-23  9:03         ` Pei, Andy
2022-05-23  9:35   ` [PATCH v9 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
2022-05-23  9:35     ` [PATCH v9 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
2022-05-24  2:37       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 02/13] vhost: add vDPA ops for " Andy Pei
2022-05-24  2:38       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 03/13] vhost: add vhost msg support for get/set config Andy Pei
2022-05-24  2:52       ` Xia, Chenbo
2022-05-24  3:24         ` Pei, Andy
2022-05-23  9:35     ` [PATCH v9 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-05-24  2:55       ` Xia, Chenbo
2022-05-24  3:27         ` Pei, Andy
2022-05-23  9:35     ` [PATCH v9 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
2022-05-24  2:58       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
2022-05-24  3:00       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 07/13] vhost: add API to get vDPA device type Andy Pei
2022-05-24  3:01       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
2022-05-24  3:02       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 09/13] examples/vdpa: add virtio blk support Andy Pei
2022-05-24  3:03       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 10/13] usertools: add support for virtio blk device Andy Pei
2022-05-24  3:05       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
2022-05-24  3:06       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
2022-05-24  3:09       ` Xia, Chenbo
2022-05-23  9:35     ` [PATCH v9 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
2022-05-24  3:10       ` Xia, Chenbo
2022-05-24  3:11         ` Pei, Andy
2022-05-24  2:48   ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Andy Pei
2022-05-24  2:48     ` [PATCH v10 01/13] vdpa/ifc: add support for virtio blk device Andy Pei
2022-05-24  2:48     ` [PATCH v10 02/13] vhost: add vDPA ops for " Andy Pei
2022-05-24  2:48     ` [PATCH v10 03/13] vhost: add vhost msg support for get/set config Andy Pei
2022-05-24  3:49       ` Xia, Chenbo
2022-05-24  2:48     ` [PATCH v10 04/13] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-05-24  6:28       ` Xia, Chenbo
2022-05-24  6:30         ` Pei, Andy
2022-05-24  2:48     ` [PATCH v10 05/13] vdpa/ifc: add vDPA interrupt relay for blk device Andy Pei
2022-05-24  2:48     ` [PATCH v10 06/13] vdpa/ifc: add block device SW live-migration Andy Pei
2022-05-24  2:48     ` [PATCH v10 07/13] vhost: add API to get vDPA device type Andy Pei
2022-05-24  2:48     ` [PATCH v10 08/13] vdpa/ifc: add get device type ops to ifc driver Andy Pei
2022-05-24  2:48     ` [PATCH v10 09/13] examples/vdpa: add virtio blk support Andy Pei
2022-05-24  2:48     ` [PATCH v10 10/13] usertools: add support for virtio blk device Andy Pei
2022-05-24  2:48     ` [PATCH v10 11/13] vdpa/ifc: add log for config space of virtio blk Andy Pei
2022-05-24  2:48     ` [PATCH v10 12/13] vdpa/ifc: add interrupt handling for config space Andy Pei
2022-05-24  2:48     ` [PATCH v10 13/13] vdpa/ifc/base: access correct register for blk device Andy Pei
2022-06-01 10:10     ` [PATCH v10 00/13] add virtio_blk device support to vdpa/ifc Maxime Coquelin
2022-06-01 13:57       ` Pei, Andy
2022-01-25  6:47 ` [PATCH 02/15] vhost: add vdpa ops for blk device Andy Pei
2022-01-25  6:47 ` [PATCH 03/15] vdpa/ifc: add blk ops for ifc device Andy Pei
2022-01-25  6:47 ` [PATCH 04/15] vdpa/ifc: add vdpa interrupt for blk device Andy Pei
2022-01-25  6:47 ` [PATCH 05/15] vdpa/ifc: add blk dev sw live migration Andy Pei
2022-01-25  6:47 ` [PATCH 06/15] example/vdpa:add vdpa blk support in example Andy Pei
2022-01-25  6:47 ` [PATCH 07/15] usertools: add support for virtio blk device Andy Pei
2022-01-25  6:47 ` [PATCH 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for " Andy Pei
2022-01-25  6:47 ` [PATCH 09/15] vdpa/ifc: add some log at VDPA lauch before qemu connect Andy Pei
2022-01-25  6:47 ` [PATCH 10/15] vdpa/ifc: read virtio max_queues from hardware Andy Pei
2022-01-25  6:47 ` [PATCH 11/15] vdpa: add config space change interrupt register and handle for virtio_blk Andy Pei
2022-01-25  6:47 ` [PATCH 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe Andy Pei
2022-01-25  6:47 ` [PATCH 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device Andy Pei
2022-01-25  6:47 ` [PATCH 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause Andy Pei
2022-01-25  6:47 ` [PATCH 15/15] vhost: make sure each queue callfd is configured Andy Pei
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).