DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 0/2] vhost: add VDUSE reconnection support
@ 2024-09-05 14:26 Maxime Coquelin
  2024-09-05 14:26 ` [PATCH 1/2] vhost: add logging mechanism for reconnection Maxime Coquelin
  2024-09-05 14:26 ` [PATCH 2/2] vhost: add reconnection support to VDUSE Maxime Coquelin
  0 siblings, 2 replies; 7+ messages in thread
From: Maxime Coquelin @ 2024-09-05 14:26 UTC (permalink / raw)
  To: dev, david.marchand, chenbox; +Cc: Maxime Coquelin

This series adds support for VDUSE reconnection.

First patch introduces the reconnection file layout and
track the virtqueues available index updates in the
datapath and control queue.

Second patch adds VDUSE reconnect intialization and some
sanity checks to prevent incompatible reconnections.

ToDos indentified for v2:
========================
- More sanity checks at reconnection
- Investigate reconnection struct versionning
  for backward compatibility.
- Add EAL helper to get tmpfs path.

Maxime Coquelin (2):
  vhost: add logging mechanism for reconnection
  vhost: add reconnection support to VDUSE

 lib/vhost/vduse.c           | 280 +++++++++++++++++++++++++++++++-----
 lib/vhost/vhost.h           |  40 +++++-
 lib/vhost/virtio_net.c      |   8 ++
 lib/vhost/virtio_net_ctrl.c |   2 +
 4 files changed, 288 insertions(+), 42 deletions(-)

-- 
2.46.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] vhost: add logging mechanism for reconnection
  2024-09-05 14:26 [PATCH 0/2] vhost: add VDUSE reconnection support Maxime Coquelin
@ 2024-09-05 14:26 ` Maxime Coquelin
  2024-09-05 14:26 ` [PATCH 2/2] vhost: add reconnection support to VDUSE Maxime Coquelin
  1 sibling, 0 replies; 7+ messages in thread
From: Maxime Coquelin @ 2024-09-05 14:26 UTC (permalink / raw)
  To: dev, david.marchand, chenbox; +Cc: Maxime Coquelin

This patch introduces a way for backend to keep track
of the needed information to be able to reconnect without
frontend cooperation.

It will be used for VDUSE, which does not provide interface
for the backend to save and later recover local virtqueues
metadata needed to reconnect.

Vhost-user support could also be added for improved packed
ring reconnection support.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vhost.h           | 40 ++++++++++++++++++++++++++++++++++---
 lib/vhost/virtio_net.c      |  8 ++++++++
 lib/vhost/virtio_net_ctrl.c |  2 ++
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index cd3fa55f1b..9420859c3a 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -269,6 +269,23 @@ struct vhost_async {
 	};
 };
 
+#define VHOST_MAX_VRING			0x100
+#define VHOST_MAX_QUEUE_PAIRS		0x80
+
+struct __rte_cache_aligned vhost_reconnect_vring {
+	uint16_t last_avail_idx;
+	bool avail_wrap_counter;
+};
+
+struct vhost_reconnect_data {
+	uint32_t version;
+	uint64_t features;
+	uint8_t status;
+	struct virtio_net_config config;
+	uint32_t nr_vrings;
+	struct vhost_reconnect_vring vring[VHOST_MAX_VRING];
+};
+
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
  */
@@ -351,6 +368,7 @@ struct __rte_cache_aligned vhost_virtqueue {
 	struct virtqueue_stats	stats;
 
 	RTE_ATOMIC(bool) irq_pending;
+	struct vhost_reconnect_vring *reconnect_log;
 };
 
 /* Virtio device status as per Virtio specification */
@@ -362,9 +380,6 @@ struct __rte_cache_aligned vhost_virtqueue {
 #define VIRTIO_DEVICE_STATUS_DEV_NEED_RESET	0x40
 #define VIRTIO_DEVICE_STATUS_FAILED		0x80
 
-#define VHOST_MAX_VRING			0x100
-#define VHOST_MAX_QUEUE_PAIRS		0x80
-
 /* Declare IOMMU related bits for older kernels */
 #ifndef VIRTIO_F_IOMMU_PLATFORM
 
@@ -538,8 +553,26 @@ struct __rte_cache_aligned virtio_net {
 	struct rte_vhost_user_extern_ops extern_ops;
 
 	struct vhost_backend_ops *backend_ops;
+
+	struct vhost_reconnect_data *reconnect_log;
 };
 
+static __rte_always_inline void
+vhost_virtqueue_reconnect_log_split(struct vhost_virtqueue *vq)
+{
+	if (vq->reconnect_log != NULL)
+		vq->reconnect_log->last_avail_idx = vq->last_avail_idx;
+}
+
+static __rte_always_inline void
+vhost_virtqueue_reconnect_log_packed(struct vhost_virtqueue *vq)
+{
+	if (vq->reconnect_log != NULL) {
+		vq->reconnect_log->last_avail_idx = vq->last_avail_idx;
+		vq->reconnect_log->avail_wrap_counter = vq->avail_wrap_counter;
+	}
+}
+
 static inline void
 vq_assert_lock__(struct virtio_net *dev, struct vhost_virtqueue *vq, const char *func)
 	__rte_assert_exclusive_lock(&vq->access_lock)
@@ -584,6 +617,7 @@ vq_inc_last_avail_packed(struct vhost_virtqueue *vq, uint16_t num)
 		vq->avail_wrap_counter ^= 1;
 		vq->last_avail_idx -= vq->size;
 	}
+	vhost_virtqueue_reconnect_log_packed(vq);
 }
 
 void __vhost_log_cache_write(struct virtio_net *dev,
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 370402d849..f66a0c82f8 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1445,6 +1445,7 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		}
 
 		vq->last_avail_idx += num_buffers;
+		vhost_virtqueue_reconnect_log_split(vq);
 	}
 
 	do_data_copy_enqueue(dev, vq);
@@ -1857,6 +1858,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue
 		pkts_info[slot_idx].mbuf = pkts[pkt_idx];
 
 		vq->last_avail_idx += num_buffers;
+		vhost_virtqueue_reconnect_log_split(vq);
 	}
 
 	if (unlikely(pkt_idx == 0))
@@ -1885,6 +1887,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue
 		/* recover shadow used ring and available ring */
 		vq->shadow_used_idx -= num_descs;
 		vq->last_avail_idx -= num_descs;
+		vhost_virtqueue_reconnect_log_split(vq);
 	}
 
 	/* keep used descriptors */
@@ -2100,6 +2103,7 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx,
 		vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err;
 		vq->avail_wrap_counter ^= 1;
 	}
+	vhost_virtqueue_reconnect_log_packed(vq);
 
 	if (async->buffer_idx_packed >= buffers_err)
 		async->buffer_idx_packed -= buffers_err;
@@ -3182,6 +3186,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 	if (likely(vq->shadow_used_idx)) {
 		vq->last_avail_idx += vq->shadow_used_idx;
+		vhost_virtqueue_reconnect_log_split(vq);
 		do_data_copy_dequeue(vq);
 		flush_shadow_used_ring_split(dev, vq);
 		vhost_vring_call_split(dev, vq);
@@ -3854,6 +3859,7 @@ virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		async->desc_idx_split++;
 
 		vq->last_avail_idx++;
+		vhost_virtqueue_reconnect_log_split(vq);
 	}
 
 	if (unlikely(dropped))
@@ -3872,6 +3878,7 @@ virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		pkt_idx = n_xfer;
 		/* recover available ring */
 		vq->last_avail_idx -= pkt_err;
+		vhost_virtqueue_reconnect_log_split(vq);
 
 		/**
 		 * recover async channel copy related structures and free pktmbufs
@@ -4153,6 +4160,7 @@ virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			vq->last_avail_idx += vq->size - descs_err;
 			vq->avail_wrap_counter ^= 1;
 		}
+		vhost_virtqueue_reconnect_log_packed(vq);
 	}
 
 	async->pkts_idx += pkt_idx;
diff --git a/lib/vhost/virtio_net_ctrl.c b/lib/vhost/virtio_net_ctrl.c
index 8f78122361..b8ee94018e 100644
--- a/lib/vhost/virtio_net_ctrl.c
+++ b/lib/vhost/virtio_net_ctrl.c
@@ -169,6 +169,7 @@ virtio_net_ctrl_pop(struct virtio_net *dev, struct vhost_virtqueue *cvq,
 	cvq->last_avail_idx++;
 	if (cvq->last_avail_idx >= cvq->size)
 		cvq->last_avail_idx -= cvq->size;
+	vhost_virtqueue_reconnect_log_split(cvq);
 
 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
 		vhost_avail_event(cvq) = cvq->last_avail_idx;
@@ -181,6 +182,7 @@ virtio_net_ctrl_pop(struct virtio_net *dev, struct vhost_virtqueue *cvq,
 	cvq->last_avail_idx++;
 	if (cvq->last_avail_idx >= cvq->size)
 		cvq->last_avail_idx -= cvq->size;
+	vhost_virtqueue_reconnect_log_split(cvq);
 
 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
 		vhost_avail_event(cvq) = cvq->last_avail_idx;
-- 
2.46.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] vhost: add reconnection support to VDUSE
  2024-09-05 14:26 [PATCH 0/2] vhost: add VDUSE reconnection support Maxime Coquelin
  2024-09-05 14:26 ` [PATCH 1/2] vhost: add logging mechanism for reconnection Maxime Coquelin
@ 2024-09-05 14:26 ` Maxime Coquelin
  2024-09-06  7:14   ` Chenbo Xia
  1 sibling, 1 reply; 7+ messages in thread
From: Maxime Coquelin @ 2024-09-05 14:26 UTC (permalink / raw)
  To: dev, david.marchand, chenbox; +Cc: Maxime Coquelin

This patch enables VDUSE reconnection support making use of
the newly introduced reconnection mechanism in Vhost
library.

At DPDK VDUSE device creation time, there are two
possibilities:
 1. The Kernel VDUSE device does not exist:
  a. A reconnection file named after the VUDSE device name
     is created in VDUSE tmpfs.
  b. The file is truncated to 'struct vhost_reconnect_data'
     size, and mmapped.
  c. Negotiated features, Virtio status... are saved for
     sanity checks at reconnect time.
 2. The Kernel VDUSE device already exists:
  a. Exit with failure if no reconnect file exists for
     this device.
  b. Open and mmap the reconnect file.
  c. Perform sanity check to ensure features are compatible.
  d. Restore virtqueues' available indexes at startup time.

Then at runtime, the virtqueues' available index are logged by
the Vhost reconnection mechanism.

At DPDK VDUSE device destruction time, there are two
possibilities:
 1. The Kernel VDUSE device destruction succeed, which
    means it is no more attached to the vDPA bus. The
    reconnection file is unmapped and then removed.
 2. The Kernel VDUSE device destruction failed, meaning it
    is no more attached to the vDPA bus. The reconnection
    file is unmapped but not removed to make possible later
    reconnection.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/vduse.c | 280 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 241 insertions(+), 39 deletions(-)

diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index c66602905c..bd0e492d62 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -136,7 +136,7 @@ vduse_control_queue_event(int fd, void *arg, int *remove __rte_unused)
 }
 
 static void
-vduse_vring_setup(struct virtio_net *dev, unsigned int index)
+vduse_vring_setup(struct virtio_net *dev, unsigned int index, bool reconnect)
 {
 	struct vhost_virtqueue *vq = dev->virtqueue[index];
 	struct vhost_vring_addr *ra = &vq->ring_addrs;
@@ -152,6 +152,19 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
 		return;
 	}
 
+	if (reconnect) {
+		vq->last_avail_idx = vq->reconnect_log->last_avail_idx;
+		vq->last_used_idx = vq->reconnect_log->last_avail_idx;
+	} else {
+		vq->last_avail_idx = vq_info.split.avail_index;
+		vq->last_used_idx = vq_info.split.avail_index;
+	}
+	vq->size = vq_info.num;
+	vq->ready = true;
+	vq->enabled = vq_info.ready;
+	ra->desc_user_addr = vq_info.desc_addr;
+	ra->avail_user_addr = vq_info.driver_addr;
+	ra->used_user_addr = vq_info.device_addr;
 	VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index);
 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num);
 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx",
@@ -162,15 +175,6 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
 			(unsigned long long)vq_info.device_addr);
 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", vq_info.split.avail_index);
 	VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready);
-
-	vq->last_avail_idx = vq_info.split.avail_index;
-	vq->size = vq_info.num;
-	vq->ready = true;
-	vq->enabled = vq_info.ready;
-	ra->desc_user_addr = vq_info.desc_addr;
-	ra->avail_user_addr = vq_info.driver_addr;
-	ra->used_user_addr = vq_info.device_addr;
-
 	vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 	if (vq->kickfd < 0) {
 		VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for VQ %u: %s",
@@ -267,7 +271,7 @@ vduse_vring_cleanup(struct virtio_net *dev, unsigned int index)
 }
 
 static void
-vduse_device_start(struct virtio_net *dev)
+vduse_device_start(struct virtio_net *dev, bool reconnect)
 {
 	unsigned int i, ret;
 
@@ -287,6 +291,15 @@ vduse_device_start(struct virtio_net *dev)
 		return;
 	}
 
+	if (reconnect && dev->features != dev->reconnect_log->features) {
+		VHOST_CONFIG_LOG(dev->ifname, ERR,
+				"Mismatch between reconnect file features 0x%" PRIx64 " & device features 0x%" PRIx64,
+				dev->reconnect_log->features, dev->features);
+		return;
+	}
+
+	dev->reconnect_log->features = dev->features;
+
 	VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" PRIx64,
 		dev->features);
 
@@ -300,7 +313,7 @@ vduse_device_start(struct virtio_net *dev)
 	}
 
 	for (i = 0; i < dev->nr_vring; i++)
-		vduse_vring_setup(dev, i);
+		vduse_vring_setup(dev, i, reconnect);
 
 	dev->flags |= VIRTIO_DEV_READY;
 
@@ -373,6 +386,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
 				req.s.status);
 		old_status = dev->status;
 		dev->status = req.s.status;
+		dev->reconnect_log->status = dev->status;
 		resp.result = VDUSE_REQ_RESULT_OK;
 		break;
 	case VDUSE_UPDATE_IOTLB:
@@ -398,7 +412,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
 
 	if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
 		if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)
-			vduse_device_start(dev);
+			vduse_device_start(dev, false);
 		else
 			vduse_device_stop(dev);
 	}
@@ -407,10 +421,64 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
 			vduse_req_id_to_str(req.type), req.type);
 }
 
+static char vduse_reconnect_dir[PATH_MAX];
+static bool vduse_reconnect_path_set;
+
+static int
+vduse_reconnect_path_init(void)
+{
+	const char *directory;
+	int ret;
+
+	/* from RuntimeDirectory= see systemd.exec */
+	directory = getenv("RUNTIME_DIRECTORY");
+	if (directory == NULL) {
+		/*
+		 * Used standard convention defined in
+		 * XDG Base Directory Specification and
+		 * Filesystem Hierarchy Standard.
+		 */
+		if (getuid() == 0)
+			directory = "/var/run";
+		else
+			directory = getenv("XDG_RUNTIME_DIR") ? : "/tmp";
+	}
+
+	ret = snprintf(vduse_reconnect_dir, sizeof(vduse_reconnect_dir), "%s/dpdk/vduse",
+			directory);
+	if (ret < 0 || ret == sizeof(vduse_reconnect_dir)) {
+		VHOST_CONFIG_LOG("vduse", ERR, "Error creating VDUSE reconnect path name");
+		return -1;
+	}
+
+	ret = mkdir(vduse_reconnect_dir, 0700);
+	if (ret < 0 && errno != EEXIST) {
+		VHOST_CONFIG_LOG("vduse", ERR, "Error creating '%s': %s",
+				vduse_reconnect_dir, strerror(errno));
+		return -1;
+	}
+
+	VHOST_CONFIG_LOG("vduse", INFO, "Created VDUSE reconnect directory in %s",
+			vduse_reconnect_dir);
+
+	return 0;
+}
+
+static void
+vduse_reconnect_handler(int fd, void *arg, int *remove)
+{
+	struct virtio_net *dev = arg;
+
+	vduse_device_start(dev, true);
+
+	close(fd);
+	*remove = 1;
+}
+
 int
 vduse_device_create(const char *path, bool compliant_ol_flags)
 {
-	int control_fd, dev_fd, vid, ret;
+	int control_fd, dev_fd, vid, ret, reco_fd;
 	uint32_t i, max_queue_pairs, total_queues;
 	struct virtio_net *dev;
 	struct virtio_net_config vnet_config = {{ 0 }};
@@ -418,6 +486,9 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
 	uint64_t features;
 	struct vduse_dev_config *dev_config = NULL;
 	const char *name = path + strlen("/dev/vduse/");
+	char reconnect_file[PATH_MAX];
+	struct vhost_reconnect_data *reconnect_log = NULL;
+	bool reconnect = false;
 
 	if (vduse.fdset == NULL) {
 		vduse.fdset = fdset_init("vduse-evt");
@@ -427,6 +498,20 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
 		}
 	}
 
+	if (vduse_reconnect_path_set == false) {
+		if (vduse_reconnect_path_init() < 0) {
+			VHOST_CONFIG_LOG(path, ERR, "failed to initialize reconnect path");
+			return -1;
+		}
+		vduse_reconnect_path_set = true;
+	}
+
+	ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s", vduse_reconnect_dir, name);
+	if (ret < 0 || ret == sizeof(reconnect_file)) {
+		VHOST_CONFIG_LOG(name, ERR, "Failed to create vduse reconnect path name");
+		return -1;
+	}
+
 	control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
 	if (control_fd < 0) {
 		VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
@@ -441,14 +526,6 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
 		goto out_ctrl_close;
 	}
 
-	dev_config = malloc(offsetof(struct vduse_dev_config, config) +
-			sizeof(vnet_config));
-	if (!dev_config) {
-		VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
-		ret = -1;
-		goto out_ctrl_close;
-	}
-
 	ret = rte_vhost_driver_get_features(path, &features);
 	if (ret < 0) {
 		VHOST_CONFIG_LOG(name, ERR, "Failed to get backend features");
@@ -469,23 +546,97 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
 	else
 		total_queues += 1; /* Includes ctrl queue */
 
-	vnet_config.max_virtqueue_pairs = max_queue_pairs;
-	memset(dev_config, 0, sizeof(struct vduse_dev_config));
+	if (access(path, F_OK) == 0) {
+		VHOST_CONFIG_LOG(name, INFO, "Device already exists, reconnecting...");
+		reconnect = true;
+
+		reco_fd = open(reconnect_file, O_RDWR, 0600);
+		if (reco_fd < 0) {
+			if (errno == ENOENT) {
+				VHOST_CONFIG_LOG(name, ERR, "Missing reconnect file (%s)",
+						reconnect_file);
+			} else {
+				VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
+						reconnect_file, strerror(errno));
+			}
+			ret = -1;
+			goto out_ctrl_close;
+		}
+
+		reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
+				MAP_SHARED, reco_fd, 0);
+		close(reco_fd);
+		if (reconnect_log == MAP_FAILED) {
+			VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
+					reconnect_file, strerror(errno));
+			ret = -1;
+			goto out_ctrl_close;
+		}
+	} else {
+		reco_fd = open(reconnect_file, O_CREAT | O_EXCL | O_RDWR, 0600);
+		if (reco_fd < 0) {
+			if (errno == EEXIST) {
+				VHOST_CONFIG_LOG(name, ERR, "Reconnect file %s exists but not the device",
+						reconnect_file);
+			} else {
+				VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
+						reconnect_file, strerror(errno));
+			}
+			ret = -1;
+			goto out_ctrl_close;
+		}
 
-	strncpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
-	dev_config->device_id = VIRTIO_ID_NET;
-	dev_config->vendor_id = 0;
-	dev_config->features = features;
-	dev_config->vq_num = total_queues;
-	dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
-	dev_config->config_size = sizeof(struct virtio_net_config);
-	memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
+		ret = ftruncate(reco_fd, sizeof(*reconnect_log));
+		if (ret < 0) {
+			VHOST_CONFIG_LOG(name, ERR, "Failed to truncate reconnect file %s (%s)",
+					reconnect_file, strerror(errno));
+			close(reco_fd);
+			goto out_ctrl_close;
+		}
 
-	ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
-	if (ret < 0) {
-		VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
-				strerror(errno));
-		goto out_free;
+		reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
+					MAP_SHARED, reco_fd, 0);
+		close(reco_fd);
+		if (reconnect_log == MAP_FAILED) {
+			VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
+					reconnect_file, strerror(errno));
+			ret = -1;
+			goto out_ctrl_close;
+		}
+
+		reconnect_log->version = 0;
+
+		dev_config = malloc(offsetof(struct vduse_dev_config, config) +
+				sizeof(vnet_config));
+		if (!dev_config) {
+			VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
+			ret = -1;
+			goto out_ctrl_close;
+		}
+
+		vnet_config.max_virtqueue_pairs = max_queue_pairs;
+		memset(dev_config, 0, sizeof(struct vduse_dev_config));
+
+		rte_strscpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
+		dev_config->device_id = VIRTIO_ID_NET;
+		dev_config->vendor_id = 0;
+		dev_config->features = features;
+		dev_config->vq_num = total_queues;
+		dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
+		dev_config->config_size = sizeof(struct virtio_net_config);
+		memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
+
+		ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
+		if (ret < 0) {
+			VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
+					strerror(errno));
+			goto out_free;
+		}
+
+		memcpy(&reconnect_log->config, &vnet_config, sizeof(vnet_config));
+		reconnect_log->nr_vrings = total_queues;
+		free(dev_config);
+		dev_config = NULL;
 	}
 
 	dev_fd = open(path, O_RDWR);
@@ -519,10 +670,15 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
 	strncpy(dev->ifname, path, IF_NAME_SZ - 1);
 	dev->vduse_ctrl_fd = control_fd;
 	dev->vduse_dev_fd = dev_fd;
+	dev->reconnect_log = reconnect_log;
+	if (reconnect)
+		dev->status = dev->reconnect_log->status;
+
 	vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true);
 
 	for (i = 0; i < total_queues; i++) {
 		struct vduse_vq_config vq_cfg = { 0 };
+		struct vhost_virtqueue *vq;
 
 		ret = alloc_vring_queue(dev, i);
 		if (ret) {
@@ -530,6 +686,12 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
 			goto out_dev_destroy;
 		}
 
+		vq = dev->virtqueue[i];
+		vq->reconnect_log = &reconnect_log->vring[i];
+
+		if (reconnect)
+			continue;
+
 		vq_cfg.index = i;
 		vq_cfg.max_size = 1024;
 
@@ -549,7 +711,28 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
 		goto out_dev_destroy;
 	}
 
-	free(dev_config);
+	if (reconnect && dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)  {
+		reco_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+		if (reco_fd < 0) {
+			VHOST_CONFIG_LOG(name, ERR, "Failed to create reco_fd: %s",
+					strerror(errno));
+			ret = -1;
+			goto out_dev_destroy;
+		}
+
+		ret = fdset_add(vduse.fdset, reco_fd, vduse_reconnect_handler, NULL, dev);
+		if (ret) {
+			VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse fdset",
+					dev->vduse_dev_fd);
+			goto out_dev_destroy;
+		}
+
+		ret = eventfd_write(reco_fd, (eventfd_t)1);
+		if (ret < 0) {
+			VHOST_CONFIG_LOG(name, ERR, "Failed to write to reconnect eventfd");
+			goto out_dev_destroy;
+		}
+	}
 
 	return 0;
 
@@ -587,6 +770,9 @@ vduse_device_destroy(const char *path)
 	if (vid == RTE_MAX_VHOST_DEVICE)
 		return -1;
 
+	if (dev->reconnect_log)
+		munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
+
 	vduse_device_stop(dev);
 
 	fdset_del(vduse.fdset, dev->vduse_dev_fd);
@@ -597,10 +783,26 @@ vduse_device_destroy(const char *path)
 	}
 
 	if (dev->vduse_ctrl_fd >= 0) {
+		char reconnect_file[PATH_MAX];
+
 		ret = ioctl(dev->vduse_ctrl_fd, VDUSE_DESTROY_DEV, name);
-		if (ret)
+		if (ret) {
 			VHOST_CONFIG_LOG(name, ERR, "Failed to destroy VDUSE device: %s",
 					strerror(errno));
+		} else {
+			/*
+			 * VDUSE device was no more attached to the vDPA bus,
+			 * so we can remove the reconnect file.
+			 */
+			ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s",
+					vduse_reconnect_dir, name);
+			if (ret < 0 || ret == sizeof(reconnect_file))
+				VHOST_CONFIG_LOG(name, ERR,
+						"Failed to create vduse reconnect path name");
+			else
+				unlink(reconnect_file);
+		}
+
 		close(dev->vduse_ctrl_fd);
 		dev->vduse_ctrl_fd = -1;
 	}
-- 
2.46.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] vhost: add reconnection support to VDUSE
  2024-09-05 14:26 ` [PATCH 2/2] vhost: add reconnection support to VDUSE Maxime Coquelin
@ 2024-09-06  7:14   ` Chenbo Xia
  2024-09-06  7:15     ` Chenbo Xia
                       ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Chenbo Xia @ 2024-09-06  7:14 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, david.marchand

Hi Maxime,

> On Sep 5, 2024, at 22:26, Maxime Coquelin <maxime.coquelin@redhat.com> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This patch enables VDUSE reconnection support making use of
> the newly introduced reconnection mechanism in Vhost
> library.
> 
> At DPDK VDUSE device creation time, there are two
> possibilities:
> 1. The Kernel VDUSE device does not exist:
>  a. A reconnection file named after the VUDSE device name
>     is created in VDUSE tmpfs.
>  b. The file is truncated to 'struct vhost_reconnect_data'
>     size, and mmapped.
>  c. Negotiated features, Virtio status... are saved for
>     sanity checks at reconnect time.
> 2. The Kernel VDUSE device already exists:
>  a. Exit with failure if no reconnect file exists for
>     this device.
>  b. Open and mmap the reconnect file.
>  c. Perform sanity check to ensure features are compatible.
>  d. Restore virtqueues' available indexes at startup time.
> 
> Then at runtime, the virtqueues' available index are logged by
> the Vhost reconnection mechanism.
> 
> At DPDK VDUSE device destruction time, there are two
> possibilities:
> 1. The Kernel VDUSE device destruction succeed, which
>    means it is no more attached to the vDPA bus. The
>    reconnection file is unmapped and then removed.
> 2. The Kernel VDUSE device destruction failed, meaning it
>    is no more attached to the vDPA bus. The reconnection
>    file is unmapped but not removed to make possible later
>    reconnection.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/vhost/vduse.c | 280 +++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 241 insertions(+), 39 deletions(-)
> 
> diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
> index c66602905c..bd0e492d62 100644
> --- a/lib/vhost/vduse.c
> +++ b/lib/vhost/vduse.c
> @@ -136,7 +136,7 @@ vduse_control_queue_event(int fd, void *arg, int *remove __rte_unused)
> }
> 
> static void
> -vduse_vring_setup(struct virtio_net *dev, unsigned int index)
> +vduse_vring_setup(struct virtio_net *dev, unsigned int index, bool reconnect)
> {
>        struct vhost_virtqueue *vq = dev->virtqueue[index];
>        struct vhost_vring_addr *ra = &vq->ring_addrs;
> @@ -152,6 +152,19 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
>                return;
>        }
> 
> +       if (reconnect) {
> +               vq->last_avail_idx = vq->reconnect_log->last_avail_idx;
> +               vq->last_used_idx = vq->reconnect_log->last_avail_idx;
> +       } else {
> +               vq->last_avail_idx = vq_info.split.avail_index;
> +               vq->last_used_idx = vq_info.split.avail_index;
> +       }
> +       vq->size = vq_info.num;
> +       vq->ready = true;
> +       vq->enabled = vq_info.ready;
> +       ra->desc_user_addr = vq_info.desc_addr;
> +       ra->avail_user_addr = vq_info.driver_addr;
> +       ra->used_user_addr = vq_info.device_addr;
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx",
> @@ -162,15 +175,6 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
>                        (unsigned long long)vq_info.device_addr);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", vq_info.split.avail_index);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready);
> -
> -       vq->last_avail_idx = vq_info.split.avail_index;
> -       vq->size = vq_info.num;
> -       vq->ready = true;
> -       vq->enabled = vq_info.ready;
> -       ra->desc_user_addr = vq_info.desc_addr;
> -       ra->avail_user_addr = vq_info.driver_addr;
> -       ra->used_user_addr = vq_info.device_addr;
> -
>        vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>        if (vq->kickfd < 0) {
>                VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for VQ %u: %s",
> @@ -267,7 +271,7 @@ vduse_vring_cleanup(struct virtio_net *dev, unsigned int index)
> }
> 
> static void
> -vduse_device_start(struct virtio_net *dev)
> +vduse_device_start(struct virtio_net *dev, bool reconnect)
> {
>        unsigned int i, ret;
> 
> @@ -287,6 +291,15 @@ vduse_device_start(struct virtio_net *dev)
>                return;
>        }
> 
> +       if (reconnect && dev->features != dev->reconnect_log->features) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "Mismatch between reconnect file features 0x%" PRIx64 " & device features 0x%" PRIx64,

Checkpatch reports long line

> +                               dev->reconnect_log->features, dev->features);
> +               return;
> +       }
> +
> +       dev->reconnect_log->features = dev->features;
> +
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" PRIx64,
>                dev->features);
> 
> @@ -300,7 +313,7 @@ vduse_device_start(struct virtio_net *dev)
>        }
> 
>        for (i = 0; i < dev->nr_vring; i++)
> -               vduse_vring_setup(dev, i);
> +               vduse_vring_setup(dev, i, reconnect);
> 
>        dev->flags |= VIRTIO_DEV_READY;
> 
> @@ -373,6 +386,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
>                                req.s.status);
>                old_status = dev->status;
>                dev->status = req.s.status;
> +               dev->reconnect_log->status = dev->status;
>                resp.result = VDUSE_REQ_RESULT_OK;
>                break;
>        case VDUSE_UPDATE_IOTLB:
> @@ -398,7 +412,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
> 
>        if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
>                if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)
> -                       vduse_device_start(dev);
> +                       vduse_device_start(dev, false);
>                else
>                        vduse_device_stop(dev);
>        }
> @@ -407,10 +421,64 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
>                        vduse_req_id_to_str(req.type), req.type);
> }
> 
> +static char vduse_reconnect_dir[PATH_MAX];
> +static bool vduse_reconnect_path_set;
> +
> +static int
> +vduse_reconnect_path_init(void)
> +{
> +       const char *directory;
> +       int ret;
> +
> +       /* from RuntimeDirectory= see systemd.exec */
> +       directory = getenv("RUNTIME_DIRECTORY");
> +       if (directory == NULL) {
> +               /*
> +                * Used standard convention defined in
> +                * XDG Base Directory Specification and
> +                * Filesystem Hierarchy Standard.
> +                */
> +               if (getuid() == 0)
> +                       directory = "/var/run";
> +               else
> +                       directory = getenv("XDG_RUNTIME_DIR") ? : "/tmp";
> +       }
> +
> +       ret = snprintf(vduse_reconnect_dir, sizeof(vduse_reconnect_dir), "%s/dpdk/vduse",
> +                       directory);
> +       if (ret < 0 || ret == sizeof(vduse_reconnect_dir)) {
> +               VHOST_CONFIG_LOG("vduse", ERR, "Error creating VDUSE reconnect path name");
> +               return -1;
> +       }
> +
> +       ret = mkdir(vduse_reconnect_dir, 0700);
> +       if (ret < 0 && errno != EEXIST) {
> +               VHOST_CONFIG_LOG("vduse", ERR, "Error creating '%s': %s",
> +                               vduse_reconnect_dir, strerror(errno));
> +               return -1;
> +       }
> +
> +       VHOST_CONFIG_LOG("vduse", INFO, "Created VDUSE reconnect directory in %s",
> +                       vduse_reconnect_dir);
> +
> +       return 0;
> +}
> +
> +static void
> +vduse_reconnect_handler(int fd, void *arg, int *remove)
> +{
> +       struct virtio_net *dev = arg;
> +
> +       vduse_device_start(dev, true);
> +
> +       close(fd);
> +       *remove = 1;
> +}
> +
> int
> vduse_device_create(const char *path, bool compliant_ol_flags)
> {
> -       int control_fd, dev_fd, vid, ret;
> +       int control_fd, dev_fd, vid, ret, reco_fd;
>        uint32_t i, max_queue_pairs, total_queues;
>        struct virtio_net *dev;
>        struct virtio_net_config vnet_config = {{ 0 }};
> @@ -418,6 +486,9 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>        uint64_t features;
>        struct vduse_dev_config *dev_config = NULL;
>        const char *name = path + strlen("/dev/vduse/");
> +       char reconnect_file[PATH_MAX];
> +       struct vhost_reconnect_data *reconnect_log = NULL;
> +       bool reconnect = false;
> 
>        if (vduse.fdset == NULL) {
>                vduse.fdset = fdset_init("vduse-evt");
> @@ -427,6 +498,20 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                }
>        }
> 
> +       if (vduse_reconnect_path_set == false) {
> +               if (vduse_reconnect_path_init() < 0) {
> +                       VHOST_CONFIG_LOG(path, ERR, "failed to initialize reconnect path");
> +                       return -1;
> +               }
> +               vduse_reconnect_path_set = true;
> +       }
> +
> +       ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s", vduse_reconnect_dir, name);
> +       if (ret < 0 || ret == sizeof(reconnect_file)) {
> +               VHOST_CONFIG_LOG(name, ERR, "Failed to create vduse reconnect path name");
> +               return -1;
> +       }
> +
>        control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
>        if (control_fd < 0) {
>                VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
> @@ -441,14 +526,6 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                goto out_ctrl_close;
>        }
> 
> -       dev_config = malloc(offsetof(struct vduse_dev_config, config) +
> -                       sizeof(vnet_config));
> -       if (!dev_config) {
> -               VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
> -               ret = -1;
> -               goto out_ctrl_close;
> -       }
> -
>        ret = rte_vhost_driver_get_features(path, &features);
>        if (ret < 0) {
>                VHOST_CONFIG_LOG(name, ERR, "Failed to get backend features");
> @@ -469,23 +546,97 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>        else
>                total_queues += 1; /* Includes ctrl queue */
> 
> -       vnet_config.max_virtqueue_pairs = max_queue_pairs;
> -       memset(dev_config, 0, sizeof(struct vduse_dev_config));
> +       if (access(path, F_OK) == 0) {
> +               VHOST_CONFIG_LOG(name, INFO, "Device already exists, reconnecting...");
> +               reconnect = true;
> +
> +               reco_fd = open(reconnect_file, O_RDWR, 0600);
> +               if (reco_fd < 0) {
> +                       if (errno == ENOENT) {
> +                               VHOST_CONFIG_LOG(name, ERR, "Missing reconnect file (%s)",
> +                                               reconnect_file);
> +                       } else {
> +                               VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
> +                                               reconnect_file, strerror(errno));
> +                       }

Seems no {} is needed for if-else?

> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +
> +               reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
> +                               MAP_SHARED, reco_fd, 0);
> +               close(reco_fd);
> +               if (reconnect_log == MAP_FAILED) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
> +                                       reconnect_file, strerror(errno));
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +       } else {
> +               reco_fd = open(reconnect_file, O_CREAT | O_EXCL | O_RDWR, 0600);
> +               if (reco_fd < 0) {
> +                       if (errno == EEXIST) {
> +                               VHOST_CONFIG_LOG(name, ERR, "Reconnect file %s exists but not the device",
> +                                               reconnect_file);
> +                       } else {
> +                               VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
> +                                               reconnect_file, strerror(errno));
> +                       }
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> 
> -       strncpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
> -       dev_config->device_id = VIRTIO_ID_NET;
> -       dev_config->vendor_id = 0;
> -       dev_config->features = features;
> -       dev_config->vq_num = total_queues;
> -       dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
> -       dev_config->config_size = sizeof(struct virtio_net_config);
> -       memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
> +               ret = ftruncate(reco_fd, sizeof(*reconnect_log));
> +               if (ret < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to truncate reconnect file %s (%s)",
> +                                       reconnect_file, strerror(errno));
> +                       close(reco_fd);
> +                       goto out_ctrl_close;
> +               }
> 
> -       ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
> -       if (ret < 0) {
> -               VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
> -                               strerror(errno));
> -               goto out_free;
> +               reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
> +                                       MAP_SHARED, reco_fd, 0);
> +               close(reco_fd);
> +               if (reconnect_log == MAP_FAILED) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
> +                                       reconnect_file, strerror(errno));
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +
> +               reconnect_log->version = 0;
> +
> +               dev_config = malloc(offsetof(struct vduse_dev_config, config) +
> +                               sizeof(vnet_config));
> +               if (!dev_config) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +
> +               vnet_config.max_virtqueue_pairs = max_queue_pairs;
> +               memset(dev_config, 0, sizeof(struct vduse_dev_config));
> +
> +               rte_strscpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
> +               dev_config->device_id = VIRTIO_ID_NET;
> +               dev_config->vendor_id = 0;
> +               dev_config->features = features;
> +               dev_config->vq_num = total_queues;
> +               dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
> +               dev_config->config_size = sizeof(struct virtio_net_config);
> +               memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
> +
> +               ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
> +               if (ret < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
> +                                       strerror(errno));
> +                       goto out_free;
> +               }
> +
> +               memcpy(&reconnect_log->config, &vnet_config, sizeof(vnet_config));
> +               reconnect_log->nr_vrings = total_queues;
> +               free(dev_config);
> +               dev_config = NULL;
>        }
> 
>        dev_fd = open(path, O_RDWR);
> @@ -519,10 +670,15 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>        strncpy(dev->ifname, path, IF_NAME_SZ - 1);
>        dev->vduse_ctrl_fd = control_fd;
>        dev->vduse_dev_fd = dev_fd;
> +       dev->reconnect_log = reconnect_log;
> +       if (reconnect)
> +               dev->status = dev->reconnect_log->status;
> +
>        vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true);
> 
>        for (i = 0; i < total_queues; i++) {
>                struct vduse_vq_config vq_cfg = { 0 };
> +               struct vhost_virtqueue *vq;
> 
>                ret = alloc_vring_queue(dev, i);
>                if (ret) {
> @@ -530,6 +686,12 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                        goto out_dev_destroy;
>                }
> 
> +               vq = dev->virtqueue[i];
> +               vq->reconnect_log = &reconnect_log->vring[i];
> +
> +               if (reconnect)
> +                       continue;
> +
>                vq_cfg.index = i;
>                vq_cfg.max_size = 1024;
> 
> @@ -549,7 +711,28 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                goto out_dev_destroy;
>        }
> 
> -       free(dev_config);
> +       if (reconnect && dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)  {
> +               reco_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +               if (reco_fd < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to create reco_fd: %s",
> +                                       strerror(errno));
> +                       ret = -1;
> +                       goto out_dev_destroy;
> +               }
> +
> +               ret = fdset_add(vduse.fdset, reco_fd, vduse_reconnect_handler, NULL, dev);
> +               if (ret) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse fdset",
> +                                       dev->vduse_dev_fd);

Should print reco_fd

> +                       goto out_dev_destroy;
> +               }
> +
> +               ret = eventfd_write(reco_fd, (eventfd_t)1);
> +               if (ret < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to write to reconnect eventfd");
> +                       goto out_dev_destroy;
> +               }
> +       }

Maybe I missed something, why we need to implement like this instead of directly call vduse_device_start?

Thanks,
Chenbo

> 
>        return 0;
> 
> @@ -587,6 +770,9 @@ vduse_device_destroy(const char *path)
>        if (vid == RTE_MAX_VHOST_DEVICE)
>                return -1;
> 
> +       if (dev->reconnect_log)
> +               munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
> +
>        vduse_device_stop(dev);
> 
>        fdset_del(vduse.fdset, dev->vduse_dev_fd);
> @@ -597,10 +783,26 @@ vduse_device_destroy(const char *path)
>        }
> 
>        if (dev->vduse_ctrl_fd >= 0) {
> +               char reconnect_file[PATH_MAX];
> +
>                ret = ioctl(dev->vduse_ctrl_fd, VDUSE_DESTROY_DEV, name);
> -               if (ret)
> +               if (ret) {
>                        VHOST_CONFIG_LOG(name, ERR, "Failed to destroy VDUSE device: %s",
>                                        strerror(errno));
> +               } else {
> +                       /*
> +                        * VDUSE device was no more attached to the vDPA bus,
> +                        * so we can remove the reconnect file.
> +                        */
> +                       ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s",
> +                                       vduse_reconnect_dir, name);
> +                       if (ret < 0 || ret == sizeof(reconnect_file))
> +                               VHOST_CONFIG_LOG(name, ERR,
> +                                               "Failed to create vduse reconnect path name");
> +                       else
> +                               unlink(reconnect_file);
> +               }
> +
>                close(dev->vduse_ctrl_fd);
>                dev->vduse_ctrl_fd = -1;
>        }
> --
> 2.46.0
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] vhost: add reconnection support to VDUSE
  2024-09-06  7:14   ` Chenbo Xia
@ 2024-09-06  7:15     ` Chenbo Xia
  2024-09-06  7:48     ` Maxime Coquelin
  2024-09-06  9:20     ` Chenbo Xia
  2 siblings, 0 replies; 7+ messages in thread
From: Chenbo Xia @ 2024-09-06  7:15 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, david.marchand

Hi Maxime,

> On Sep 5, 2024, at 22:26, Maxime Coquelin <maxime.coquelin@redhat.com> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This patch enables VDUSE reconnection support making use of
> the newly introduced reconnection mechanism in Vhost
> library.
> 
> At DPDK VDUSE device creation time, there are two
> possibilities:
> 1. The Kernel VDUSE device does not exist:
>  a. A reconnection file named after the VUDSE device name
>     is created in VDUSE tmpfs.
>  b. The file is truncated to 'struct vhost_reconnect_data'
>     size, and mmapped.
>  c. Negotiated features, Virtio status... are saved for
>     sanity checks at reconnect time.
> 2. The Kernel VDUSE device already exists:
>  a. Exit with failure if no reconnect file exists for
>     this device.
>  b. Open and mmap the reconnect file.
>  c. Perform sanity check to ensure features are compatible.
>  d. Restore virtqueues' available indexes at startup time.
> 
> Then at runtime, the virtqueues' available index are logged by
> the Vhost reconnection mechanism.
> 
> At DPDK VDUSE device destruction time, there are two
> possibilities:
> 1. The Kernel VDUSE device destruction succeed, which
>    means it is no more attached to the vDPA bus. The
>    reconnection file is unmapped and then removed.
> 2. The Kernel VDUSE device destruction failed, meaning it
>    is no more attached to the vDPA bus. The reconnection
>    file is unmapped but not removed to make possible later
>    reconnection.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/vhost/vduse.c | 280 +++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 241 insertions(+), 39 deletions(-)
> 
> diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
> index c66602905c..bd0e492d62 100644
> --- a/lib/vhost/vduse.c
> +++ b/lib/vhost/vduse.c
> @@ -136,7 +136,7 @@ vduse_control_queue_event(int fd, void *arg, int *remove __rte_unused)
> }
> 
> static void
> -vduse_vring_setup(struct virtio_net *dev, unsigned int index)
> +vduse_vring_setup(struct virtio_net *dev, unsigned int index, bool reconnect)
> {
>        struct vhost_virtqueue *vq = dev->virtqueue[index];
>        struct vhost_vring_addr *ra = &vq->ring_addrs;
> @@ -152,6 +152,19 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
>                return;
>        }
> 
> +       if (reconnect) {
> +               vq->last_avail_idx = vq->reconnect_log->last_avail_idx;
> +               vq->last_used_idx = vq->reconnect_log->last_avail_idx;
> +       } else {
> +               vq->last_avail_idx = vq_info.split.avail_index;
> +               vq->last_used_idx = vq_info.split.avail_index;
> +       }
> +       vq->size = vq_info.num;
> +       vq->ready = true;
> +       vq->enabled = vq_info.ready;
> +       ra->desc_user_addr = vq_info.desc_addr;
> +       ra->avail_user_addr = vq_info.driver_addr;
> +       ra->used_user_addr = vq_info.device_addr;
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx",
> @@ -162,15 +175,6 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
>                        (unsigned long long)vq_info.device_addr);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", vq_info.split.avail_index);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready);
> -
> -       vq->last_avail_idx = vq_info.split.avail_index;
> -       vq->size = vq_info.num;
> -       vq->ready = true;
> -       vq->enabled = vq_info.ready;
> -       ra->desc_user_addr = vq_info.desc_addr;
> -       ra->avail_user_addr = vq_info.driver_addr;
> -       ra->used_user_addr = vq_info.device_addr;
> -
>        vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>        if (vq->kickfd < 0) {
>                VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for VQ %u: %s",
> @@ -267,7 +271,7 @@ vduse_vring_cleanup(struct virtio_net *dev, unsigned int index)
> }
> 
> static void
> -vduse_device_start(struct virtio_net *dev)
> +vduse_device_start(struct virtio_net *dev, bool reconnect)
> {
>        unsigned int i, ret;
> 
> @@ -287,6 +291,15 @@ vduse_device_start(struct virtio_net *dev)
>                return;
>        }
> 
> +       if (reconnect && dev->features != dev->reconnect_log->features) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "Mismatch between reconnect file features 0x%" PRIx64 " & device features 0x%" PRIx64,

Checkpatch reports long line

> +                               dev->reconnect_log->features, dev->features);
> +               return;
> +       }
> +
> +       dev->reconnect_log->features = dev->features;
> +
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" PRIx64,
>                dev->features);
> 
> @@ -300,7 +313,7 @@ vduse_device_start(struct virtio_net *dev)
>        }
> 
>        for (i = 0; i < dev->nr_vring; i++)
> -               vduse_vring_setup(dev, i);
> +               vduse_vring_setup(dev, i, reconnect);
> 
>        dev->flags |= VIRTIO_DEV_READY;
> 
> @@ -373,6 +386,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
>                                req.s.status);
>                old_status = dev->status;
>                dev->status = req.s.status;
> +               dev->reconnect_log->status = dev->status;
>                resp.result = VDUSE_REQ_RESULT_OK;
>                break;
>        case VDUSE_UPDATE_IOTLB:
> @@ -398,7 +412,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
> 
>        if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
>                if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)
> -                       vduse_device_start(dev);
> +                       vduse_device_start(dev, false);
>                else
>                        vduse_device_stop(dev);
>        }
> @@ -407,10 +421,64 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
>                        vduse_req_id_to_str(req.type), req.type);
> }
> 
> +static char vduse_reconnect_dir[PATH_MAX];
> +static bool vduse_reconnect_path_set;
> +
> +static int
> +vduse_reconnect_path_init(void)
> +{
> +       const char *directory;
> +       int ret;
> +
> +       /* from RuntimeDirectory= see systemd.exec */
> +       directory = getenv("RUNTIME_DIRECTORY");
> +       if (directory == NULL) {
> +               /*
> +                * Used standard convention defined in
> +                * XDG Base Directory Specification and
> +                * Filesystem Hierarchy Standard.
> +                */
> +               if (getuid() == 0)
> +                       directory = "/var/run";
> +               else
> +                       directory = getenv("XDG_RUNTIME_DIR") ? : "/tmp";
> +       }
> +
> +       ret = snprintf(vduse_reconnect_dir, sizeof(vduse_reconnect_dir), "%s/dpdk/vduse",
> +                       directory);
> +       if (ret < 0 || ret == sizeof(vduse_reconnect_dir)) {
> +               VHOST_CONFIG_LOG("vduse", ERR, "Error creating VDUSE reconnect path name");
> +               return -1;
> +       }
> +
> +       ret = mkdir(vduse_reconnect_dir, 0700);
> +       if (ret < 0 && errno != EEXIST) {
> +               VHOST_CONFIG_LOG("vduse", ERR, "Error creating '%s': %s",
> +                               vduse_reconnect_dir, strerror(errno));
> +               return -1;
> +       }
> +
> +       VHOST_CONFIG_LOG("vduse", INFO, "Created VDUSE reconnect directory in %s",
> +                       vduse_reconnect_dir);
> +
> +       return 0;
> +}
> +
> +static void
> +vduse_reconnect_handler(int fd, void *arg, int *remove)
> +{
> +       struct virtio_net *dev = arg;
> +
> +       vduse_device_start(dev, true);
> +
> +       close(fd);
> +       *remove = 1;
> +}
> +
> int
> vduse_device_create(const char *path, bool compliant_ol_flags)
> {
> -       int control_fd, dev_fd, vid, ret;
> +       int control_fd, dev_fd, vid, ret, reco_fd;
>        uint32_t i, max_queue_pairs, total_queues;
>        struct virtio_net *dev;
>        struct virtio_net_config vnet_config = {{ 0 }};
> @@ -418,6 +486,9 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>        uint64_t features;
>        struct vduse_dev_config *dev_config = NULL;
>        const char *name = path + strlen("/dev/vduse/");
> +       char reconnect_file[PATH_MAX];
> +       struct vhost_reconnect_data *reconnect_log = NULL;
> +       bool reconnect = false;
> 
>        if (vduse.fdset == NULL) {
>                vduse.fdset = fdset_init("vduse-evt");
> @@ -427,6 +498,20 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                }
>        }
> 
> +       if (vduse_reconnect_path_set == false) {
> +               if (vduse_reconnect_path_init() < 0) {
> +                       VHOST_CONFIG_LOG(path, ERR, "failed to initialize reconnect path");
> +                       return -1;
> +               }
> +               vduse_reconnect_path_set = true;
> +       }
> +
> +       ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s", vduse_reconnect_dir, name);
> +       if (ret < 0 || ret == sizeof(reconnect_file)) {
> +               VHOST_CONFIG_LOG(name, ERR, "Failed to create vduse reconnect path name");
> +               return -1;
> +       }
> +
>        control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
>        if (control_fd < 0) {
>                VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
> @@ -441,14 +526,6 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                goto out_ctrl_close;
>        }
> 
> -       dev_config = malloc(offsetof(struct vduse_dev_config, config) +
> -                       sizeof(vnet_config));
> -       if (!dev_config) {
> -               VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
> -               ret = -1;
> -               goto out_ctrl_close;
> -       }
> -
>        ret = rte_vhost_driver_get_features(path, &features);
>        if (ret < 0) {
>                VHOST_CONFIG_LOG(name, ERR, "Failed to get backend features");
> @@ -469,23 +546,97 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>        else
>                total_queues += 1; /* Includes ctrl queue */
> 
> -       vnet_config.max_virtqueue_pairs = max_queue_pairs;
> -       memset(dev_config, 0, sizeof(struct vduse_dev_config));
> +       if (access(path, F_OK) == 0) {
> +               VHOST_CONFIG_LOG(name, INFO, "Device already exists, reconnecting...");
> +               reconnect = true;
> +
> +               reco_fd = open(reconnect_file, O_RDWR, 0600);
> +               if (reco_fd < 0) {
> +                       if (errno == ENOENT) {
> +                               VHOST_CONFIG_LOG(name, ERR, "Missing reconnect file (%s)",
> +                                               reconnect_file);
> +                       } else {
> +                               VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
> +                                               reconnect_file, strerror(errno));
> +                       }

Seems no {} is needed for if-else?

> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +
> +               reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
> +                               MAP_SHARED, reco_fd, 0);
> +               close(reco_fd);
> +               if (reconnect_log == MAP_FAILED) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
> +                                       reconnect_file, strerror(errno));
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +       } else {
> +               reco_fd = open(reconnect_file, O_CREAT | O_EXCL | O_RDWR, 0600);
> +               if (reco_fd < 0) {
> +                       if (errno == EEXIST) {
> +                               VHOST_CONFIG_LOG(name, ERR, "Reconnect file %s exists but not the device",
> +                                               reconnect_file);
> +                       } else {
> +                               VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
> +                                               reconnect_file, strerror(errno));
> +                       }
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> 
> -       strncpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
> -       dev_config->device_id = VIRTIO_ID_NET;
> -       dev_config->vendor_id = 0;
> -       dev_config->features = features;
> -       dev_config->vq_num = total_queues;
> -       dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
> -       dev_config->config_size = sizeof(struct virtio_net_config);
> -       memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
> +               ret = ftruncate(reco_fd, sizeof(*reconnect_log));
> +               if (ret < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to truncate reconnect file %s (%s)",
> +                                       reconnect_file, strerror(errno));
> +                       close(reco_fd);
> +                       goto out_ctrl_close;
> +               }
> 
> -       ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
> -       if (ret < 0) {
> -               VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
> -                               strerror(errno));
> -               goto out_free;
> +               reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
> +                                       MAP_SHARED, reco_fd, 0);
> +               close(reco_fd);
> +               if (reconnect_log == MAP_FAILED) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
> +                                       reconnect_file, strerror(errno));
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +
> +               reconnect_log->version = 0;
> +
> +               dev_config = malloc(offsetof(struct vduse_dev_config, config) +
> +                               sizeof(vnet_config));
> +               if (!dev_config) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +
> +               vnet_config.max_virtqueue_pairs = max_queue_pairs;
> +               memset(dev_config, 0, sizeof(struct vduse_dev_config));
> +
> +               rte_strscpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
> +               dev_config->device_id = VIRTIO_ID_NET;
> +               dev_config->vendor_id = 0;
> +               dev_config->features = features;
> +               dev_config->vq_num = total_queues;
> +               dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
> +               dev_config->config_size = sizeof(struct virtio_net_config);
> +               memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
> +
> +               ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
> +               if (ret < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
> +                                       strerror(errno));
> +                       goto out_free;
> +               }
> +
> +               memcpy(&reconnect_log->config, &vnet_config, sizeof(vnet_config));
> +               reconnect_log->nr_vrings = total_queues;
> +               free(dev_config);
> +               dev_config = NULL;
>        }
> 
>        dev_fd = open(path, O_RDWR);
> @@ -519,10 +670,15 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>        strncpy(dev->ifname, path, IF_NAME_SZ - 1);
>        dev->vduse_ctrl_fd = control_fd;
>        dev->vduse_dev_fd = dev_fd;
> +       dev->reconnect_log = reconnect_log;
> +       if (reconnect)
> +               dev->status = dev->reconnect_log->status;
> +
>        vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true);
> 
>        for (i = 0; i < total_queues; i++) {
>                struct vduse_vq_config vq_cfg = { 0 };
> +               struct vhost_virtqueue *vq;
> 
>                ret = alloc_vring_queue(dev, i);
>                if (ret) {
> @@ -530,6 +686,12 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                        goto out_dev_destroy;
>                }
> 
> +               vq = dev->virtqueue[i];
> +               vq->reconnect_log = &reconnect_log->vring[i];
> +
> +               if (reconnect)
> +                       continue;
> +
>                vq_cfg.index = i;
>                vq_cfg.max_size = 1024;
> 
> @@ -549,7 +711,28 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                goto out_dev_destroy;
>        }
> 
> -       free(dev_config);
> +       if (reconnect && dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)  {
> +               reco_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +               if (reco_fd < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to create reco_fd: %s",
> +                                       strerror(errno));
> +                       ret = -1;
> +                       goto out_dev_destroy;
> +               }
> +
> +               ret = fdset_add(vduse.fdset, reco_fd, vduse_reconnect_handler, NULL, dev);
> +               if (ret) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse fdset",
> +                                       dev->vduse_dev_fd);

Should print reco_fd

> +                       goto out_dev_destroy;
> +               }
> +
> +               ret = eventfd_write(reco_fd, (eventfd_t)1);
> +               if (ret < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to write to reconnect eventfd");
> +                       goto out_dev_destroy;
> +               }
> +       }

Maybe I missed something, why we need to implement like this instead of directly call vduse_device_start?

Thanks,
Chenbo

> 
>        return 0;
> 
> @@ -587,6 +770,9 @@ vduse_device_destroy(const char *path)
>        if (vid == RTE_MAX_VHOST_DEVICE)
>                return -1;
> 
> +       if (dev->reconnect_log)
> +               munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
> +
>        vduse_device_stop(dev);
> 
>        fdset_del(vduse.fdset, dev->vduse_dev_fd);
> @@ -597,10 +783,26 @@ vduse_device_destroy(const char *path)
>        }
> 
>        if (dev->vduse_ctrl_fd >= 0) {
> +               char reconnect_file[PATH_MAX];
> +
>                ret = ioctl(dev->vduse_ctrl_fd, VDUSE_DESTROY_DEV, name);
> -               if (ret)
> +               if (ret) {
>                        VHOST_CONFIG_LOG(name, ERR, "Failed to destroy VDUSE device: %s",
>                                        strerror(errno));
> +               } else {
> +                       /*
> +                        * VDUSE device was no more attached to the vDPA bus,
> +                        * so we can remove the reconnect file.
> +                        */
> +                       ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s",
> +                                       vduse_reconnect_dir, name);
> +                       if (ret < 0 || ret == sizeof(reconnect_file))
> +                               VHOST_CONFIG_LOG(name, ERR,
> +                                               "Failed to create vduse reconnect path name");
> +                       else
> +                               unlink(reconnect_file);
> +               }
> +
>                close(dev->vduse_ctrl_fd);
>                dev->vduse_ctrl_fd = -1;
>        }
> --
> 2.46.0
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] vhost: add reconnection support to VDUSE
  2024-09-06  7:14   ` Chenbo Xia
  2024-09-06  7:15     ` Chenbo Xia
@ 2024-09-06  7:48     ` Maxime Coquelin
  2024-09-06  9:20     ` Chenbo Xia
  2 siblings, 0 replies; 7+ messages in thread
From: Maxime Coquelin @ 2024-09-06  7:48 UTC (permalink / raw)
  To: Chenbo Xia; +Cc: dev, david.marchand

Hi Chenbo,

Thanks for the review!

On 9/6/24 09:14, Chenbo Xia wrote:
> Hi Maxime,
> 
>> On Sep 5, 2024, at 22:26, Maxime Coquelin <maxime.coquelin@redhat.com> wrote:
>>
>> External email: Use caution opening links or attachments
>>
>>
>> This patch enables VDUSE reconnection support making use of
>> the newly introduced reconnection mechanism in Vhost
>> library.
>>
>> At DPDK VDUSE device creation time, there are two
>> possibilities:
>> 1. The Kernel VDUSE device does not exist:
>>   a. A reconnection file named after the VUDSE device name
>>      is created in VDUSE tmpfs.
>>   b. The file is truncated to 'struct vhost_reconnect_data'
>>      size, and mmapped.
>>   c. Negotiated features, Virtio status... are saved for
>>      sanity checks at reconnect time.
>> 2. The Kernel VDUSE device already exists:
>>   a. Exit with failure if no reconnect file exists for
>>      this device.
>>   b. Open and mmap the reconnect file.
>>   c. Perform sanity check to ensure features are compatible.
>>   d. Restore virtqueues' available indexes at startup time.
>>
>> Then at runtime, the virtqueues' available index are logged by
>> the Vhost reconnection mechanism.
>>
>> At DPDK VDUSE device destruction time, there are two
>> possibilities:
>> 1. The Kernel VDUSE device destruction succeed, which
>>     means it is no more attached to the vDPA bus. The
>>     reconnection file is unmapped and then removed.
>> 2. The Kernel VDUSE device destruction failed, meaning it
>>     is no more attached to the vDPA bus. The reconnection
>>     file is unmapped but not removed to make possible later
>>     reconnection.
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>> lib/vhost/vduse.c | 280 +++++++++++++++++++++++++++++++++++++++-------
>> 1 file changed, 241 insertions(+), 39 deletions(-)
>>
>> diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
>> index c66602905c..bd0e492d62 100644
>> --- a/lib/vhost/vduse.c
>> +++ b/lib/vhost/vduse.c
>> @@ -136,7 +136,7 @@ vduse_control_queue_event(int fd, void *arg, int *remove __rte_unused)
>> }
>>
>> static void
>> -vduse_vring_setup(struct virtio_net *dev, unsigned int index)
>> +vduse_vring_setup(struct virtio_net *dev, unsigned int index, bool reconnect)
>> {
>>         struct vhost_virtqueue *vq = dev->virtqueue[index];
>>         struct vhost_vring_addr *ra = &vq->ring_addrs;
>> @@ -152,6 +152,19 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
>>                 return;
>>         }
>>
>> +       if (reconnect) {
>> +               vq->last_avail_idx = vq->reconnect_log->last_avail_idx;
>> +               vq->last_used_idx = vq->reconnect_log->last_avail_idx;
>> +       } else {
>> +               vq->last_avail_idx = vq_info.split.avail_index;
>> +               vq->last_used_idx = vq_info.split.avail_index;
>> +       }
>> +       vq->size = vq_info.num;
>> +       vq->ready = true;
>> +       vq->enabled = vq_info.ready;
>> +       ra->desc_user_addr = vq_info.desc_addr;
>> +       ra->avail_user_addr = vq_info.driver_addr;
>> +       ra->used_user_addr = vq_info.device_addr;
>>         VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index);
>>         VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num);
>>         VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx",
>> @@ -162,15 +175,6 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
>>                         (unsigned long long)vq_info.device_addr);
>>         VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", vq_info.split.avail_index);
>>         VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready);
>> -
>> -       vq->last_avail_idx = vq_info.split.avail_index;
>> -       vq->size = vq_info.num;
>> -       vq->ready = true;
>> -       vq->enabled = vq_info.ready;
>> -       ra->desc_user_addr = vq_info.desc_addr;
>> -       ra->avail_user_addr = vq_info.driver_addr;
>> -       ra->used_user_addr = vq_info.device_addr;
>> -
>>         vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>>         if (vq->kickfd < 0) {
>>                 VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for VQ %u: %s",
>> @@ -267,7 +271,7 @@ vduse_vring_cleanup(struct virtio_net *dev, unsigned int index)
>> }
>>
>> static void
>> -vduse_device_start(struct virtio_net *dev)
>> +vduse_device_start(struct virtio_net *dev, bool reconnect)
>> {
>>         unsigned int i, ret;
>>
>> @@ -287,6 +291,15 @@ vduse_device_start(struct virtio_net *dev)
>>                 return;
>>         }
>>
>> +       if (reconnect && dev->features != dev->reconnect_log->features) {
>> +               VHOST_CONFIG_LOG(dev->ifname, ERR,
>> +                               "Mismatch between reconnect file features 0x%" PRIx64 " & device features 0x%" PRIx64,
> 
> Checkpatch reports long line

I noticed it, but prefered to keep it as is.
Better to have a too long line when grepping logs than splitting IMHO.

> 
>> +                               dev->reconnect_log->features, dev->features);
>> +               return;
>> +       }
>> +
>> +       dev->reconnect_log->features = dev->features;
>> +
>>         VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" PRIx64,
>>                 dev->features);
>>
>> @@ -300,7 +313,7 @@ vduse_device_start(struct virtio_net *dev)
>>         }
>>
>>         for (i = 0; i < dev->nr_vring; i++)
>> -               vduse_vring_setup(dev, i);
>> +               vduse_vring_setup(dev, i, reconnect);
>>
>>         dev->flags |= VIRTIO_DEV_READY;
>>
>> @@ -373,6 +386,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
>>                                 req.s.status);
>>                 old_status = dev->status;
>>                 dev->status = req.s.status;
>> +               dev->reconnect_log->status = dev->status;
>>                 resp.result = VDUSE_REQ_RESULT_OK;
>>                 break;
>>         case VDUSE_UPDATE_IOTLB:
>> @@ -398,7 +412,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
>>
>>         if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
>>                 if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)
>> -                       vduse_device_start(dev);
>> +                       vduse_device_start(dev, false);
>>                 else
>>                         vduse_device_stop(dev);
>>         }
>> @@ -407,10 +421,64 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
>>                         vduse_req_id_to_str(req.type), req.type);
>> }
>>
>> +static char vduse_reconnect_dir[PATH_MAX];
>> +static bool vduse_reconnect_path_set;
>> +
>> +static int
>> +vduse_reconnect_path_init(void)
>> +{
>> +       const char *directory;
>> +       int ret;
>> +
>> +       /* from RuntimeDirectory= see systemd.exec */
>> +       directory = getenv("RUNTIME_DIRECTORY");
>> +       if (directory == NULL) {
>> +               /*
>> +                * Used standard convention defined in
>> +                * XDG Base Directory Specification and
>> +                * Filesystem Hierarchy Standard.
>> +                */
>> +               if (getuid() == 0)
>> +                       directory = "/var/run";
>> +               else
>> +                       directory = getenv("XDG_RUNTIME_DIR") ? : "/tmp";
>> +       }
>> +
>> +       ret = snprintf(vduse_reconnect_dir, sizeof(vduse_reconnect_dir), "%s/dpdk/vduse",
>> +                       directory);
>> +       if (ret < 0 || ret == sizeof(vduse_reconnect_dir)) {
>> +               VHOST_CONFIG_LOG("vduse", ERR, "Error creating VDUSE reconnect path name");
>> +               return -1;
>> +       }
>> +
>> +       ret = mkdir(vduse_reconnect_dir, 0700);
>> +       if (ret < 0 && errno != EEXIST) {
>> +               VHOST_CONFIG_LOG("vduse", ERR, "Error creating '%s': %s",
>> +                               vduse_reconnect_dir, strerror(errno));
>> +               return -1;
>> +       }
>> +
>> +       VHOST_CONFIG_LOG("vduse", INFO, "Created VDUSE reconnect directory in %s",
>> +                       vduse_reconnect_dir);
>> +
>> +       return 0;
>> +}
>> +
>> +static void
>> +vduse_reconnect_handler(int fd, void *arg, int *remove)
>> +{
>> +       struct virtio_net *dev = arg;
>> +
>> +       vduse_device_start(dev, true);
>> +
>> +       close(fd);
>> +       *remove = 1;
>> +}
>> +
>> int
>> vduse_device_create(const char *path, bool compliant_ol_flags)
>> {
>> -       int control_fd, dev_fd, vid, ret;
>> +       int control_fd, dev_fd, vid, ret, reco_fd;
>>         uint32_t i, max_queue_pairs, total_queues;
>>         struct virtio_net *dev;
>>         struct virtio_net_config vnet_config = {{ 0 }};
>> @@ -418,6 +486,9 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>>         uint64_t features;
>>         struct vduse_dev_config *dev_config = NULL;
>>         const char *name = path + strlen("/dev/vduse/");
>> +       char reconnect_file[PATH_MAX];
>> +       struct vhost_reconnect_data *reconnect_log = NULL;
>> +       bool reconnect = false;
>>
>>         if (vduse.fdset == NULL) {
>>                 vduse.fdset = fdset_init("vduse-evt");
>> @@ -427,6 +498,20 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>>                 }
>>         }
>>
>> +       if (vduse_reconnect_path_set == false) {
>> +               if (vduse_reconnect_path_init() < 0) {
>> +                       VHOST_CONFIG_LOG(path, ERR, "failed to initialize reconnect path");
>> +                       return -1;
>> +               }
>> +               vduse_reconnect_path_set = true;
>> +       }
>> +
>> +       ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s", vduse_reconnect_dir, name);
>> +       if (ret < 0 || ret == sizeof(reconnect_file)) {
>> +               VHOST_CONFIG_LOG(name, ERR, "Failed to create vduse reconnect path name");
>> +               return -1;
>> +       }
>> +
>>         control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
>>         if (control_fd < 0) {
>>                 VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
>> @@ -441,14 +526,6 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>>                 goto out_ctrl_close;
>>         }
>>
>> -       dev_config = malloc(offsetof(struct vduse_dev_config, config) +
>> -                       sizeof(vnet_config));
>> -       if (!dev_config) {
>> -               VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
>> -               ret = -1;
>> -               goto out_ctrl_close;
>> -       }
>> -
>>         ret = rte_vhost_driver_get_features(path, &features);
>>         if (ret < 0) {
>>                 VHOST_CONFIG_LOG(name, ERR, "Failed to get backend features");
>> @@ -469,23 +546,97 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>>         else
>>                 total_queues += 1; /* Includes ctrl queue */
>>
>> -       vnet_config.max_virtqueue_pairs = max_queue_pairs;
>> -       memset(dev_config, 0, sizeof(struct vduse_dev_config));
>> +       if (access(path, F_OK) == 0) {
>> +               VHOST_CONFIG_LOG(name, INFO, "Device already exists, reconnecting...");
>> +               reconnect = true;
>> +
>> +               reco_fd = open(reconnect_file, O_RDWR, 0600);
>> +               if (reco_fd < 0) {
>> +                       if (errno == ENOENT) {
>> +                               VHOST_CONFIG_LOG(name, ERR, "Missing reconnect file (%s)",
>> +                                               reconnect_file);
>> +                       } else {
>> +                               VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
>> +                                               reconnect_file, strerror(errno));
>> +                       }
> 
> Seems no {} is needed for if-else?

Right!

>> +                       ret = -1;
>> +                       goto out_ctrl_close;
>> +               }
>> +
>> +               reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
>> +                               MAP_SHARED, reco_fd, 0);
>> +               close(reco_fd);
>> +               if (reconnect_log == MAP_FAILED) {
>> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
>> +                                       reconnect_file, strerror(errno));
>> +                       ret = -1;
>> +                       goto out_ctrl_close;
>> +               }
>> +       } else {
>> +               reco_fd = open(reconnect_file, O_CREAT | O_EXCL | O_RDWR, 0600);
>> +               if (reco_fd < 0) {
>> +                       if (errno == EEXIST) {
>> +                               VHOST_CONFIG_LOG(name, ERR, "Reconnect file %s exists but not the device",
>> +                                               reconnect_file);
>> +                       } else {
>> +                               VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
>> +                                               reconnect_file, strerror(errno));
>> +                       }
>> +                       ret = -1;
>> +                       goto out_ctrl_close;
>> +               }
>>
>> -       strncpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
>> -       dev_config->device_id = VIRTIO_ID_NET;
>> -       dev_config->vendor_id = 0;
>> -       dev_config->features = features;
>> -       dev_config->vq_num = total_queues;
>> -       dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
>> -       dev_config->config_size = sizeof(struct virtio_net_config);
>> -       memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
>> +               ret = ftruncate(reco_fd, sizeof(*reconnect_log));
>> +               if (ret < 0) {
>> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to truncate reconnect file %s (%s)",
>> +                                       reconnect_file, strerror(errno));
>> +                       close(reco_fd);
>> +                       goto out_ctrl_close;
>> +               }
>>
>> -       ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
>> -       if (ret < 0) {
>> -               VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
>> -                               strerror(errno));
>> -               goto out_free;
>> +               reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
>> +                                       MAP_SHARED, reco_fd, 0);
>> +               close(reco_fd);
>> +               if (reconnect_log == MAP_FAILED) {
>> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
>> +                                       reconnect_file, strerror(errno));
>> +                       ret = -1;
>> +                       goto out_ctrl_close;
>> +               }
>> +
>> +               reconnect_log->version = 0;
>> +
>> +               dev_config = malloc(offsetof(struct vduse_dev_config, config) +
>> +                               sizeof(vnet_config));
>> +               if (!dev_config) {
>> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
>> +                       ret = -1;
>> +                       goto out_ctrl_close;
>> +               }
>> +
>> +               vnet_config.max_virtqueue_pairs = max_queue_pairs;
>> +               memset(dev_config, 0, sizeof(struct vduse_dev_config));
>> +
>> +               rte_strscpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
>> +               dev_config->device_id = VIRTIO_ID_NET;
>> +               dev_config->vendor_id = 0;
>> +               dev_config->features = features;
>> +               dev_config->vq_num = total_queues;
>> +               dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
>> +               dev_config->config_size = sizeof(struct virtio_net_config);
>> +               memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
>> +
>> +               ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
>> +               if (ret < 0) {
>> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
>> +                                       strerror(errno));
>> +                       goto out_free;
>> +               }
>> +
>> +               memcpy(&reconnect_log->config, &vnet_config, sizeof(vnet_config));
>> +               reconnect_log->nr_vrings = total_queues;
>> +               free(dev_config);
>> +               dev_config = NULL;
>>         }
>>
>>         dev_fd = open(path, O_RDWR);
>> @@ -519,10 +670,15 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>>         strncpy(dev->ifname, path, IF_NAME_SZ - 1);
>>         dev->vduse_ctrl_fd = control_fd;
>>         dev->vduse_dev_fd = dev_fd;
>> +       dev->reconnect_log = reconnect_log;
>> +       if (reconnect)
>> +               dev->status = dev->reconnect_log->status;
>> +
>>         vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true);
>>
>>         for (i = 0; i < total_queues; i++) {
>>                 struct vduse_vq_config vq_cfg = { 0 };
>> +               struct vhost_virtqueue *vq;
>>
>>                 ret = alloc_vring_queue(dev, i);
>>                 if (ret) {
>> @@ -530,6 +686,12 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>>                         goto out_dev_destroy;
>>                 }
>>
>> +               vq = dev->virtqueue[i];
>> +               vq->reconnect_log = &reconnect_log->vring[i];
>> +
>> +               if (reconnect)
>> +                       continue;
>> +
>>                 vq_cfg.index = i;
>>                 vq_cfg.max_size = 1024;
>>
>> @@ -549,7 +711,28 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>>                 goto out_dev_destroy;
>>         }
>>
>> -       free(dev_config);
>> +       if (reconnect && dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)  {
>> +               reco_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> +               if (reco_fd < 0) {
>> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to create reco_fd: %s",
>> +                                       strerror(errno));
>> +                       ret = -1;
>> +                       goto out_dev_destroy;
>> +               }
>> +
>> +               ret = fdset_add(vduse.fdset, reco_fd, vduse_reconnect_handler, NULL, dev);
>> +               if (ret) {
>> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse fdset",
>> +                                       dev->vduse_dev_fd);
> 
> Should print reco_fd

Indeed, copy-paste mistake.

> 
>> +                       goto out_dev_destroy;
>> +               }
>> +
>> +               ret = eventfd_write(reco_fd, (eventfd_t)1);
>> +               if (ret < 0) {
>> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to write to reconnect eventfd");
>> +                       goto out_dev_destroy;
>> +               }
>> +       }
> 
> Maybe I missed something, why we need to implement like this instead of directly call vduse_device_start?

This is done to have vduse_device_start() being called in the same
context and so avoid deadlocks. I can add a comment about it, it should
be made clear I agree.

Thanks,
Maxime

> Thanks,
> Chenbo
> 
>>
>>         return 0;
>>
>> @@ -587,6 +770,9 @@ vduse_device_destroy(const char *path)
>>         if (vid == RTE_MAX_VHOST_DEVICE)
>>                 return -1;
>>
>> +       if (dev->reconnect_log)
>> +               munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
>> +
>>         vduse_device_stop(dev);
>>
>>         fdset_del(vduse.fdset, dev->vduse_dev_fd);
>> @@ -597,10 +783,26 @@ vduse_device_destroy(const char *path)
>>         }
>>
>>         if (dev->vduse_ctrl_fd >= 0) {
>> +               char reconnect_file[PATH_MAX];
>> +
>>                 ret = ioctl(dev->vduse_ctrl_fd, VDUSE_DESTROY_DEV, name);
>> -               if (ret)
>> +               if (ret) {
>>                         VHOST_CONFIG_LOG(name, ERR, "Failed to destroy VDUSE device: %s",
>>                                         strerror(errno));
>> +               } else {
>> +                       /*
>> +                        * VDUSE device was no more attached to the vDPA bus,
>> +                        * so we can remove the reconnect file.
>> +                        */
>> +                       ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s",
>> +                                       vduse_reconnect_dir, name);
>> +                       if (ret < 0 || ret == sizeof(reconnect_file))
>> +                               VHOST_CONFIG_LOG(name, ERR,
>> +                                               "Failed to create vduse reconnect path name");
>> +                       else
>> +                               unlink(reconnect_file);
>> +               }
>> +
>>                 close(dev->vduse_ctrl_fd);
>>                 dev->vduse_ctrl_fd = -1;
>>         }
>> --
>> 2.46.0
>>
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] vhost: add reconnection support to VDUSE
  2024-09-06  7:14   ` Chenbo Xia
  2024-09-06  7:15     ` Chenbo Xia
  2024-09-06  7:48     ` Maxime Coquelin
@ 2024-09-06  9:20     ` Chenbo Xia
  2 siblings, 0 replies; 7+ messages in thread
From: Chenbo Xia @ 2024-09-06  9:20 UTC (permalink / raw)
  To: Maxime Coquelin; +Cc: dev, david.marchand

Hi Maxime,

> On Sep 5, 2024, at 22:26, Maxime Coquelin <maxime.coquelin@redhat.com> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This patch enables VDUSE reconnection support making use of
> the newly introduced reconnection mechanism in Vhost
> library.
> 
> At DPDK VDUSE device creation time, there are two
> possibilities:
> 1. The Kernel VDUSE device does not exist:
>  a. A reconnection file named after the VUDSE device name
>     is created in VDUSE tmpfs.
>  b. The file is truncated to 'struct vhost_reconnect_data'
>     size, and mmapped.
>  c. Negotiated features, Virtio status... are saved for
>     sanity checks at reconnect time.
> 2. The Kernel VDUSE device already exists:
>  a. Exit with failure if no reconnect file exists for
>     this device.
>  b. Open and mmap the reconnect file.
>  c. Perform sanity check to ensure features are compatible.
>  d. Restore virtqueues' available indexes at startup time.
> 
> Then at runtime, the virtqueues' available index are logged by
> the Vhost reconnection mechanism.
> 
> At DPDK VDUSE device destruction time, there are two
> possibilities:
> 1. The Kernel VDUSE device destruction succeed, which
>    means it is no more attached to the vDPA bus. The
>    reconnection file is unmapped and then removed.
> 2. The Kernel VDUSE device destruction failed, meaning it
>    is no more attached to the vDPA bus. The reconnection
>    file is unmapped but not removed to make possible later
>    reconnection.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/vhost/vduse.c | 280 +++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 241 insertions(+), 39 deletions(-)
> 
> diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
> index c66602905c..bd0e492d62 100644
> --- a/lib/vhost/vduse.c
> +++ b/lib/vhost/vduse.c
> @@ -136,7 +136,7 @@ vduse_control_queue_event(int fd, void *arg, int *remove __rte_unused)
> }
> 
> static void
> -vduse_vring_setup(struct virtio_net *dev, unsigned int index)
> +vduse_vring_setup(struct virtio_net *dev, unsigned int index, bool reconnect)
> {
>        struct vhost_virtqueue *vq = dev->virtqueue[index];
>        struct vhost_vring_addr *ra = &vq->ring_addrs;
> @@ -152,6 +152,19 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
>                return;
>        }
> 
> +       if (reconnect) {
> +               vq->last_avail_idx = vq->reconnect_log->last_avail_idx;
> +               vq->last_used_idx = vq->reconnect_log->last_avail_idx;
> +       } else {
> +               vq->last_avail_idx = vq_info.split.avail_index;
> +               vq->last_used_idx = vq_info.split.avail_index;
> +       }
> +       vq->size = vq_info.num;
> +       vq->ready = true;
> +       vq->enabled = vq_info.ready;
> +       ra->desc_user_addr = vq_info.desc_addr;
> +       ra->avail_user_addr = vq_info.driver_addr;
> +       ra->used_user_addr = vq_info.device_addr;
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx",
> @@ -162,15 +175,6 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int index)
>                        (unsigned long long)vq_info.device_addr);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", vq_info.split.avail_index);
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready);
> -
> -       vq->last_avail_idx = vq_info.split.avail_index;
> -       vq->size = vq_info.num;
> -       vq->ready = true;
> -       vq->enabled = vq_info.ready;
> -       ra->desc_user_addr = vq_info.desc_addr;
> -       ra->avail_user_addr = vq_info.driver_addr;
> -       ra->used_user_addr = vq_info.device_addr;
> -
>        vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>        if (vq->kickfd < 0) {
>                VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for VQ %u: %s",
> @@ -267,7 +271,7 @@ vduse_vring_cleanup(struct virtio_net *dev, unsigned int index)
> }
> 
> static void
> -vduse_device_start(struct virtio_net *dev)
> +vduse_device_start(struct virtio_net *dev, bool reconnect)
> {
>        unsigned int i, ret;
> 
> @@ -287,6 +291,15 @@ vduse_device_start(struct virtio_net *dev)
>                return;
>        }
> 
> +       if (reconnect && dev->features != dev->reconnect_log->features) {
> +               VHOST_CONFIG_LOG(dev->ifname, ERR,
> +                               "Mismatch between reconnect file features 0x%" PRIx64 " & device features 0x%" PRIx64,

Checkpatch reports long line

> +                               dev->reconnect_log->features, dev->features);
> +               return;
> +       }
> +
> +       dev->reconnect_log->features = dev->features;
> +
>        VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" PRIx64,
>                dev->features);
> 
> @@ -300,7 +313,7 @@ vduse_device_start(struct virtio_net *dev)
>        }
> 
>        for (i = 0; i < dev->nr_vring; i++)
> -               vduse_vring_setup(dev, i);
> +               vduse_vring_setup(dev, i, reconnect);
> 
>        dev->flags |= VIRTIO_DEV_READY;
> 
> @@ -373,6 +386,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
>                                req.s.status);
>                old_status = dev->status;
>                dev->status = req.s.status;
> +               dev->reconnect_log->status = dev->status;
>                resp.result = VDUSE_REQ_RESULT_OK;
>                break;
>        case VDUSE_UPDATE_IOTLB:
> @@ -398,7 +412,7 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
> 
>        if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) {
>                if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)
> -                       vduse_device_start(dev);
> +                       vduse_device_start(dev, false);
>                else
>                        vduse_device_stop(dev);
>        }
> @@ -407,10 +421,64 @@ vduse_events_handler(int fd, void *arg, int *remove __rte_unused)
>                        vduse_req_id_to_str(req.type), req.type);
> }
> 
> +static char vduse_reconnect_dir[PATH_MAX];
> +static bool vduse_reconnect_path_set;
> +
> +static int
> +vduse_reconnect_path_init(void)
> +{
> +       const char *directory;
> +       int ret;
> +
> +       /* from RuntimeDirectory= see systemd.exec */
> +       directory = getenv("RUNTIME_DIRECTORY");
> +       if (directory == NULL) {
> +               /*
> +                * Used standard convention defined in
> +                * XDG Base Directory Specification and
> +                * Filesystem Hierarchy Standard.
> +                */
> +               if (getuid() == 0)
> +                       directory = "/var/run";
> +               else
> +                       directory = getenv("XDG_RUNTIME_DIR") ? : "/tmp";
> +       }
> +
> +       ret = snprintf(vduse_reconnect_dir, sizeof(vduse_reconnect_dir), "%s/dpdk/vduse",
> +                       directory);
> +       if (ret < 0 || ret == sizeof(vduse_reconnect_dir)) {
> +               VHOST_CONFIG_LOG("vduse", ERR, "Error creating VDUSE reconnect path name");
> +               return -1;
> +       }
> +
> +       ret = mkdir(vduse_reconnect_dir, 0700);
> +       if (ret < 0 && errno != EEXIST) {
> +               VHOST_CONFIG_LOG("vduse", ERR, "Error creating '%s': %s",
> +                               vduse_reconnect_dir, strerror(errno));
> +               return -1;
> +       }
> +
> +       VHOST_CONFIG_LOG("vduse", INFO, "Created VDUSE reconnect directory in %s",
> +                       vduse_reconnect_dir);
> +
> +       return 0;
> +}
> +
> +static void
> +vduse_reconnect_handler(int fd, void *arg, int *remove)
> +{
> +       struct virtio_net *dev = arg;
> +
> +       vduse_device_start(dev, true);
> +
> +       close(fd);
> +       *remove = 1;
> +}
> +
> int
> vduse_device_create(const char *path, bool compliant_ol_flags)
> {
> -       int control_fd, dev_fd, vid, ret;
> +       int control_fd, dev_fd, vid, ret, reco_fd;
>        uint32_t i, max_queue_pairs, total_queues;
>        struct virtio_net *dev;
>        struct virtio_net_config vnet_config = {{ 0 }};
> @@ -418,6 +486,9 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>        uint64_t features;
>        struct vduse_dev_config *dev_config = NULL;
>        const char *name = path + strlen("/dev/vduse/");
> +       char reconnect_file[PATH_MAX];
> +       struct vhost_reconnect_data *reconnect_log = NULL;
> +       bool reconnect = false;
> 
>        if (vduse.fdset == NULL) {
>                vduse.fdset = fdset_init("vduse-evt");
> @@ -427,6 +498,20 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                }
>        }
> 
> +       if (vduse_reconnect_path_set == false) {
> +               if (vduse_reconnect_path_init() < 0) {
> +                       VHOST_CONFIG_LOG(path, ERR, "failed to initialize reconnect path");
> +                       return -1;
> +               }
> +               vduse_reconnect_path_set = true;
> +       }
> +
> +       ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s", vduse_reconnect_dir, name);
> +       if (ret < 0 || ret == sizeof(reconnect_file)) {
> +               VHOST_CONFIG_LOG(name, ERR, "Failed to create vduse reconnect path name");
> +               return -1;
> +       }
> +
>        control_fd = open(VDUSE_CTRL_PATH, O_RDWR);
>        if (control_fd < 0) {
>                VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s",
> @@ -441,14 +526,6 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                goto out_ctrl_close;
>        }
> 
> -       dev_config = malloc(offsetof(struct vduse_dev_config, config) +
> -                       sizeof(vnet_config));
> -       if (!dev_config) {
> -               VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
> -               ret = -1;
> -               goto out_ctrl_close;
> -       }
> -
>        ret = rte_vhost_driver_get_features(path, &features);
>        if (ret < 0) {
>                VHOST_CONFIG_LOG(name, ERR, "Failed to get backend features");
> @@ -469,23 +546,97 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>        else
>                total_queues += 1; /* Includes ctrl queue */
> 
> -       vnet_config.max_virtqueue_pairs = max_queue_pairs;
> -       memset(dev_config, 0, sizeof(struct vduse_dev_config));
> +       if (access(path, F_OK) == 0) {
> +               VHOST_CONFIG_LOG(name, INFO, "Device already exists, reconnecting...");
> +               reconnect = true;
> +
> +               reco_fd = open(reconnect_file, O_RDWR, 0600);
> +               if (reco_fd < 0) {
> +                       if (errno == ENOENT) {
> +                               VHOST_CONFIG_LOG(name, ERR, "Missing reconnect file (%s)",
> +                                               reconnect_file);
> +                       } else {
> +                               VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
> +                                               reconnect_file, strerror(errno));
> +                       }

Seems no {} is needed for if-else?

> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +
> +               reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
> +                               MAP_SHARED, reco_fd, 0);
> +               close(reco_fd);
> +               if (reconnect_log == MAP_FAILED) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
> +                                       reconnect_file, strerror(errno));
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +       } else {
> +               reco_fd = open(reconnect_file, O_CREAT | O_EXCL | O_RDWR, 0600);
> +               if (reco_fd < 0) {
> +                       if (errno == EEXIST) {
> +                               VHOST_CONFIG_LOG(name, ERR, "Reconnect file %s exists but not the device",
> +                                               reconnect_file);
> +                       } else {
> +                               VHOST_CONFIG_LOG(name, ERR, "Failed to open reconnect file %s (%s)",
> +                                               reconnect_file, strerror(errno));
> +                       }
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> 
> -       strncpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
> -       dev_config->device_id = VIRTIO_ID_NET;
> -       dev_config->vendor_id = 0;
> -       dev_config->features = features;
> -       dev_config->vq_num = total_queues;
> -       dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
> -       dev_config->config_size = sizeof(struct virtio_net_config);
> -       memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
> +               ret = ftruncate(reco_fd, sizeof(*reconnect_log));
> +               if (ret < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to truncate reconnect file %s (%s)",
> +                                       reconnect_file, strerror(errno));
> +                       close(reco_fd);
> +                       goto out_ctrl_close;
> +               }
> 
> -       ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
> -       if (ret < 0) {
> -               VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
> -                               strerror(errno));
> -               goto out_free;
> +               reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ | PROT_WRITE,
> +                                       MAP_SHARED, reco_fd, 0);
> +               close(reco_fd);
> +               if (reconnect_log == MAP_FAILED) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect file %s (%s)",
> +                                       reconnect_file, strerror(errno));
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +
> +               reconnect_log->version = 0;
> +
> +               dev_config = malloc(offsetof(struct vduse_dev_config, config) +
> +                               sizeof(vnet_config));
> +               if (!dev_config) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE config");
> +                       ret = -1;
> +                       goto out_ctrl_close;
> +               }
> +
> +               vnet_config.max_virtqueue_pairs = max_queue_pairs;
> +               memset(dev_config, 0, sizeof(struct vduse_dev_config));
> +
> +               rte_strscpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
> +               dev_config->device_id = VIRTIO_ID_NET;
> +               dev_config->vendor_id = 0;
> +               dev_config->features = features;
> +               dev_config->vq_num = total_queues;
> +               dev_config->vq_align = sysconf(_SC_PAGE_SIZE);
> +               dev_config->config_size = sizeof(struct virtio_net_config);
> +               memcpy(dev_config->config, &vnet_config, sizeof(vnet_config));
> +
> +               ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config);
> +               if (ret < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: %s",
> +                                       strerror(errno));
> +                       goto out_free;
> +               }
> +
> +               memcpy(&reconnect_log->config, &vnet_config, sizeof(vnet_config));
> +               reconnect_log->nr_vrings = total_queues;
> +               free(dev_config);
> +               dev_config = NULL;
>        }
> 
>        dev_fd = open(path, O_RDWR);
> @@ -519,10 +670,15 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>        strncpy(dev->ifname, path, IF_NAME_SZ - 1);
>        dev->vduse_ctrl_fd = control_fd;
>        dev->vduse_dev_fd = dev_fd;
> +       dev->reconnect_log = reconnect_log;
> +       if (reconnect)
> +               dev->status = dev->reconnect_log->status;
> +
>        vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true);
> 
>        for (i = 0; i < total_queues; i++) {
>                struct vduse_vq_config vq_cfg = { 0 };
> +               struct vhost_virtqueue *vq;
> 
>                ret = alloc_vring_queue(dev, i);
>                if (ret) {
> @@ -530,6 +686,12 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                        goto out_dev_destroy;
>                }
> 
> +               vq = dev->virtqueue[i];
> +               vq->reconnect_log = &reconnect_log->vring[i];
> +
> +               if (reconnect)
> +                       continue;
> +
>                vq_cfg.index = i;
>                vq_cfg.max_size = 1024;
> 
> @@ -549,7 +711,28 @@ vduse_device_create(const char *path, bool compliant_ol_flags)
>                goto out_dev_destroy;
>        }
> 
> -       free(dev_config);
> +       if (reconnect && dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK)  {
> +               reco_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +               if (reco_fd < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to create reco_fd: %s",
> +                                       strerror(errno));
> +                       ret = -1;
> +                       goto out_dev_destroy;
> +               }
> +
> +               ret = fdset_add(vduse.fdset, reco_fd, vduse_reconnect_handler, NULL, dev);
> +               if (ret) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to add fd %d to vduse fdset",
> +                                       dev->vduse_dev_fd);

Should print reco_fd

> +                       goto out_dev_destroy;
> +               }
> +
> +               ret = eventfd_write(reco_fd, (eventfd_t)1);
> +               if (ret < 0) {
> +                       VHOST_CONFIG_LOG(name, ERR, "Failed to write to reconnect eventfd");
> +                       goto out_dev_destroy;
> +               }
> +       }

Maybe I missed something, why we need to implement like this instead of directly call vduse_device_start?

Thanks,
Chenbo

> 
>        return 0;
> 
> @@ -587,6 +770,9 @@ vduse_device_destroy(const char *path)
>        if (vid == RTE_MAX_VHOST_DEVICE)
>                return -1;
> 
> +       if (dev->reconnect_log)
> +               munmap(dev->reconnect_log, sizeof(*dev->reconnect_log));
> +
>        vduse_device_stop(dev);
> 
>        fdset_del(vduse.fdset, dev->vduse_dev_fd);
> @@ -597,10 +783,26 @@ vduse_device_destroy(const char *path)
>        }
> 
>        if (dev->vduse_ctrl_fd >= 0) {
> +               char reconnect_file[PATH_MAX];
> +
>                ret = ioctl(dev->vduse_ctrl_fd, VDUSE_DESTROY_DEV, name);
> -               if (ret)
> +               if (ret) {
>                        VHOST_CONFIG_LOG(name, ERR, "Failed to destroy VDUSE device: %s",
>                                        strerror(errno));
> +               } else {
> +                       /*
> +                        * VDUSE device was no more attached to the vDPA bus,
> +                        * so we can remove the reconnect file.
> +                        */
> +                       ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s",
> +                                       vduse_reconnect_dir, name);
> +                       if (ret < 0 || ret == sizeof(reconnect_file))
> +                               VHOST_CONFIG_LOG(name, ERR,
> +                                               "Failed to create vduse reconnect path name");
> +                       else
> +                               unlink(reconnect_file);
> +               }
> +
>                close(dev->vduse_ctrl_fd);
>                dev->vduse_ctrl_fd = -1;
>        }
> --
> 2.46.0
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-09-06  9:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-09-05 14:26 [PATCH 0/2] vhost: add VDUSE reconnection support Maxime Coquelin
2024-09-05 14:26 ` [PATCH 1/2] vhost: add logging mechanism for reconnection Maxime Coquelin
2024-09-05 14:26 ` [PATCH 2/2] vhost: add reconnection support to VDUSE Maxime Coquelin
2024-09-06  7:14   ` Chenbo Xia
2024-09-06  7:15     ` Chenbo Xia
2024-09-06  7:48     ` Maxime Coquelin
2024-09-06  9:20     ` Chenbo Xia

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).