DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example
@ 2020-02-28 15:32 Jin Yu
  2020-04-27  8:01 ` Maxime Coquelin
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Jin Yu @ 2020-02-28 15:32 UTC (permalink / raw)
  To: Maxime Coquelin, Tiwei Bie, Zhihong Wang, John McNamara, Marko Kovacevic
  Cc: dev, Jin Yu

Reduce the amount of code and make it easier to read. The
example is useful for understanding the inflight APIs and how
the packed ring works. Update the RST because the packed
ring patch has been merged into QEMU master and the ring_packed
parameter has been renamed to packed.

Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage sample")

Signed-off-by: Jin Yu <jin.yu@intel.com>
---
 doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
 examples/vhost_blk/blk.c               |   13 +-
 examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
 examples/vhost_blk/vhost_blk.h         |   39 +-
 4 files changed, 494 insertions(+), 705 deletions(-)

diff --git a/doc/guides/sample_app_ug/vhost_blk.rst b/doc/guides/sample_app_ug/vhost_blk.rst
index 39096e2e4..681de6f3e 100644
--- a/doc/guides/sample_app_ug/vhost_blk.rst
+++ b/doc/guides/sample_app_ug/vhost_blk.rst
@@ -51,7 +51,7 @@ Start the VM
         -drive file=os.img,if=none,id=disk \
         -device ide-hd,drive=disk,bootindex=0 \
         -chardev socket,id=char0,reconnect=1,path=/tmp/vhost.socket \
-        -device vhost-user-blk-pci,ring_packed=1,chardev=char0,num-queues=1 \
+        -device vhost-user-blk-pci,packed=on,chardev=char0,num-queues=1 \
         ...
 
 .. note::
@@ -59,5 +59,7 @@ Start the VM
     Qemu v4.0 or newer version is required.
     reconnect=1 means live recovery support that qemu can reconnect vhost_blk
     after we restart vhost_blk example.
-    ring_packed=1 means the device support packed ring but need the guest kernel
-    version >= 5.0
+    packed=on means the device supports packed ring, but the guest kernel
+    version must be >= 5.0.
+    As of Qemu commit 9bb73502321d46f4d320fa17aa38201445783fc4, Qemu supports
+    both vhost-blk reconnect and packed ring.
diff --git a/examples/vhost_blk/blk.c b/examples/vhost_blk/blk.c
index 1b0b764b2..9048e2f8a 100644
--- a/examples/vhost_blk/blk.c
+++ b/examples/vhost_blk/blk.c
@@ -50,7 +50,10 @@ vhost_bdev_blk_readwrite(struct vhost_block_dev *bdev,
 
 	offset = lba_512 * 512;
 
-	for (i = 0; i < task->iovs_cnt; i++) {
+	/* iovs[0] is the head and iovs[iovs_cnt - 1] is the tail
+	 * Middle is the data range
+	 */
+	for (i = 1; i < task->iovs_cnt - 1; i++) {
 		if (task->dxfer_dir == BLK_DIR_TO_DEV)
 			memcpy(bdev->data + offset, task->iovs[i].iov_base,
 			       task->iovs[i].iov_len);
@@ -83,7 +86,7 @@ vhost_bdev_process_blk_commands(struct vhost_block_dev *bdev,
 				"%s - passed IO buffer is not multiple of 512b"
 				"(req_idx = %"PRIu16").\n",
 				task->req->type ? "WRITE" : "READ",
-				task->head_idx);
+				task->req_idx);
 			return VIRTIO_BLK_S_UNSUPP;
 		}
 
@@ -98,14 +101,10 @@ vhost_bdev_process_blk_commands(struct vhost_block_dev *bdev,
 				"%s - passed IO buffer is not multiple of 512b"
 				"(req_idx = %"PRIu16").\n",
 				task->req->type ? "WRITE" : "READ",
-				task->head_idx);
+				task->req_idx);
 			return VIRTIO_BLK_S_UNSUPP;
 		}
 
-		if (task->readtype) {
-			fprintf(stderr, "type isn't right\n");
-			return VIRTIO_BLK_S_IOERR;
-		}
 		task->dxfer_dir = BLK_DIR_TO_DEV;
 		vhost_bdev_blk_readwrite(bdev, task,
 					 task->req->sector, task->data_len);
diff --git a/examples/vhost_blk/vhost_blk.c b/examples/vhost_blk/vhost_blk.c
index 74c82a900..38e4e155c 100644
--- a/examples/vhost_blk/vhost_blk.c
+++ b/examples/vhost_blk/vhost_blk.c
@@ -26,15 +26,22 @@
 
 #define MAX_TASK		12
 
-#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \
+#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) |\
 			    (1ULL << VIRTIO_F_VERSION_1) |\
 			    (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
 			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
 
+#define CTRLR_NAME		"vhost.socket"
+
+enum CTRLR_WORKER_STATUS {
+	WORKER_STATE_START = 0,
+	WORKER_STATE_STOP,
+};
+
 /* Path to folder where character device will be created. Can be set by user. */
 static char dev_pathname[PATH_MAX] = "";
 static sem_t exit_sem;
-static int g_should_stop = -1;
+static enum CTRLR_WORKER_STATUS worker_thread_status;
 
 struct vhost_blk_ctrlr *
 vhost_blk_ctrlr_find(const char *ctrlr_name)
@@ -46,716 +53,478 @@ vhost_blk_ctrlr_find(const char *ctrlr_name)
 	return g_vhost_ctrlr;
 }
 
-static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len)
+static uint64_t
+gpa_to_vva(struct vhost_blk_ctrlr *ctrlr, uint64_t gpa, uint64_t *len)
 {
-	char path[PATH_MAX];
-	struct vhost_blk_ctrlr *ctrlr;
-	int ret = 0;
-
-	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
-	if (ret) {
-		fprintf(stderr, "Cannot get socket name\n");
-		assert(ret != 0);
-	}
-
-	ctrlr = vhost_blk_ctrlr_find(path);
-	if (!ctrlr) {
-		fprintf(stderr, "Controller is not ready\n");
-		assert(ctrlr != NULL);
-	}
-
 	assert(ctrlr->mem != NULL);
 
 	return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
 }
 
-static struct vring_packed_desc *
-descriptor_get_next_packed(struct rte_vhost_vring *vq,
-			     uint16_t *idx)
+static void
+enqueue_task(struct vhost_blk_task *task)
 {
-	if (vq->desc_packed[*idx % vq->size].flags & VIRTQ_DESC_F_NEXT) {
-		*idx += 1;
-		return &vq->desc_packed[*idx % vq->size];
-	}
+	struct vhost_blk_queue *vq = task->vq;
+	struct vring_used *used = vq->vring.used;
 
-	return NULL;
-}
+	rte_vhost_set_last_inflight_io_split(task->ctrlr->vid,
+		vq->id, task->req_idx);
 
-static bool
-descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
-{
-	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
-}
+	/* Fill out the next entry in the "used" ring.  id = the
+	 * index of the descriptor that contained the blk request.
+	 * len = the total amount of data transferred for the blk
+	 * request. We must report the correct len, for variable
+	 * length blk CDBs, where we may return less data than
+	 * allocated by the guest VM.
+	 */
+	used->ring[used->idx & (vq->vring.size - 1)].id = task->req_idx;
+	used->ring[used->idx & (vq->vring.size - 1)].len = task->data_len;
+	rte_smp_mb();
+	used->idx++;
+	rte_smp_mb();
 
-static bool
-descriptor_is_wr_packed(struct vring_packed_desc *cur_desc)
-{
-	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
+	rte_vhost_clr_inflight_desc_split(task->ctrlr->vid,
+		vq->id, used->idx, task->req_idx);
+
+	/* Send an interrupt back to the guest VM so that it knows
+	 * a completion is ready to be processed.
+	 */
+	rte_vhost_vring_call(task->ctrlr->vid, vq->id);
 }
 
-static struct rte_vhost_inflight_desc_packed *
-inflight_desc_get_next(struct rte_vhost_inflight_info_packed *inflight_packed,
-			       struct rte_vhost_inflight_desc_packed *cur_desc)
+static void
+enqueue_task_packed(struct vhost_blk_task *task)
 {
-	if (!!(cur_desc->flags & VIRTQ_DESC_F_NEXT))
-		return &inflight_packed->desc[cur_desc->next];
+	struct vhost_blk_queue *vq = task->vq;
+	struct vring_packed_desc *desc;
 
-	return NULL;
+	rte_vhost_set_last_inflight_io_packed(task->ctrlr->vid, vq->id,
+					    task->inflight_idx);
+
+	desc = &vq->vring.desc_packed[vq->last_used_idx];
+	desc->id = task->buffer_id;
+	desc->addr = 0;
+
+	rte_smp_mb();
+	if (vq->used_wrap_counter)
+		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
+	else
+		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
+	rte_smp_mb();
+
+	rte_vhost_clr_inflight_desc_packed(task->ctrlr->vid, vq->id,
+					   task->inflight_idx);
+
+	vq->last_used_idx += task->chain_num;
+	if (vq->last_used_idx >= vq->vring.size) {
+		vq->last_used_idx -= vq->vring.size;
+		vq->used_wrap_counter = !vq->used_wrap_counter;
+	}
+
+	/* Send an interrupt back to the guest VM so that it knows
+	 * a completion is ready to be processed.
+	 */
+	rte_vhost_vring_call(task->ctrlr->vid, vq->id);
 }
 
 static bool
-inflight_desc_has_next(struct rte_vhost_inflight_desc_packed *cur_desc)
+descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
 {
 	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
 }
 
 static bool
-inflight_desc_is_wr(struct rte_vhost_inflight_desc_packed *cur_desc)
+descriptor_has_next_split(struct vring_desc *cur_desc)
 {
-	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
+	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
 }
 
-static void
-inflight_process_payload_chain_packed(struct inflight_blk_task *task)
+static int
+desc_payload_to_iovs(struct vhost_blk_ctrlr *ctrlr, struct iovec *iovs,
+		     uint32_t *iov_index, uintptr_t payload, uint64_t remaining)
 {
-	void *data;
-	uint64_t chunck_len;
-	struct vhost_blk_task *blk_task;
-	struct rte_vhost_inflight_desc_packed *desc;
-
-	blk_task = &task->blk_task;
-	blk_task->iovs_cnt = 0;
+	void *vva;
+	uint64_t len;
 
 	do {
-		desc = task->inflight_desc;
-		chunck_len = desc->len;
-		data = (void *)(uintptr_t)gpa_to_vva(blk_task->bdev->vid,
-						     desc->addr,
-						     &chunck_len);
-		if (!data || chunck_len != desc->len) {
+		if (*iov_index >= VHOST_BLK_MAX_IOVS) {
+			fprintf(stderr, "VHOST_BLK_MAX_IOVS reached\n");
+			return -1;
+		}
+		len = remaining;
+		vva = (void *)gpa_to_vva(ctrlr,
+				 payload, &len);
+		if (!vva || !len) {
 			fprintf(stderr, "failed to translate desc address.\n");
-			return;
+			return -1;
 		}
 
-		blk_task->iovs[blk_task->iovs_cnt].iov_base = data;
-		blk_task->iovs[blk_task->iovs_cnt].iov_len = desc->len;
-		blk_task->data_len += desc->len;
-		blk_task->iovs_cnt++;
-		task->inflight_desc = inflight_desc_get_next(
-					task->inflight_packed, desc);
-	} while (inflight_desc_has_next(task->inflight_desc));
-
-	chunck_len = task->inflight_desc->len;
-	blk_task->status = (void *)(uintptr_t)gpa_to_vva(
-		blk_task->bdev->vid, task->inflight_desc->addr, &chunck_len);
-	if (!blk_task->status || chunck_len != task->inflight_desc->len)
-		fprintf(stderr, "failed to translate desc address.\n");
+		iovs[*iov_index].iov_base = vva;
+		iovs[*iov_index].iov_len = len;
+		payload += len;
+		remaining -= len;
+		(*iov_index)++;
+	} while (remaining);
+
+	return 0;
 }
 
-static void
-inflight_submit_completion_packed(struct inflight_blk_task *task,
-					      uint32_t q_idx, uint16_t *used_id,
-					      bool *used_wrap_counter)
+static struct vring_desc *
+vring_get_next_desc(struct vhost_blk_queue *vq, struct vring_desc *desc)
 {
-	struct vhost_blk_ctrlr *ctrlr;
-	struct rte_vhost_vring *vq;
-	struct vring_packed_desc *desc;
-	int ret;
+	if (descriptor_has_next_split(desc))
+		return &vq->vring.desc[desc->next];
 
-	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
-	vq = task->blk_task.vq;
-
-	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
-						    task->blk_task.head_idx);
-	if (ret != 0)
-		fprintf(stderr, "failed to set last inflight io\n");
-
-	desc = &vq->desc_packed[*used_id];
-	desc->id = task->blk_task.buffer_id;
-	rte_smp_mb();
-	if (*used_wrap_counter)
-		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
-	else
-		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
-	rte_smp_mb();
+	return NULL;
+}
 
-	*used_id += task->blk_task.iovs_cnt + 2;
-	if (*used_id >= vq->size) {
-		*used_id -= vq->size;
-		*used_wrap_counter = !(*used_wrap_counter);
+static struct vring_packed_desc *
+vring_get_next_desc_packed(struct vhost_blk_queue *vq, uint16_t *req_idx)
+{
+	if (descriptor_has_next_packed(&vq->vring.desc_packed[*req_idx])) {
+		*req_idx = (*req_idx + 1) % vq->vring.size;
+		return &vq->vring.desc_packed[*req_idx];
 	}
 
-	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
-						 task->blk_task.head_idx);
-	if (ret != 0)
-		fprintf(stderr, "failed to clear inflight io\n");
+	return NULL;
+}
+
+static struct rte_vhost_inflight_desc_packed *
+vring_get_next_inflight_desc(struct vhost_blk_queue *vq,
+			struct rte_vhost_inflight_desc_packed *desc)
+{
+	if (!!(desc->flags & VRING_DESC_F_NEXT))
+		return &vq->inflight_ring.inflight_packed->desc[desc->next];
 
-	/* Send an interrupt back to the guest VM so that it knows
-	 * a completion is ready to be processed.
-	 */
-	rte_vhost_vring_call(task->blk_task.bdev->vid, q_idx);
+	return NULL;
 }
 
-static void
-submit_completion_packed(struct vhost_blk_task *task, uint32_t q_idx,
-				  uint16_t *used_id, bool *used_wrap_counter)
+static int
+setup_iovs_from_descs_split(struct vhost_blk_ctrlr *ctrlr,
+			    struct vhost_blk_queue *vq, uint16_t req_idx,
+			    struct iovec *iovs, uint32_t *iovs_idx,
+			    uint32_t *payload)
 {
-	struct vhost_blk_ctrlr *ctrlr;
-	struct rte_vhost_vring *vq;
-	struct vring_packed_desc *desc;
-	int ret;
+	struct vring_desc *desc = &vq->vring.desc[req_idx];
 
-	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
-	vq = task->vq;
+	do {
+		/* does not support indirect descriptors */
+		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);
 
-	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
-						    task->inflight_idx);
-	if (ret != 0)
-		fprintf(stderr, "failed to set last inflight io\n");
+		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
+			fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n");
+			return -1;
+		}
 
-	desc = &vq->desc_packed[*used_id];
-	desc->id = task->buffer_id;
-	rte_smp_mb();
-	if (*used_wrap_counter)
-		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
-	else
-		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
-	rte_smp_mb();
+		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
+			desc->addr, desc->len) != 0) {
+			fprintf(stderr, "Failed to convert desc payload to iovs\n");
+			return -1;
+		}
 
-	*used_id += task->iovs_cnt + 2;
-	if (*used_id >= vq->size) {
-		*used_id -= vq->size;
-		*used_wrap_counter = !(*used_wrap_counter);
-	}
+		*payload += desc->len;
 
-	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
-						 task->inflight_idx);
-	if (ret != 0)
-		fprintf(stderr, "failed to clear inflight io\n");
+		desc = vring_get_next_desc(vq, desc);
+	} while (desc != NULL);
 
-	/* Send an interrupt back to the guest VM so that it knows
-	 * a completion is ready to be processed.
-	 */
-	rte_vhost_vring_call(task->bdev->vid, q_idx);
+	return 0;
 }
 
-static void
-vhost_process_payload_chain_packed(struct vhost_blk_task *task,
-	uint16_t *idx)
+static int
+setup_iovs_from_descs_packed(struct vhost_blk_ctrlr *ctrlr,
+			     struct vhost_blk_queue *vq, uint16_t req_idx,
+			     struct iovec *iovs, uint32_t *iovs_idx,
+			     uint32_t *payload)
 {
-	void *data;
-	uint64_t chunck_len;
-
-	task->iovs_cnt = 0;
+	struct vring_packed_desc *desc = &vq->vring.desc_packed[req_idx];
 
 	do {
-		chunck_len = task->desc_packed->len;
-		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-						     task->desc_packed->addr,
-							 &chunck_len);
-		if (!data || chunck_len != task->desc_packed->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			return;
+		/* does not support indirect descriptors */
+		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);
+
+		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
+			fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n");
+			return -1;
 		}
 
-		task->iovs[task->iovs_cnt].iov_base = data;
-		task->iovs[task->iovs_cnt].iov_len = task->desc_packed->len;
-		task->data_len += task->desc_packed->len;
-		task->iovs_cnt++;
-		task->desc_packed = descriptor_get_next_packed(task->vq, idx);
-	} while (descriptor_has_next_packed(task->desc_packed));
-
-	task->last_idx = *idx % task->vq->size;
-	chunck_len = task->desc_packed->len;
-	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-						   task->desc_packed->addr,
-						   &chunck_len);
-	if (!task->status || chunck_len != task->desc_packed->len)
-		fprintf(stderr, "failed to translate desc address.\n");
-}
+		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
+			desc->addr, desc->len) != 0) {
+			fprintf(stderr, "Failed to convert desc payload to iovs\n");
+			return -1;
+		}
 
+		*payload += desc->len;
 
-static int
-descriptor_is_available(struct rte_vhost_vring *vring, uint16_t idx,
-					bool avail_wrap_counter)
-{
-	uint16_t flags = vring->desc_packed[idx].flags;
+		desc = vring_get_next_desc_packed(vq, &req_idx);
+	} while (desc != NULL);
 
-	return ((!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter) &&
-		(!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter));
+	return 0;
 }
 
-static void
-process_requestq_packed(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
+static int
+setup_iovs_from_inflight_desc(struct vhost_blk_ctrlr *ctrlr,
+			      struct vhost_blk_queue *vq, uint16_t req_idx,
+			      struct iovec *iovs, uint32_t *iovs_idx,
+			      uint32_t *payload)
 {
-	bool avail_wrap_counter, used_wrap_counter;
-	uint16_t avail_idx, used_idx;
-	int ret;
-	uint64_t chunck_len;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_vring *vq;
-	struct vhost_blk_task *task;
-
-	blk_vq = &ctrlr->bdev->queues[q_idx];
-	vq = &blk_vq->vq;
-
-	avail_idx = blk_vq->last_avail_idx;
-	avail_wrap_counter = blk_vq->avail_wrap_counter;
-	used_idx = blk_vq->last_used_idx;
-	used_wrap_counter = blk_vq->used_wrap_counter;
-
-	task = rte_zmalloc(NULL, sizeof(*task), 0);
-	assert(task != NULL);
-	task->vq = vq;
-	task->bdev = ctrlr->bdev;
+	struct rte_vhost_ring_inflight *inflight_vq;
+	struct rte_vhost_inflight_desc_packed *desc;
 
-	while (descriptor_is_available(vq, avail_idx, avail_wrap_counter)) {
-		task->head_idx = avail_idx;
-		task->desc_packed = &task->vq->desc_packed[task->head_idx];
-		task->iovs_cnt = 0;
-		task->data_len = 0;
-		task->req = NULL;
-		task->status = NULL;
+	inflight_vq = &vq->inflight_ring;
+	desc = &inflight_vq->inflight_packed->desc[req_idx];
 
+	do {
 		/* does not support indirect descriptors */
-		assert((task->desc_packed->flags & VRING_DESC_F_INDIRECT) == 0);
+		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);
 
-		chunck_len = task->desc_packed->len;
-		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-			task->desc_packed->addr, &chunck_len);
-		if (!task->req || chunck_len != task->desc_packed->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			rte_free(task);
-			return;
+		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
+			fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n");
+			return -1;
 		}
 
-		task->desc_packed = descriptor_get_next_packed(task->vq,
-								&avail_idx);
-		assert(task->desc_packed != NULL);
-		if (!descriptor_has_next_packed(task->desc_packed)) {
-			task->dxfer_dir = BLK_DIR_NONE;
-			task->last_idx = avail_idx % vq->size;
-			chunck_len = task->desc_packed->len;
-			task->status = (void *)(uintptr_t)
-					      gpa_to_vva(task->bdev->vid,
-							task->desc_packed->addr,
-							&chunck_len);
-			if (!task->status ||
-				chunck_len != task->desc_packed->len) {
-				fprintf(stderr,
-					"failed to translate desc address.\n");
-				rte_free(task);
-				return;
-			}
-		} else {
-			task->readtype = descriptor_is_wr_packed(
-							task->desc_packed);
-			vhost_process_payload_chain_packed(task, &avail_idx);
-		}
-		task->buffer_id = vq->desc_packed[task->last_idx].id;
-		rte_vhost_set_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
-						   task->head_idx,
-						   task->last_idx,
-						   &task->inflight_idx);
-
-		if (++avail_idx >= vq->size) {
-			avail_idx -= vq->size;
-			avail_wrap_counter = !avail_wrap_counter;
+		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
+			desc->addr, desc->len) != 0) {
+			fprintf(stderr, "Failed to convert desc payload to iovs\n");
+			return -1;
 		}
-		blk_vq->last_avail_idx = avail_idx;
-		blk_vq->avail_wrap_counter = avail_wrap_counter;
 
-		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
-		if (ret) {
-			/* invalid response */
-			*task->status = VIRTIO_BLK_S_IOERR;
-		} else {
-			/* successfully */
-			*task->status = VIRTIO_BLK_S_OK;
-		}
+		*payload += desc->len;
 
-		submit_completion_packed(task, q_idx, &used_idx,
-						&used_wrap_counter);
-		blk_vq->last_used_idx = used_idx;
-		blk_vq->used_wrap_counter = used_wrap_counter;
-	}
+		desc = vring_get_next_inflight_desc(vq, desc);
+	} while (desc != NULL);
 
-	rte_free(task);
+	return 0;
 }
 
 static void
-submit_inflight_vq_packed(struct vhost_blk_ctrlr *ctrlr,
-	uint16_t q_idx)
+process_blk_task(struct vhost_blk_task *task)
 {
-	bool used_wrap_counter;
-	int req_idx, ret;
-	uint16_t used_idx;
-	uint64_t chunck_len;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_ring_inflight *inflight_vq;
-	struct rte_vhost_resubmit_info *resubmit_info;
-	struct rte_vhost_vring *vq;
-	struct inflight_blk_task *task;
-	struct vhost_blk_task *blk_task;
-	struct rte_vhost_inflight_info_packed *inflight_info;
-
-	blk_vq = &ctrlr->bdev->queues[q_idx];
-	vq = &blk_vq->vq;
-	inflight_vq = &blk_vq->inflight_vq;
-	resubmit_info = inflight_vq->resubmit_inflight;
-	inflight_info = inflight_vq->inflight_packed;
-	used_idx = blk_vq->last_used_idx;
-	used_wrap_counter = blk_vq->used_wrap_counter;
-
-	task = rte_malloc(NULL, sizeof(*task), 0);
-	if (!task) {
-		fprintf(stderr, "failed to allocate memory\n");
-		return;
-	}
-	blk_task = &task->blk_task;
-	blk_task->vq = vq;
-	blk_task->bdev = ctrlr->bdev;
-	task->inflight_packed = inflight_vq->inflight_packed;
-
-	while (resubmit_info->resubmit_num-- > 0) {
-		req_idx = resubmit_info->resubmit_num;
-		blk_task->head_idx =
-			resubmit_info->resubmit_list[req_idx].index;
-		task->inflight_desc =
-			&inflight_info->desc[blk_task->head_idx];
-		task->blk_task.iovs_cnt = 0;
-		task->blk_task.data_len = 0;
-		task->blk_task.req = NULL;
-		task->blk_task.status = NULL;
-
-		/* update the avail idx too
-		 * as it's initial value equals to used idx
-		 */
-		blk_vq->last_avail_idx += task->inflight_desc->num;
-		if (blk_vq->last_avail_idx >= vq->size) {
-			blk_vq->last_avail_idx -= vq->size;
-			blk_vq->avail_wrap_counter =
-				!blk_vq->avail_wrap_counter;
-		}
+	uint32_t payload = 0;
 
-		/* does not support indirect descriptors */
-		assert(task->inflight_desc != NULL);
-		assert((task->inflight_desc->flags &
-			VRING_DESC_F_INDIRECT) == 0);
-
-		chunck_len = task->inflight_desc->len;
-		blk_task->req = (void *)(uintptr_t)
-				     gpa_to_vva(blk_task->bdev->vid,
-						task->inflight_desc->addr,
-						&chunck_len);
-		if (!blk_task->req ||
-			chunck_len != task->inflight_desc->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			rte_free(task);
-			return;
-		}
+	if (task->vq->packed_ring) {
+		struct rte_vhost_ring_inflight *inflight_ring;
+		struct rte_vhost_resubmit_info *resubmit_inflight;
 
-		task->inflight_desc = inflight_desc_get_next(
-			task->inflight_packed, task->inflight_desc);
-		assert(task->inflight_desc != NULL);
-		if (!inflight_desc_has_next(task->inflight_desc)) {
-			blk_task->dxfer_dir = BLK_DIR_NONE;
-			chunck_len = task->inflight_desc->len;
-			blk_task->status = (void *)(uintptr_t)
-				gpa_to_vva(blk_task->bdev->vid,
-						task->inflight_desc->addr,
-						&chunck_len);
-			if (!blk_task->status ||
-			    chunck_len != task->inflight_desc->len) {
-				fprintf(stderr,
-					"failed to translate desc address.\n");
-				rte_free(task);
+		inflight_ring = &task->vq->inflight_ring;
+		resubmit_inflight = inflight_ring->resubmit_inflight;
+
+		if (resubmit_inflight != NULL &&
+		    resubmit_inflight->resubmit_list != NULL) {
+			if (setup_iovs_from_inflight_desc(task->ctrlr, task->vq,
+				task->req_idx, task->iovs, &task->iovs_cnt,
+				&payload)) {
+				fprintf(stderr, "Failed to setup iovs\n");
 				return;
 			}
 		} else {
-			blk_task->readtype =
-			inflight_desc_is_wr(task->inflight_desc);
-			inflight_process_payload_chain_packed(task);
+			if (setup_iovs_from_descs_packed(task->ctrlr, task->vq,
+				task->req_idx, task->iovs, &task->iovs_cnt,
+				&payload)) {
+				fprintf(stderr, "Failed to setup iovs\n");
+				return;
+			}
 		}
+	} else {
+		if (setup_iovs_from_descs_split(task->ctrlr, task->vq,
+			task->req_idx, task->iovs, &task->iovs_cnt, &payload)) {
+			fprintf(stderr, "Failed to setup iovs\n");
+			return;
+		}
+	}
 
-		blk_task->buffer_id = task->inflight_desc->id;
-
-		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, blk_task);
-		if (ret)
-			/* invalid response */
-			*blk_task->status = VIRTIO_BLK_S_IOERR;
-		else
-			/* successfully */
-			*blk_task->status = VIRTIO_BLK_S_OK;
-
-		inflight_submit_completion_packed(task, q_idx, &used_idx,
-						  &used_wrap_counter);
+	/* First IOV must be the req head. */
+	task->req = (struct virtio_blk_outhdr *)task->iovs[0].iov_base;
+	assert(sizeof(*task->req) == task->iovs[0].iov_len);
 
-		blk_vq->last_used_idx = used_idx;
-		blk_vq->used_wrap_counter = used_wrap_counter;
-	}
+	/* Last IOV must be the status tail. */
+	task->status = (uint8_t *)task->iovs[task->iovs_cnt - 1].iov_base;
+	assert(sizeof(*task->status) == task->iovs[task->iovs_cnt - 1].iov_len);
 
-	rte_free(task);
-}
+	/* Transport data len */
+	task->data_len = payload - task->iovs[0].iov_len -
+		task->iovs[task->iovs_cnt - 1].iov_len;
 
-static struct vring_desc *
-descriptor_get_next_split(struct vring_desc *vq_desc,
-				   struct vring_desc *cur_desc)
-{
-	return &vq_desc[cur_desc->next];
-}
+	if (vhost_bdev_process_blk_commands(task->ctrlr->bdev, task))
+		/* invalid response */
+		*task->status = VIRTIO_BLK_S_IOERR;
+	else
+		/* successfully */
+		*task->status = VIRTIO_BLK_S_OK;
 
-static bool
-descriptor_has_next_split(struct vring_desc *cur_desc)
-{
-	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
+	if (task->vq->packed_ring)
+		enqueue_task_packed(task);
+	else
+		enqueue_task(task);
 }
 
-static bool
-descriptor_is_wr_split(struct vring_desc *cur_desc)
+static void
+blk_task_init(struct vhost_blk_task *task)
 {
-	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
+	task->iovs_cnt = 0;
+	task->data_len = 0;
+	task->req = NULL;
+	task->status = NULL;
 }
 
 static void
-vhost_process_payload_chain_split(struct vhost_blk_task *task)
+submit_inflight_vq(struct vhost_blk_queue *vq)
 {
-	void *data;
-	uint64_t chunck_len;
+	struct rte_vhost_ring_inflight *inflight_ring;
+	struct rte_vhost_resubmit_info *resubmit_inflight;
+	struct vhost_blk_task *task;
 
-	task->iovs_cnt = 0;
+	inflight_ring = &vq->inflight_ring;
+	resubmit_inflight = inflight_ring->resubmit_inflight;
 
-	do {
-		chunck_len = task->desc_split->len;
-		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-						     task->desc_split->addr,
-						     &chunck_len);
-		if (!data || chunck_len != task->desc_split->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			return;
-		}
+	if (resubmit_inflight == NULL ||
+	    resubmit_inflight->resubmit_num == 0)
+		return;
 
-		task->iovs[task->iovs_cnt].iov_base = data;
-		task->iovs[task->iovs_cnt].iov_len = task->desc_split->len;
-		task->data_len += task->desc_split->len;
-		task->iovs_cnt++;
-		task->desc_split =
-		descriptor_get_next_split(task->vq->desc, task->desc_split);
-	} while (descriptor_has_next_split(task->desc_split));
-
-	chunck_len = task->desc_split->len;
-	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-						     task->desc_split->addr,
-						     &chunck_len);
-	if (!task->status || chunck_len != task->desc_split->len)
-		fprintf(stderr, "failed to translate desc address.\n");
-}
+	fprintf(stdout, "Resubmit inflight num is %d\n",
+		resubmit_inflight->resubmit_num);
 
-static void
-submit_completion_split(struct vhost_blk_task *task, uint32_t vid,
-	uint32_t q_idx)
-{
-	struct rte_vhost_vring *vq;
-	struct vring_used *used;
+	while (resubmit_inflight->resubmit_num-- > 0) {
+		uint16_t desc_idx;
 
-	vq = task->vq;
-	used = vq->used;
+		desc_idx = resubmit_inflight->resubmit_list[
+					resubmit_inflight->resubmit_num].index;
 
-	rte_vhost_set_last_inflight_io_split(vid, q_idx, task->req_idx);
+		if (vq->packed_ring) {
+			uint16_t task_idx;
+			struct rte_vhost_inflight_desc_packed *desc;
 
-	/* Fill out the next entry in the "used" ring.  id = the
-	 * index of the descriptor that contained the blk request.
-	 * len = the total amount of data transferred for the blk
-	 * request. We must report the correct len, for variable
-	 * length blk CDBs, where we may return less data than
-	 * allocated by the guest VM.
-	 */
-	used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
-	used->ring[used->idx & (vq->size - 1)].len = task->data_len;
-	rte_smp_mb();
-	used->idx++;
-	rte_smp_mb();
+			desc = inflight_ring->inflight_packed->desc;
+			task_idx = desc[desc[desc_idx].last].id;
+			task = &vq->tasks[task_idx];
 
-	rte_vhost_clr_inflight_desc_split(vid, q_idx, used->idx, task->req_idx);
+			task->req_idx = desc_idx;
+			task->chain_num = desc[desc_idx].num;
+			task->buffer_id = task_idx;
+			task->inflight_idx = desc_idx;
 
-	/* Send an interrupt back to the guest VM so that it knows
-	 * a completion is ready to be processed.
-	 */
-	rte_vhost_vring_call(task->bdev->vid, q_idx);
+			vq->last_avail_idx += desc[desc_idx].num;
+			if (vq->last_avail_idx >= vq->vring.size) {
+				vq->last_avail_idx -= vq->vring.size;
+				vq->avail_wrap_counter =
+					!vq->avail_wrap_counter;
+			}
+		} else
+			/* In split ring, the desc_idx is the req_id
+			 * which was initialized when allocated the task pool.
+			 */
+			task = &vq->tasks[desc_idx];
+
+		blk_task_init(task);
+		process_blk_task(task);
+	}
+
+	free(resubmit_inflight->resubmit_list);
+	resubmit_inflight->resubmit_list = NULL;
 }
 
-static void
-submit_inflight_vq_split(struct vhost_blk_ctrlr *ctrlr,
-	uint32_t q_idx)
+/* Use the buffer_id as the task_idx */
+static uint16_t
+vhost_blk_vq_get_desc_chain_buffer_id(struct vhost_blk_queue *vq,
+				      uint16_t *req_head, uint16_t *num)
 {
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_ring_inflight *inflight_vq;
-	struct rte_vhost_resubmit_info *resubmit_inflight;
-	struct rte_vhost_resubmit_desc *resubmit_list;
-	struct vhost_blk_task *task;
-	int req_idx;
-	uint64_t chunck_len;
-	int ret;
+	struct vring_packed_desc *desc = &vq->vring.desc_packed[
+						vq->last_avail_idx];
 
-	blk_vq = &ctrlr->bdev->queues[q_idx];
-	inflight_vq = &blk_vq->inflight_vq;
-	resubmit_inflight = inflight_vq->resubmit_inflight;
-	resubmit_list = resubmit_inflight->resubmit_list;
+	*req_head = vq->last_avail_idx;
+	*num = 1;
 
-	task = rte_zmalloc(NULL, sizeof(*task), 0);
-	assert(task != NULL);
+	while (descriptor_has_next_packed(desc)) {
+		vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
+		desc = &vq->vring.desc_packed[vq->last_avail_idx];
+		*num += 1;
+	}
 
-	task->ctrlr = ctrlr;
-	task->bdev = ctrlr->bdev;
-	task->vq = &blk_vq->vq;
+	/* Point to next desc */
+	vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
+	if (vq->last_avail_idx < *req_head)
+		vq->avail_wrap_counter = !vq->avail_wrap_counter;
 
-	while (resubmit_inflight->resubmit_num-- > 0) {
-		req_idx = resubmit_list[resubmit_inflight->resubmit_num].index;
-		task->req_idx = req_idx;
-		task->desc_split = &task->vq->desc[task->req_idx];
-		task->iovs_cnt = 0;
-		task->data_len = 0;
-		task->req = NULL;
-		task->status = NULL;
+	return desc->id;
+}
 
-		/* does not support indirect descriptors */
-		assert(task->desc_split != NULL);
-		assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0);
+static uint16_t
+vq_get_desc_idx(struct vhost_blk_queue *vq)
+{
+	uint16_t desc_idx;
+	uint16_t last_avail_idx;
 
-		chunck_len = task->desc_split->len;
-		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-				task->desc_split->addr, &chunck_len);
-		if (!task->req || chunck_len != task->desc_split->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			rte_free(task);
-			return;
-		}
+	last_avail_idx = vq->last_avail_idx & (vq->vring.size - 1);
+	desc_idx = vq->vring.avail->ring[last_avail_idx];
+	vq->last_avail_idx++;
 
-		task->desc_split = descriptor_get_next_split(task->vq->desc,
-							     task->desc_split);
-		if (!descriptor_has_next_split(task->desc_split)) {
-			task->dxfer_dir = BLK_DIR_NONE;
-			chunck_len = task->desc_split->len;
-			task->status = (void *)(uintptr_t)
-				       gpa_to_vva(task->bdev->vid,
-						  task->desc_split->addr,
-						  &chunck_len);
-			if (!task->status ||
-				chunck_len != task->desc_split->len) {
-				fprintf(stderr,
-					"failed to translate desc address.\n");
-				rte_free(task);
-				return;
-			}
-		} else {
-			task->readtype =
-				descriptor_is_wr_split(task->desc_split);
-			vhost_process_payload_chain_split(task);
-		}
+	return desc_idx;
+}
 
-		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
-		if (ret) {
-			/* invalid response */
-			*task->status = VIRTIO_BLK_S_IOERR;
-		} else {
-			/* successfully */
-			*task->status = VIRTIO_BLK_S_OK;
-		}
-		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
-	}
+static int
+vhost_blk_vq_is_avail(struct vhost_blk_queue *vq)
+{
+	if (vq->packed_ring) {
+		uint16_t flags = vq->vring.desc_packed[
+					vq->last_avail_idx].flags;
+		bool avail_wrap_counter = vq->avail_wrap_counter;
 
-	rte_free(task);
+		return (!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter &&
+			!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter);
+	} else {
+		if (vq->vring.avail->idx != vq->last_avail_idx)
+			return 1;
+
+		return 0;
+	}
 }
 
 static void
-process_requestq_split(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
+process_vq(struct vhost_blk_queue *vq)
 {
-	int ret;
-	int req_idx;
-	uint16_t last_idx;
-	uint64_t chunck_len;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_vring *vq;
 	struct vhost_blk_task *task;
 
-	blk_vq = &ctrlr->bdev->queues[q_idx];
-	vq = &blk_vq->vq;
+	if (vq->packed_ring) {
+		while (vhost_blk_vq_is_avail(vq)) {
+			uint16_t task_idx, req_idx, last_idx, chain_num;
 
-	task = rte_zmalloc(NULL, sizeof(*task), 0);
-	assert(task != NULL);
-	task->ctrlr = ctrlr;
-	task->bdev = ctrlr->bdev;
-	task->vq = vq;
+			task_idx = vhost_blk_vq_get_desc_chain_buffer_id(vq,
+					&req_idx, &chain_num);
+			task = &vq->tasks[task_idx];
 
-	while (vq->avail->idx != blk_vq->last_avail_idx) {
-		last_idx = blk_vq->last_avail_idx & (vq->size - 1);
-		req_idx = vq->avail->ring[last_idx];
-		task->req_idx = req_idx;
-		task->desc_split = &task->vq->desc[task->req_idx];
-		task->iovs_cnt = 0;
-		task->data_len = 0;
-		task->req = NULL;
-		task->status = NULL;
+			blk_task_init(task);
+			task->req_idx = req_idx;
+			task->chain_num = chain_num;
+			task->buffer_id = task_idx;
+			last_idx = (req_idx + chain_num - 1) % vq->vring.size;
 
-		rte_vhost_set_inflight_desc_split(ctrlr->bdev->vid, q_idx,
-							task->req_idx);
-
-		/* does not support indirect descriptors */
-		assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0);
+			rte_vhost_set_inflight_desc_packed(task->ctrlr->vid,
+							   vq->id,
+							   task->req_idx,
+							   last_idx,
+							   &task->inflight_idx);
 
-		chunck_len = task->desc_split->len;
-		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-				task->desc_split->addr, &chunck_len);
-		if (!task->req || chunck_len != task->desc_split->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			rte_free(task);
-			return;
-		}
-
-		task->desc_split = descriptor_get_next_split(task->vq->desc,
-							     task->desc_split);
-		if (!descriptor_has_next_split(task->desc_split)) {
-			task->dxfer_dir = BLK_DIR_NONE;
-			chunck_len = task->desc_split->len;
-			task->status = (void *)(uintptr_t)
-					      gpa_to_vva(task->bdev->vid,
-							 task->desc_split->addr,
-							 &chunck_len);
-			if (!task->status ||
-				chunck_len != task->desc_split->len) {
-				fprintf(stderr,
-					"failed to translate desc address.\n");
-				rte_free(task);
-				return;
-			}
-		} else {
-			task->readtype =
-				descriptor_is_wr_split(task->desc_split);
-			vhost_process_payload_chain_split(task);
+			process_blk_task(task);
 		}
-		blk_vq->last_avail_idx++;
-
-		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
-		if (ret) {
-			/* invalid response */
-			*task->status = VIRTIO_BLK_S_IOERR;
-		} else {
-			/* successfully */
-			*task->status = VIRTIO_BLK_S_OK;
+	} else {
+		while (vhost_blk_vq_is_avail(vq)) {
+			uint16_t desc_idx;
+
+			desc_idx = vq_get_desc_idx(vq);
+			task = &vq->tasks[desc_idx];
+
+			blk_task_init(task);
+			rte_vhost_set_inflight_desc_split(task->ctrlr->vid,
+							  vq->id,
+							  task->req_idx);
+			process_blk_task(task);
 		}
-
-		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
 	}
-
-	rte_free(task);
 }
 
 static void *
 ctrlr_worker(void *arg)
 {
 	struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_ring_inflight *inflight_vq;
 	cpu_set_t cpuset;
 	pthread_t thread;
 	int i;
@@ -774,106 +543,128 @@ ctrlr_worker(void *arg)
 	CPU_SET(0, &cpuset);
 	pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
 
+	for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
+		submit_inflight_vq(&ctrlr->queues[i]);
+
+	while (worker_thread_status != WORKER_STATE_STOP)
+		for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
+			process_vq(&ctrlr->queues[i]);
+
+	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
+	sem_post(&exit_sem);
+	return NULL;
+}
+
+static int
+alloc_task_pool(struct vhost_blk_ctrlr *ctrlr)
+{
+	struct vhost_blk_queue *vq;
+	int i, j;
+
 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
-		blk_vq = &ctrlr->bdev->queues[i];
-		inflight_vq = &blk_vq->inflight_vq;
-		if (inflight_vq->resubmit_inflight != NULL &&
-		    inflight_vq->resubmit_inflight->resubmit_num != 0) {
-			if (ctrlr->packed_ring)
-				submit_inflight_vq_packed(ctrlr, i);
-			else
-				submit_inflight_vq_split(ctrlr, i);
+		vq = &ctrlr->queues[i];
+
+		vq->tasks = rte_zmalloc(NULL,
+			sizeof(struct vhost_blk_task) * vq->vring.size, 0);
+		if (!vq->tasks) {
+			fprintf(stderr, "Failed to allocate task memory\n");
+			return -1;
 		}
-	}
 
-	while (!g_should_stop && ctrlr->bdev != NULL) {
-		for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
-			if (ctrlr->packed_ring)
-				process_requestq_packed(ctrlr, i);
-			else
-				process_requestq_split(ctrlr, i);
+		for (j = 0; j < vq->vring.size; j++) {
+			vq->tasks[j].req_idx = j;
+			vq->tasks[j].ctrlr = ctrlr;
+			vq->tasks[j].vq = vq;
 		}
 	}
 
-	g_should_stop = 2;
-	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
-	sem_post(&exit_sem);
-	return NULL;
+	return 0;
+}
+
+static void
+free_task_pool(struct vhost_blk_ctrlr *ctrlr)
+{
+	int i;
+
+	for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
+		rte_free(ctrlr->queues[i].tasks);
 }
 
 static int
 new_device(int vid)
 {
 	struct vhost_blk_ctrlr *ctrlr;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_vring *vq;
+	struct vhost_blk_queue *vq;
+	char path[PATH_MAX];
 	uint64_t features;
 	pthread_t tid;
 	int i, ret;
+	bool packed_ring;
 
-	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
+	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
+	if (ret) {
+		fprintf(stderr, "Failed to get the socket path\n");
+		return -1;
+	}
+
+	ctrlr = vhost_blk_ctrlr_find(path);
 	if (!ctrlr) {
-		fprintf(stderr, "Controller is not ready\n");
+		fprintf(stderr, "Failed to find controller\n");
 		return -1;
 	}
 
 	if (ctrlr->started)
 		return 0;
 
-	ctrlr->bdev->vid = vid;
+	ctrlr->vid = vid;
 	ret = rte_vhost_get_negotiated_features(vid, &features);
 	if (ret) {
-		fprintf(stderr, "failed to get the negotiated features\n");
+		fprintf(stderr, "Failed to get the negotiated features\n");
 		return -1;
 	}
-	ctrlr->packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));
-
-	ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
-	if (ret)
-		fprintf(stderr, "Get Controller memory region failed\n");
-	assert(ctrlr->mem != NULL);
+	packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));
 
 	/* Disable Notifications and init last idx */
 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
-		blk_vq = &ctrlr->bdev->queues[i];
-		vq = &blk_vq->vq;
-
-		ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
-		assert(ret == 0);
-
-		ret = rte_vhost_get_vring_base(ctrlr->bdev->vid, i,
-					       &blk_vq->last_avail_idx,
-					       &blk_vq->last_used_idx);
-		assert(ret == 0);
-
-		ret = rte_vhost_get_vhost_ring_inflight(ctrlr->bdev->vid, i,
-							&blk_vq->inflight_vq);
-		assert(ret == 0);
-
-		if (ctrlr->packed_ring) {
+		vq = &ctrlr->queues[i];
+		vq->id = i;
+
+		assert(rte_vhost_get_vhost_vring(ctrlr->vid, i,
+						 &vq->vring) == 0);
+		assert(rte_vhost_get_vring_base(ctrlr->vid, i,
+					       &vq->last_avail_idx,
+					       &vq->last_used_idx) == 0);
+		assert(rte_vhost_get_vhost_ring_inflight(ctrlr->vid, i,
+						&vq->inflight_ring) == 0);
+
+		if (packed_ring) {
 			/* for the reconnection */
-			ret = rte_vhost_get_vring_base_from_inflight(
-				ctrlr->bdev->vid, i,
-				&blk_vq->last_avail_idx,
-				&blk_vq->last_used_idx);
-			assert(ret == 0);
+			assert(rte_vhost_get_vring_base_from_inflight(
+				ctrlr->vid, i,
+				&vq->last_avail_idx,
+				&vq->last_used_idx) == 0);
 
-			blk_vq->avail_wrap_counter = blk_vq->last_avail_idx &
+			vq->avail_wrap_counter = vq->last_avail_idx &
 				(1 << 15);
-			blk_vq->last_avail_idx = blk_vq->last_avail_idx &
+			vq->last_avail_idx = vq->last_avail_idx &
 				0x7fff;
-			blk_vq->used_wrap_counter = blk_vq->last_used_idx &
+			vq->used_wrap_counter = vq->last_used_idx &
 				(1 << 15);
-			blk_vq->last_used_idx = blk_vq->last_used_idx &
+			vq->last_used_idx = vq->last_used_idx &
 				0x7fff;
 		}
 
+		vq->packed_ring = packed_ring;
 		rte_vhost_enable_guest_notification(vid, i, 0);
 	}
 
+	assert(rte_vhost_get_mem_table(vid, &ctrlr->mem) == 0);
+	assert(ctrlr->mem != NULL);
+	assert(alloc_task_pool(ctrlr) == 0);
+
 	/* start polling vring */
-	g_should_stop = 0;
-	fprintf(stdout, "New Device %s, Device ID %d\n", dev_pathname, vid);
+	worker_thread_status = WORKER_STATE_START;
+	fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
 	if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
 		fprintf(stderr, "Worker Thread Started Failed\n");
 		return -1;
@@ -890,7 +681,7 @@ destroy_device(int vid)
 {
 	char path[PATH_MAX];
 	struct vhost_blk_ctrlr *ctrlr;
-	struct vhost_blk_queue *blk_vq;
+	struct vhost_blk_queue *vq;
 	int i, ret;
 
 	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
@@ -909,27 +700,27 @@ destroy_device(int vid)
 	if (!ctrlr->started)
 		return;
 
-	g_should_stop = 1;
-	while (g_should_stop != 2)
-		;
+	worker_thread_status = WORKER_STATE_STOP;
+	sem_wait(&exit_sem);
 
 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
-		blk_vq = &ctrlr->bdev->queues[i];
-		if (ctrlr->packed_ring) {
-			blk_vq->last_avail_idx |= (blk_vq->avail_wrap_counter <<
+		vq = &ctrlr->queues[i];
+		if (vq->packed_ring) {
+			vq->last_avail_idx |= (vq->avail_wrap_counter <<
 				15);
-			blk_vq->last_used_idx |= (blk_vq->used_wrap_counter <<
+			vq->last_used_idx |= (vq->used_wrap_counter <<
 				15);
 		}
-		rte_vhost_set_vring_base(ctrlr->bdev->vid, i,
-					 blk_vq->last_avail_idx,
-					 blk_vq->last_used_idx);
+
+		rte_vhost_set_vring_base(ctrlr->vid, i,
+					 vq->last_avail_idx,
+					 vq->last_used_idx);
 	}
 
+	free_task_pool(ctrlr);
 	free(ctrlr->mem);
 
 	ctrlr->started = 0;
-	sem_wait(&exit_sem);
 }
 
 static int
@@ -964,13 +755,13 @@ vhost_blk_bdev_construct(const char *bdev_name,
 	bdev->blockcnt = blk_cnt;
 	bdev->write_cache = wce_enable;
 
-	fprintf(stdout, "blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen,
+	fprintf(stdout, "Blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen,
 		bdev->blockcnt);
 
 	/* use memory as disk storage space */
 	bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
 	if (!bdev->data) {
-		fprintf(stderr, "no enough reserved huge memory for disk\n");
+		fprintf(stderr, "No enough reserved huge memory for disk\n");
 		free(bdev);
 		return NULL;
 	}
@@ -997,7 +788,7 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name)
 	unlink(dev_pathname);
 
 	if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
-		fprintf(stderr, "socket %s already exists\n", dev_pathname);
+		fprintf(stderr, "Socket %s already exists\n", dev_pathname);
 		return NULL;
 	}
 
@@ -1008,7 +799,7 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name)
 		return NULL;
 	}
 
-	/* set proper features */
+	/* set vhost user protocol features */
 	vhost_dev_install_rte_compat_hooks(dev_pathname);
 
 	ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
@@ -1033,28 +824,32 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name)
 }
 
 static void
-signal_handler(__rte_unused int signum)
+vhost_blk_ctrlr_destroy(struct vhost_blk_ctrlr *ctrlr)
 {
-	struct vhost_blk_ctrlr *ctrlr;
-
-	unlink(dev_pathname);
+	if (ctrlr->bdev != NULL) {
+		if (ctrlr->bdev->data != NULL)
+			rte_free(ctrlr->bdev->data);
 
-	if (g_should_stop != -1) {
-		g_should_stop = 1;
-		while (g_should_stop != 2)
-			;
+		rte_free(ctrlr->bdev);
 	}
+	rte_free(ctrlr);
+
+	rte_vhost_driver_unregister(dev_pathname);
+}
+
+static void
+signal_handler(__rte_unused int signum)
+{
+	struct vhost_blk_ctrlr *ctrlr;
 
 	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
-	if (ctrlr != NULL) {
-		if (ctrlr->bdev != NULL) {
-			rte_free(ctrlr->bdev->data);
-			rte_free(ctrlr->bdev);
-		}
-		rte_free(ctrlr);
-	}
+	if (ctrlr == NULL)
+		return;
 
-	rte_vhost_driver_unregister(dev_pathname);
+	if (ctrlr->started)
+		destroy_device(ctrlr->vid);
+
+	vhost_blk_ctrlr_destroy(ctrlr);
 	exit(0);
 }
 
@@ -1062,14 +857,12 @@ int main(int argc, char *argv[])
 {
 	int ret;
 
-	signal(SIGINT, signal_handler);
-
 	/* init EAL */
 	ret = rte_eal_init(argc, argv);
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
 
-	g_vhost_ctrlr = vhost_blk_ctrlr_construct("vhost.socket");
+	g_vhost_ctrlr = vhost_blk_ctrlr_construct(CTRLR_NAME);
 	if (g_vhost_ctrlr == NULL) {
 		fprintf(stderr, "Construct vhost blk controller failed\n");
 		return 0;
@@ -1080,6 +873,8 @@ int main(int argc, char *argv[])
 		return -1;
 	}
 
+	signal(SIGINT, signal_handler);
+
 	rte_vhost_driver_start(dev_pathname);
 
 	/* loop for exit the application */
diff --git a/examples/vhost_blk/vhost_blk.h b/examples/vhost_blk/vhost_blk.h
index 933e2b7c5..4a8040588 100644
--- a/examples/vhost_blk/vhost_blk.h
+++ b/examples/vhost_blk/vhost_blk.h
@@ -30,12 +30,18 @@ struct vring_packed_desc {
 #endif
 
 struct vhost_blk_queue {
-	struct rte_vhost_vring vq;
-	struct rte_vhost_ring_inflight inflight_vq;
+	struct rte_vhost_vring vring;
+	struct rte_vhost_ring_inflight inflight_ring;
+
 	uint16_t last_avail_idx;
 	uint16_t last_used_idx;
+	uint16_t id;
+
 	bool avail_wrap_counter;
 	bool used_wrap_counter;
+	bool packed_ring;
+
+	struct vhost_blk_task *tasks;
 };
 
 #define NUM_OF_BLK_QUEUES 1
@@ -43,10 +49,6 @@ struct vhost_blk_queue {
 #define min(a, b) (((a) < (b)) ? (a) : (b))
 
 struct vhost_block_dev {
-	/** ID for vhost library. */
-	int vid;
-	/** Queues for the block device */
-	struct vhost_blk_queue queues[NUM_OF_BLK_QUEUES];
 	/** Unique name for this block device. */
 	char name[64];
 
@@ -68,8 +70,10 @@ struct vhost_block_dev {
 
 struct vhost_blk_ctrlr {
 	uint8_t started;
-	uint8_t packed_ring;
-	uint8_t need_restart;
+	/** ID for vhost library. */
+	int vid;
+	/** Queues for the block device */
+	struct vhost_blk_queue queues[NUM_OF_BLK_QUEUES];
 	/** Only support 1 LUN for the example */
 	struct vhost_block_dev *bdev;
 	/** VM memory region */
@@ -85,31 +89,20 @@ enum blk_data_dir {
 };
 
 struct vhost_blk_task {
-	uint8_t readtype;
 	uint8_t req_idx;
-	uint16_t head_idx;
-	uint16_t last_idx;
+	uint16_t chain_num;
 	uint16_t inflight_idx;
 	uint16_t buffer_id;
 	uint32_t dxfer_dir;
 	uint32_t data_len;
-	struct virtio_blk_outhdr *req;
 
+	struct virtio_blk_outhdr *req;
 	volatile uint8_t *status;
-
 	struct iovec iovs[VHOST_BLK_MAX_IOVS];
 	uint32_t iovs_cnt;
-	struct vring_packed_desc *desc_packed;
-	struct vring_desc *desc_split;
-	struct rte_vhost_vring *vq;
-	struct vhost_block_dev *bdev;
-	struct vhost_blk_ctrlr *ctrlr;
-};
 
-struct inflight_blk_task {
-	struct vhost_blk_task blk_task;
-	struct rte_vhost_inflight_desc_packed *inflight_desc;
-	struct rte_vhost_inflight_info_packed *inflight_packed;
+	struct vhost_blk_queue *vq;
+	struct vhost_blk_ctrlr *ctrlr;
 };
 
 struct vhost_blk_ctrlr *g_vhost_ctrlr;
-- 
2.17.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example
  2020-02-28 15:32 [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example Jin Yu
@ 2020-04-27  8:01 ` Maxime Coquelin
  2020-04-28 16:05 ` Maxime Coquelin
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2020-04-27  8:01 UTC (permalink / raw)
  To: Jin Yu, Tiwei Bie, Zhihong Wang, John McNamara, Marko Kovacevic; +Cc: dev



On 2/28/20 4:32 PM, Jin Yu wrote:
> Decrease the code and make it easier to read. It's
> useful for understanding the inflight APIs and how
> packed ring works. Update the RST because the packed
> ring patch has been merged to QEMU master and ring_packed
> parameter changes to packed.
> 
> Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage sample")
> 
> Signed-off-by: Jin Yu <jin.yu@intel.com>
> ---
>  doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
>  examples/vhost_blk/blk.c               |   13 +-
>  examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
>  examples/vhost_blk/vhost_blk.h         |   39 +-
>  4 files changed, 494 insertions(+), 705 deletions(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example
  2020-02-28 15:32 [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example Jin Yu
  2020-04-27  8:01 ` Maxime Coquelin
@ 2020-04-28 16:05 ` Maxime Coquelin
  2020-04-29 17:53 ` Ferruh Yigit
  2020-04-30 17:20 ` [dpdk-dev] [PATCH v2] " Jin Yu
  3 siblings, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2020-04-28 16:05 UTC (permalink / raw)
  To: Jin Yu, Tiwei Bie, Zhihong Wang, John McNamara, Marko Kovacevic; +Cc: dev



On 2/28/20 4:32 PM, Jin Yu wrote:
> Decrease the code and make it easier to read. It's
> useful for understanding the inflight APIs and how
> packed ring works. Update the RST because the packed
> ring patch has been merged to QEMU master and ring_packed
> parameter changes to packed.
> 
> Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage sample")
> 
> Signed-off-by: Jin Yu <jin.yu@intel.com>
> ---
>  doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
>  examples/vhost_blk/blk.c               |   13 +-
>  examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
>  examples/vhost_blk/vhost_blk.h         |   39 +-
>  4 files changed, 494 insertions(+), 705 deletions(-)

Applied to dpdk-next-virtio/master

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example
  2020-02-28 15:32 [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example Jin Yu
  2020-04-27  8:01 ` Maxime Coquelin
  2020-04-28 16:05 ` Maxime Coquelin
@ 2020-04-29 17:53 ` Ferruh Yigit
  2020-04-30  1:42   ` Yu, Jin
  2020-04-30 17:20 ` [dpdk-dev] [PATCH v2] " Jin Yu
  3 siblings, 1 reply; 10+ messages in thread
From: Ferruh Yigit @ 2020-04-29 17:53 UTC (permalink / raw)
  To: Jin Yu, Maxime Coquelin, Tiwei Bie, Zhihong Wang, John McNamara,
	Marko Kovacevic
  Cc: dev

On 2/28/2020 3:32 PM, Jin Yu wrote:
> Decrease the code and make it easier to read. It's
> useful for understanding the inflight APIs and how
> packed ring works. Update the RST because the packed
> ring patch has been merged to QEMU master and ring_packed
> parameter changes to packed.
> 
> Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage sample")
> 
> Signed-off-by: Jin Yu <jin.yu@intel.com>
> ---
>  doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
>  examples/vhost_blk/blk.c               |   13 +-
>  examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
>  examples/vhost_blk/vhost_blk.h         |   39 +-
>  4 files changed, 494 insertions(+), 705 deletions(-)

Getting following build error with 32-bit build, can you please check it:

.../examples/vhost_blk/vhost_blk.c: In function ‘desc_payload_to_iovs’:
.../examples/vhost_blk/vhost_blk.c:157:9: error: cast to pointer from integer of
different size [-Werror=int-to-pointer-cast]
  157 |   vva = (void *)gpa_to_vva(ctrlr,
      |         ^



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example
  2020-04-29 17:53 ` Ferruh Yigit
@ 2020-04-30  1:42   ` Yu, Jin
  2020-04-30  9:08     ` Maxime Coquelin
  0 siblings, 1 reply; 10+ messages in thread
From: Yu, Jin @ 2020-04-30  1:42 UTC (permalink / raw)
  To: Yigit, Ferruh, Maxime Coquelin, Tiwei Bie, Wang, Zhihong,
	Mcnamara, John, Kovacevic, Marko
  Cc: dev

Got it. I will check and fix it. 

Thanks.

> -----Original Message-----
> From: Yigit, Ferruh <ferruh.yigit@intel.com>
> Sent: Thursday, April 30, 2020 1:54 AM
> To: Yu, Jin <jin.yu@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; Tiwei Bie <tiwei.bie@intel.com>; Wang,
> Zhihong <zhihong.wang@intel.com>; Mcnamara, John
> <john.mcnamara@intel.com>; Kovacevic, Marko <marko.kovacevic@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk
> example
> 
> On 2/28/2020 3:32 PM, Jin Yu wrote:
> > Decrease the code and make it easier to read. It's useful for
> > understanding the inflight APIs and how packed ring works. Update the
> > RST because the packed ring patch has been merged to QEMU master and
> > ring_packed parameter changes to packed.
> >
> > Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage
> > sample")
> >
> > Signed-off-by: Jin Yu <jin.yu@intel.com>
> > ---
> >  doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
> >  examples/vhost_blk/blk.c               |   13 +-
> >  examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
> >  examples/vhost_blk/vhost_blk.h         |   39 +-
> >  4 files changed, 494 insertions(+), 705 deletions(-)
> 
> Getting following build error with 32-bit build, can you please check it:
> 
> .../examples/vhost_blk/vhost_blk.c: In function ‘desc_payload_to_iovs’:
> .../examples/vhost_blk/vhost_blk.c:157:9: error: cast to pointer from integer of
> different size [-Werror=int-to-pointer-cast]
>   157 |   vva = (void *)gpa_to_vva(ctrlr,
>       |         ^
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example
  2020-04-30  1:42   ` Yu, Jin
@ 2020-04-30  9:08     ` Maxime Coquelin
  2020-04-30  9:42       ` Yu, Jin
  0 siblings, 1 reply; 10+ messages in thread
From: Maxime Coquelin @ 2020-04-30  9:08 UTC (permalink / raw)
  To: Yu, Jin, Yigit, Ferruh, Tiwei Bie, Wang, Zhihong, Mcnamara, John,
	Kovacevic, Marko
  Cc: dev



On 4/30/20 3:42 AM, Yu, Jin wrote:
> Got it. I will check and fix it. 

Thanks, I will need the fix today, just send a v2.
If not possible to do it today, please let me know.

Maxime

> Thanks.
> 
>> -----Original Message-----
>> From: Yigit, Ferruh <ferruh.yigit@intel.com>
>> Sent: Thursday, April 30, 2020 1:54 AM
>> To: Yu, Jin <jin.yu@intel.com>; Maxime Coquelin
>> <maxime.coquelin@redhat.com>; Tiwei Bie <tiwei.bie@intel.com>; Wang,
>> Zhihong <zhihong.wang@intel.com>; Mcnamara, John
>> <john.mcnamara@intel.com>; Kovacevic, Marko <marko.kovacevic@intel.com>
>> Cc: dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk
>> example
>>
>> On 2/28/2020 3:32 PM, Jin Yu wrote:
>>> Decrease the code and make it easier to read. It's useful for
>>> understanding the inflight APIs and how packed ring works. Update the
>>> RST because the packed ring patch has been merged to QEMU master and
>>> ring_packed parameter changes to packed.
>>>
>>> Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage
>>> sample")
>>>
>>> Signed-off-by: Jin Yu <jin.yu@intel.com>
>>> ---
>>>  doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
>>>  examples/vhost_blk/blk.c               |   13 +-
>>>  examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
>>>  examples/vhost_blk/vhost_blk.h         |   39 +-
>>>  4 files changed, 494 insertions(+), 705 deletions(-)
>>
>> Getting following build error with 32-bit build, can you please check it:
>>
>> .../examples/vhost_blk/vhost_blk.c: In function ‘desc_payload_to_iovs’:
>> .../examples/vhost_blk/vhost_blk.c:157:9: error: cast to pointer from integer of
>> different size [-Werror=int-to-pointer-cast]
>>   157 |   vva = (void *)gpa_to_vva(ctrlr,
>>       |         ^
>>
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example
  2020-04-30  9:08     ` Maxime Coquelin
@ 2020-04-30  9:42       ` Yu, Jin
  0 siblings, 0 replies; 10+ messages in thread
From: Yu, Jin @ 2020-04-30  9:42 UTC (permalink / raw)
  To: Maxime Coquelin, Yigit, Ferruh, Tiwei Bie, Wang, Zhihong,
	Mcnamara, John, Kovacevic, Marko
  Cc: dev

Thanks Maxime.
I just sent the V2. Sorry for the delay.

Jin

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Thursday, April 30, 2020 5:08 PM
> To: Yu, Jin <jin.yu@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>; Tiwei
> Bie <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> Mcnamara, John <john.mcnamara@intel.com>; Kovacevic, Marko
> <marko.kovacevic@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk
> example
> 
> 
> 
> On 4/30/20 3:42 AM, Yu, Jin wrote:
> > Got it. I will check and fix it.
> 
> Thanks, I will need the fix today, just send a v2.
> If not possible to do it today, please let me know.
> 
> Maxime
> 
> > Thanks.
> >
> >> -----Original Message-----
> >> From: Yigit, Ferruh <ferruh.yigit@intel.com>
> >> Sent: Thursday, April 30, 2020 1:54 AM
> >> To: Yu, Jin <jin.yu@intel.com>; Maxime Coquelin
> >> <maxime.coquelin@redhat.com>; Tiwei Bie <tiwei.bie@intel.com>; Wang,
> >> Zhihong <zhihong.wang@intel.com>; Mcnamara, John
> >> <john.mcnamara@intel.com>; Kovacevic, Marko
> >> <marko.kovacevic@intel.com>
> >> Cc: dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH] examples/vhost_blk: refactor
> >> vhost-blk example
> >>
> >> On 2/28/2020 3:32 PM, Jin Yu wrote:
> >>> Decrease the code and make it easier to read. It's useful for
> >>> understanding the inflight APIs and how packed ring works. Update
> >>> the RST because the packed ring patch has been merged to QEMU
> master
> >>> and ring_packed parameter changes to packed.
> >>>
> >>> Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage
> >>> sample")
> >>>
> >>> Signed-off-by: Jin Yu <jin.yu@intel.com>
> >>> ---
> >>>  doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
> >>>  examples/vhost_blk/blk.c               |   13 +-
> >>>  examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
> >>>  examples/vhost_blk/vhost_blk.h         |   39 +-
> >>>  4 files changed, 494 insertions(+), 705 deletions(-)
> >>
> >> Getting following build error with 32-bit build, can you please check it:
> >>
> >> .../examples/vhost_blk/vhost_blk.c: In function ‘desc_payload_to_iovs’:
> >> .../examples/vhost_blk/vhost_blk.c:157:9: error: cast to pointer from
> >> integer of different size [-Werror=int-to-pointer-cast]
> >>   157 |   vva = (void *)gpa_to_vva(ctrlr,
> >>       |         ^
> >>
> >


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [dpdk-dev] [PATCH v2] examples/vhost_blk: refactor vhost-blk example
  2020-02-28 15:32 [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example Jin Yu
                   ` (2 preceding siblings ...)
  2020-04-29 17:53 ` Ferruh Yigit
@ 2020-04-30 17:20 ` Jin Yu
  2020-04-30 20:41   ` Maxime Coquelin
  2020-04-30 20:53   ` Maxime Coquelin
  3 siblings, 2 replies; 10+ messages in thread
From: Jin Yu @ 2020-04-30 17:20 UTC (permalink / raw)
  To: Maxime Coquelin, Tiwei Bie, Zhihong Wang, John McNamara, Marko Kovacevic
  Cc: dev, Jin Yu

Reduce the amount of code and make it easier to read. The
example is useful for understanding the inflight APIs and how
the packed ring works. Update the RST because the packed
ring patch has been merged to QEMU master and the ring_packed
parameter has been renamed to packed.

Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage sample")

Signed-off-by: Jin Yu <jin.yu@intel.com>
---
V2 - fix build error in 32-bit
---
 doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
 examples/vhost_blk/blk.c               |   13 +-
 examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
 examples/vhost_blk/vhost_blk.h         |   39 +-
 4 files changed, 494 insertions(+), 705 deletions(-)

diff --git a/doc/guides/sample_app_ug/vhost_blk.rst b/doc/guides/sample_app_ug/vhost_blk.rst
index 39096e2e4..681de6f3e 100644
--- a/doc/guides/sample_app_ug/vhost_blk.rst
+++ b/doc/guides/sample_app_ug/vhost_blk.rst
@@ -51,7 +51,7 @@ Start the VM
         -drive file=os.img,if=none,id=disk \
         -device ide-hd,drive=disk,bootindex=0 \
         -chardev socket,id=char0,reconnect=1,path=/tmp/vhost.socket \
-        -device vhost-user-blk-pci,ring_packed=1,chardev=char0,num-queues=1 \
+        -device vhost-user-blk-pci,packed=on,chardev=char0,num-queues=1 \
         ...
 
 .. note::
@@ -59,5 +59,7 @@ Start the VM
     Qemu v4.0 or newer version is required.
     reconnect=1 means live recovery support that qemu can reconnect vhost_blk
     after we restart vhost_blk example.
-    ring_packed=1 means the device support packed ring but need the guest kernel
-    version >= 5.0
+    packed=on means the device supports packed ring, but it needs a guest kernel
+    version >= 5.0.
+    As of Qemu commit 9bb73502321d46f4d320fa17aa38201445783fc4, both
+    vhost-blk reconnect and packed ring are supported.
diff --git a/examples/vhost_blk/blk.c b/examples/vhost_blk/blk.c
index 1b0b764b2..9048e2f8a 100644
--- a/examples/vhost_blk/blk.c
+++ b/examples/vhost_blk/blk.c
@@ -50,7 +50,10 @@ vhost_bdev_blk_readwrite(struct vhost_block_dev *bdev,
 
 	offset = lba_512 * 512;
 
-	for (i = 0; i < task->iovs_cnt; i++) {
+	/* iovs[0] is the head and iovs[iovs_cnt - 1] is the tail
+	 * Middle is the data range
+	 */
+	for (i = 1; i < task->iovs_cnt - 1; i++) {
 		if (task->dxfer_dir == BLK_DIR_TO_DEV)
 			memcpy(bdev->data + offset, task->iovs[i].iov_base,
 			       task->iovs[i].iov_len);
@@ -83,7 +86,7 @@ vhost_bdev_process_blk_commands(struct vhost_block_dev *bdev,
 				"%s - passed IO buffer is not multiple of 512b"
 				"(req_idx = %"PRIu16").\n",
 				task->req->type ? "WRITE" : "READ",
-				task->head_idx);
+				task->req_idx);
 			return VIRTIO_BLK_S_UNSUPP;
 		}
 
@@ -98,14 +101,10 @@ vhost_bdev_process_blk_commands(struct vhost_block_dev *bdev,
 				"%s - passed IO buffer is not multiple of 512b"
 				"(req_idx = %"PRIu16").\n",
 				task->req->type ? "WRITE" : "READ",
-				task->head_idx);
+				task->req_idx);
 			return VIRTIO_BLK_S_UNSUPP;
 		}
 
-		if (task->readtype) {
-			fprintf(stderr, "type isn't right\n");
-			return VIRTIO_BLK_S_IOERR;
-		}
 		task->dxfer_dir = BLK_DIR_TO_DEV;
 		vhost_bdev_blk_readwrite(bdev, task,
 					 task->req->sector, task->data_len);
diff --git a/examples/vhost_blk/vhost_blk.c b/examples/vhost_blk/vhost_blk.c
index 74c82a900..82037ea9e 100644
--- a/examples/vhost_blk/vhost_blk.c
+++ b/examples/vhost_blk/vhost_blk.c
@@ -26,15 +26,22 @@
 
 #define MAX_TASK		12
 
-#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \
+#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) |\
 			    (1ULL << VIRTIO_F_VERSION_1) |\
 			    (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
 			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
 
+#define CTRLR_NAME		"vhost.socket"
+
+enum CTRLR_WORKER_STATUS {
+	WORKER_STATE_START = 0,
+	WORKER_STATE_STOP,
+};
+
 /* Path to folder where character device will be created. Can be set by user. */
 static char dev_pathname[PATH_MAX] = "";
 static sem_t exit_sem;
-static int g_should_stop = -1;
+static enum CTRLR_WORKER_STATUS worker_thread_status;
 
 struct vhost_blk_ctrlr *
 vhost_blk_ctrlr_find(const char *ctrlr_name)
@@ -46,716 +53,478 @@ vhost_blk_ctrlr_find(const char *ctrlr_name)
 	return g_vhost_ctrlr;
 }
 
-static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len)
+static uint64_t
+gpa_to_vva(struct vhost_blk_ctrlr *ctrlr, uint64_t gpa, uint64_t *len)
 {
-	char path[PATH_MAX];
-	struct vhost_blk_ctrlr *ctrlr;
-	int ret = 0;
-
-	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
-	if (ret) {
-		fprintf(stderr, "Cannot get socket name\n");
-		assert(ret != 0);
-	}
-
-	ctrlr = vhost_blk_ctrlr_find(path);
-	if (!ctrlr) {
-		fprintf(stderr, "Controller is not ready\n");
-		assert(ctrlr != NULL);
-	}
-
 	assert(ctrlr->mem != NULL);
 
 	return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
 }
 
-static struct vring_packed_desc *
-descriptor_get_next_packed(struct rte_vhost_vring *vq,
-			     uint16_t *idx)
+static void
+enqueue_task(struct vhost_blk_task *task)
 {
-	if (vq->desc_packed[*idx % vq->size].flags & VIRTQ_DESC_F_NEXT) {
-		*idx += 1;
-		return &vq->desc_packed[*idx % vq->size];
-	}
+	struct vhost_blk_queue *vq = task->vq;
+	struct vring_used *used = vq->vring.used;
 
-	return NULL;
-}
+	rte_vhost_set_last_inflight_io_split(task->ctrlr->vid,
+		vq->id, task->req_idx);
 
-static bool
-descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
-{
-	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
-}
+	/* Fill out the next entry in the "used" ring.  id = the
+	 * index of the descriptor that contained the blk request.
+	 * len = the total amount of data transferred for the blk
+	 * request. We must report the correct len, for variable
+	 * length blk CDBs, where we may return less data than
+	 * allocated by the guest VM.
+	 */
+	used->ring[used->idx & (vq->vring.size - 1)].id = task->req_idx;
+	used->ring[used->idx & (vq->vring.size - 1)].len = task->data_len;
+	rte_smp_mb();
+	used->idx++;
+	rte_smp_mb();
 
-static bool
-descriptor_is_wr_packed(struct vring_packed_desc *cur_desc)
-{
-	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
+	rte_vhost_clr_inflight_desc_split(task->ctrlr->vid,
+		vq->id, used->idx, task->req_idx);
+
+	/* Send an interrupt back to the guest VM so that it knows
+	 * a completion is ready to be processed.
+	 */
+	rte_vhost_vring_call(task->ctrlr->vid, vq->id);
 }
 
-static struct rte_vhost_inflight_desc_packed *
-inflight_desc_get_next(struct rte_vhost_inflight_info_packed *inflight_packed,
-			       struct rte_vhost_inflight_desc_packed *cur_desc)
+static void
+enqueue_task_packed(struct vhost_blk_task *task)
 {
-	if (!!(cur_desc->flags & VIRTQ_DESC_F_NEXT))
-		return &inflight_packed->desc[cur_desc->next];
+	struct vhost_blk_queue *vq = task->vq;
+	struct vring_packed_desc *desc;
 
-	return NULL;
+	rte_vhost_set_last_inflight_io_packed(task->ctrlr->vid, vq->id,
+					    task->inflight_idx);
+
+	desc = &vq->vring.desc_packed[vq->last_used_idx];
+	desc->id = task->buffer_id;
+	desc->addr = 0;
+
+	rte_smp_mb();
+	if (vq->used_wrap_counter)
+		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
+	else
+		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
+	rte_smp_mb();
+
+	rte_vhost_clr_inflight_desc_packed(task->ctrlr->vid, vq->id,
+					   task->inflight_idx);
+
+	vq->last_used_idx += task->chain_num;
+	if (vq->last_used_idx >= vq->vring.size) {
+		vq->last_used_idx -= vq->vring.size;
+		vq->used_wrap_counter = !vq->used_wrap_counter;
+	}
+
+	/* Send an interrupt back to the guest VM so that it knows
+	 * a completion is ready to be processed.
+	 */
+	rte_vhost_vring_call(task->ctrlr->vid, vq->id);
 }
 
 static bool
-inflight_desc_has_next(struct rte_vhost_inflight_desc_packed *cur_desc)
+descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
 {
 	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
 }
 
 static bool
-inflight_desc_is_wr(struct rte_vhost_inflight_desc_packed *cur_desc)
+descriptor_has_next_split(struct vring_desc *cur_desc)
 {
-	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
+	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
 }
 
-static void
-inflight_process_payload_chain_packed(struct inflight_blk_task *task)
+static int
+desc_payload_to_iovs(struct vhost_blk_ctrlr *ctrlr, struct iovec *iovs,
+		     uint32_t *iov_index, uintptr_t payload, uint64_t remaining)
 {
-	void *data;
-	uint64_t chunck_len;
-	struct vhost_blk_task *blk_task;
-	struct rte_vhost_inflight_desc_packed *desc;
-
-	blk_task = &task->blk_task;
-	blk_task->iovs_cnt = 0;
+	void *vva;
+	uint64_t len;
 
 	do {
-		desc = task->inflight_desc;
-		chunck_len = desc->len;
-		data = (void *)(uintptr_t)gpa_to_vva(blk_task->bdev->vid,
-						     desc->addr,
-						     &chunck_len);
-		if (!data || chunck_len != desc->len) {
+		if (*iov_index >= VHOST_BLK_MAX_IOVS) {
+			fprintf(stderr, "VHOST_BLK_MAX_IOVS reached\n");
+			return -1;
+		}
+		len = remaining;
+		vva = (void *)(uintptr_t)gpa_to_vva(ctrlr,
+				 payload, &len);
+		if (!vva || !len) {
 			fprintf(stderr, "failed to translate desc address.\n");
-			return;
+			return -1;
 		}
 
-		blk_task->iovs[blk_task->iovs_cnt].iov_base = data;
-		blk_task->iovs[blk_task->iovs_cnt].iov_len = desc->len;
-		blk_task->data_len += desc->len;
-		blk_task->iovs_cnt++;
-		task->inflight_desc = inflight_desc_get_next(
-					task->inflight_packed, desc);
-	} while (inflight_desc_has_next(task->inflight_desc));
-
-	chunck_len = task->inflight_desc->len;
-	blk_task->status = (void *)(uintptr_t)gpa_to_vva(
-		blk_task->bdev->vid, task->inflight_desc->addr, &chunck_len);
-	if (!blk_task->status || chunck_len != task->inflight_desc->len)
-		fprintf(stderr, "failed to translate desc address.\n");
+		iovs[*iov_index].iov_base = vva;
+		iovs[*iov_index].iov_len = len;
+		payload += len;
+		remaining -= len;
+		(*iov_index)++;
+	} while (remaining);
+
+	return 0;
 }
 
-static void
-inflight_submit_completion_packed(struct inflight_blk_task *task,
-					      uint32_t q_idx, uint16_t *used_id,
-					      bool *used_wrap_counter)
+static struct vring_desc *
+vring_get_next_desc(struct vhost_blk_queue *vq, struct vring_desc *desc)
 {
-	struct vhost_blk_ctrlr *ctrlr;
-	struct rte_vhost_vring *vq;
-	struct vring_packed_desc *desc;
-	int ret;
+	if (descriptor_has_next_split(desc))
+		return &vq->vring.desc[desc->next];
 
-	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
-	vq = task->blk_task.vq;
-
-	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
-						    task->blk_task.head_idx);
-	if (ret != 0)
-		fprintf(stderr, "failed to set last inflight io\n");
-
-	desc = &vq->desc_packed[*used_id];
-	desc->id = task->blk_task.buffer_id;
-	rte_smp_mb();
-	if (*used_wrap_counter)
-		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
-	else
-		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
-	rte_smp_mb();
+	return NULL;
+}
 
-	*used_id += task->blk_task.iovs_cnt + 2;
-	if (*used_id >= vq->size) {
-		*used_id -= vq->size;
-		*used_wrap_counter = !(*used_wrap_counter);
+static struct vring_packed_desc *
+vring_get_next_desc_packed(struct vhost_blk_queue *vq, uint16_t *req_idx)
+{
+	if (descriptor_has_next_packed(&vq->vring.desc_packed[*req_idx])) {
+		*req_idx = (*req_idx + 1) % vq->vring.size;
+		return &vq->vring.desc_packed[*req_idx];
 	}
 
-	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
-						 task->blk_task.head_idx);
-	if (ret != 0)
-		fprintf(stderr, "failed to clear inflight io\n");
+	return NULL;
+}
+
+static struct rte_vhost_inflight_desc_packed *
+vring_get_next_inflight_desc(struct vhost_blk_queue *vq,
+			struct rte_vhost_inflight_desc_packed *desc)
+{
+	if (!!(desc->flags & VRING_DESC_F_NEXT))
+		return &vq->inflight_ring.inflight_packed->desc[desc->next];
 
-	/* Send an interrupt back to the guest VM so that it knows
-	 * a completion is ready to be processed.
-	 */
-	rte_vhost_vring_call(task->blk_task.bdev->vid, q_idx);
+	return NULL;
 }
 
-static void
-submit_completion_packed(struct vhost_blk_task *task, uint32_t q_idx,
-				  uint16_t *used_id, bool *used_wrap_counter)
+static int
+setup_iovs_from_descs_split(struct vhost_blk_ctrlr *ctrlr,
+			    struct vhost_blk_queue *vq, uint16_t req_idx,
+			    struct iovec *iovs, uint32_t *iovs_idx,
+			    uint32_t *payload)
 {
-	struct vhost_blk_ctrlr *ctrlr;
-	struct rte_vhost_vring *vq;
-	struct vring_packed_desc *desc;
-	int ret;
+	struct vring_desc *desc = &vq->vring.desc[req_idx];
 
-	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
-	vq = task->vq;
+	do {
+		/* does not support indirect descriptors */
+		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);
 
-	ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx,
-						    task->inflight_idx);
-	if (ret != 0)
-		fprintf(stderr, "failed to set last inflight io\n");
+		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
+			fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n");
+			return -1;
+		}
 
-	desc = &vq->desc_packed[*used_id];
-	desc->id = task->buffer_id;
-	rte_smp_mb();
-	if (*used_wrap_counter)
-		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
-	else
-		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
-	rte_smp_mb();
+		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
+			desc->addr, desc->len) != 0) {
+			fprintf(stderr, "Failed to convert desc payload to iovs\n");
+			return -1;
+		}
 
-	*used_id += task->iovs_cnt + 2;
-	if (*used_id >= vq->size) {
-		*used_id -= vq->size;
-		*used_wrap_counter = !(*used_wrap_counter);
-	}
+		*payload += desc->len;
 
-	ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
-						 task->inflight_idx);
-	if (ret != 0)
-		fprintf(stderr, "failed to clear inflight io\n");
+		desc = vring_get_next_desc(vq, desc);
+	} while (desc != NULL);
 
-	/* Send an interrupt back to the guest VM so that it knows
-	 * a completion is ready to be processed.
-	 */
-	rte_vhost_vring_call(task->bdev->vid, q_idx);
+	return 0;
 }
 
-static void
-vhost_process_payload_chain_packed(struct vhost_blk_task *task,
-	uint16_t *idx)
+static int
+setup_iovs_from_descs_packed(struct vhost_blk_ctrlr *ctrlr,
+			     struct vhost_blk_queue *vq, uint16_t req_idx,
+			     struct iovec *iovs, uint32_t *iovs_idx,
+			     uint32_t *payload)
 {
-	void *data;
-	uint64_t chunck_len;
-
-	task->iovs_cnt = 0;
+	struct vring_packed_desc *desc = &vq->vring.desc_packed[req_idx];
 
 	do {
-		chunck_len = task->desc_packed->len;
-		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-						     task->desc_packed->addr,
-							 &chunck_len);
-		if (!data || chunck_len != task->desc_packed->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			return;
+		/* does not support indirect descriptors */
+		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);
+
+		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
+			fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n");
+			return -1;
 		}
 
-		task->iovs[task->iovs_cnt].iov_base = data;
-		task->iovs[task->iovs_cnt].iov_len = task->desc_packed->len;
-		task->data_len += task->desc_packed->len;
-		task->iovs_cnt++;
-		task->desc_packed = descriptor_get_next_packed(task->vq, idx);
-	} while (descriptor_has_next_packed(task->desc_packed));
-
-	task->last_idx = *idx % task->vq->size;
-	chunck_len = task->desc_packed->len;
-	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-						   task->desc_packed->addr,
-						   &chunck_len);
-	if (!task->status || chunck_len != task->desc_packed->len)
-		fprintf(stderr, "failed to translate desc address.\n");
-}
+		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
+			desc->addr, desc->len) != 0) {
+			fprintf(stderr, "Failed to convert desc payload to iovs\n");
+			return -1;
+		}
 
+		*payload += desc->len;
 
-static int
-descriptor_is_available(struct rte_vhost_vring *vring, uint16_t idx,
-					bool avail_wrap_counter)
-{
-	uint16_t flags = vring->desc_packed[idx].flags;
+		desc = vring_get_next_desc_packed(vq, &req_idx);
+	} while (desc != NULL);
 
-	return ((!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter) &&
-		(!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter));
+	return 0;
 }
 
-static void
-process_requestq_packed(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
+static int
+setup_iovs_from_inflight_desc(struct vhost_blk_ctrlr *ctrlr,
+			      struct vhost_blk_queue *vq, uint16_t req_idx,
+			      struct iovec *iovs, uint32_t *iovs_idx,
+			      uint32_t *payload)
 {
-	bool avail_wrap_counter, used_wrap_counter;
-	uint16_t avail_idx, used_idx;
-	int ret;
-	uint64_t chunck_len;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_vring *vq;
-	struct vhost_blk_task *task;
-
-	blk_vq = &ctrlr->bdev->queues[q_idx];
-	vq = &blk_vq->vq;
-
-	avail_idx = blk_vq->last_avail_idx;
-	avail_wrap_counter = blk_vq->avail_wrap_counter;
-	used_idx = blk_vq->last_used_idx;
-	used_wrap_counter = blk_vq->used_wrap_counter;
-
-	task = rte_zmalloc(NULL, sizeof(*task), 0);
-	assert(task != NULL);
-	task->vq = vq;
-	task->bdev = ctrlr->bdev;
+	struct rte_vhost_ring_inflight *inflight_vq;
+	struct rte_vhost_inflight_desc_packed *desc;
 
-	while (descriptor_is_available(vq, avail_idx, avail_wrap_counter)) {
-		task->head_idx = avail_idx;
-		task->desc_packed = &task->vq->desc_packed[task->head_idx];
-		task->iovs_cnt = 0;
-		task->data_len = 0;
-		task->req = NULL;
-		task->status = NULL;
+	inflight_vq = &vq->inflight_ring;
+	desc = &inflight_vq->inflight_packed->desc[req_idx];
 
+	do {
 		/* does not support indirect descriptors */
-		assert((task->desc_packed->flags & VRING_DESC_F_INDIRECT) == 0);
+		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);
 
-		chunck_len = task->desc_packed->len;
-		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-			task->desc_packed->addr, &chunck_len);
-		if (!task->req || chunck_len != task->desc_packed->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			rte_free(task);
-			return;
+		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
+			fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n");
+			return -1;
 		}
 
-		task->desc_packed = descriptor_get_next_packed(task->vq,
-								&avail_idx);
-		assert(task->desc_packed != NULL);
-		if (!descriptor_has_next_packed(task->desc_packed)) {
-			task->dxfer_dir = BLK_DIR_NONE;
-			task->last_idx = avail_idx % vq->size;
-			chunck_len = task->desc_packed->len;
-			task->status = (void *)(uintptr_t)
-					      gpa_to_vva(task->bdev->vid,
-							task->desc_packed->addr,
-							&chunck_len);
-			if (!task->status ||
-				chunck_len != task->desc_packed->len) {
-				fprintf(stderr,
-					"failed to translate desc address.\n");
-				rte_free(task);
-				return;
-			}
-		} else {
-			task->readtype = descriptor_is_wr_packed(
-							task->desc_packed);
-			vhost_process_payload_chain_packed(task, &avail_idx);
-		}
-		task->buffer_id = vq->desc_packed[task->last_idx].id;
-		rte_vhost_set_inflight_desc_packed(ctrlr->bdev->vid, q_idx,
-						   task->head_idx,
-						   task->last_idx,
-						   &task->inflight_idx);
-
-		if (++avail_idx >= vq->size) {
-			avail_idx -= vq->size;
-			avail_wrap_counter = !avail_wrap_counter;
+		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
+			desc->addr, desc->len) != 0) {
+			fprintf(stderr, "Failed to convert desc payload to iovs\n");
+			return -1;
 		}
-		blk_vq->last_avail_idx = avail_idx;
-		blk_vq->avail_wrap_counter = avail_wrap_counter;
 
-		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
-		if (ret) {
-			/* invalid response */
-			*task->status = VIRTIO_BLK_S_IOERR;
-		} else {
-			/* successfully */
-			*task->status = VIRTIO_BLK_S_OK;
-		}
+		*payload += desc->len;
 
-		submit_completion_packed(task, q_idx, &used_idx,
-						&used_wrap_counter);
-		blk_vq->last_used_idx = used_idx;
-		blk_vq->used_wrap_counter = used_wrap_counter;
-	}
+		desc = vring_get_next_inflight_desc(vq, desc);
+	} while (desc != NULL);
 
-	rte_free(task);
+	return 0;
 }
 
 static void
-submit_inflight_vq_packed(struct vhost_blk_ctrlr *ctrlr,
-	uint16_t q_idx)
+process_blk_task(struct vhost_blk_task *task)
 {
-	bool used_wrap_counter;
-	int req_idx, ret;
-	uint16_t used_idx;
-	uint64_t chunck_len;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_ring_inflight *inflight_vq;
-	struct rte_vhost_resubmit_info *resubmit_info;
-	struct rte_vhost_vring *vq;
-	struct inflight_blk_task *task;
-	struct vhost_blk_task *blk_task;
-	struct rte_vhost_inflight_info_packed *inflight_info;
-
-	blk_vq = &ctrlr->bdev->queues[q_idx];
-	vq = &blk_vq->vq;
-	inflight_vq = &blk_vq->inflight_vq;
-	resubmit_info = inflight_vq->resubmit_inflight;
-	inflight_info = inflight_vq->inflight_packed;
-	used_idx = blk_vq->last_used_idx;
-	used_wrap_counter = blk_vq->used_wrap_counter;
-
-	task = rte_malloc(NULL, sizeof(*task), 0);
-	if (!task) {
-		fprintf(stderr, "failed to allocate memory\n");
-		return;
-	}
-	blk_task = &task->blk_task;
-	blk_task->vq = vq;
-	blk_task->bdev = ctrlr->bdev;
-	task->inflight_packed = inflight_vq->inflight_packed;
-
-	while (resubmit_info->resubmit_num-- > 0) {
-		req_idx = resubmit_info->resubmit_num;
-		blk_task->head_idx =
-			resubmit_info->resubmit_list[req_idx].index;
-		task->inflight_desc =
-			&inflight_info->desc[blk_task->head_idx];
-		task->blk_task.iovs_cnt = 0;
-		task->blk_task.data_len = 0;
-		task->blk_task.req = NULL;
-		task->blk_task.status = NULL;
-
-		/* update the avail idx too
-		 * as it's initial value equals to used idx
-		 */
-		blk_vq->last_avail_idx += task->inflight_desc->num;
-		if (blk_vq->last_avail_idx >= vq->size) {
-			blk_vq->last_avail_idx -= vq->size;
-			blk_vq->avail_wrap_counter =
-				!blk_vq->avail_wrap_counter;
-		}
+	uint32_t payload = 0;
 
-		/* does not support indirect descriptors */
-		assert(task->inflight_desc != NULL);
-		assert((task->inflight_desc->flags &
-			VRING_DESC_F_INDIRECT) == 0);
-
-		chunck_len = task->inflight_desc->len;
-		blk_task->req = (void *)(uintptr_t)
-				     gpa_to_vva(blk_task->bdev->vid,
-						task->inflight_desc->addr,
-						&chunck_len);
-		if (!blk_task->req ||
-			chunck_len != task->inflight_desc->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			rte_free(task);
-			return;
-		}
+	if (task->vq->packed_ring) {
+		struct rte_vhost_ring_inflight *inflight_ring;
+		struct rte_vhost_resubmit_info *resubmit_inflight;
 
-		task->inflight_desc = inflight_desc_get_next(
-			task->inflight_packed, task->inflight_desc);
-		assert(task->inflight_desc != NULL);
-		if (!inflight_desc_has_next(task->inflight_desc)) {
-			blk_task->dxfer_dir = BLK_DIR_NONE;
-			chunck_len = task->inflight_desc->len;
-			blk_task->status = (void *)(uintptr_t)
-				gpa_to_vva(blk_task->bdev->vid,
-						task->inflight_desc->addr,
-						&chunck_len);
-			if (!blk_task->status ||
-			    chunck_len != task->inflight_desc->len) {
-				fprintf(stderr,
-					"failed to translate desc address.\n");
-				rte_free(task);
+		inflight_ring = &task->vq->inflight_ring;
+		resubmit_inflight = inflight_ring->resubmit_inflight;
+
+		if (resubmit_inflight != NULL &&
+		    resubmit_inflight->resubmit_list != NULL) {
+			if (setup_iovs_from_inflight_desc(task->ctrlr, task->vq,
+				task->req_idx, task->iovs, &task->iovs_cnt,
+				&payload)) {
+				fprintf(stderr, "Failed to setup iovs\n");
 				return;
 			}
 		} else {
-			blk_task->readtype =
-			inflight_desc_is_wr(task->inflight_desc);
-			inflight_process_payload_chain_packed(task);
+			if (setup_iovs_from_descs_packed(task->ctrlr, task->vq,
+				task->req_idx, task->iovs, &task->iovs_cnt,
+				&payload)) {
+				fprintf(stderr, "Failed to setup iovs\n");
+				return;
+			}
 		}
+	} else {
+		if (setup_iovs_from_descs_split(task->ctrlr, task->vq,
+			task->req_idx, task->iovs, &task->iovs_cnt, &payload)) {
+			fprintf(stderr, "Failed to setup iovs\n");
+			return;
+		}
+	}
 
-		blk_task->buffer_id = task->inflight_desc->id;
-
-		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, blk_task);
-		if (ret)
-			/* invalid response */
-			*blk_task->status = VIRTIO_BLK_S_IOERR;
-		else
-			/* successfully */
-			*blk_task->status = VIRTIO_BLK_S_OK;
-
-		inflight_submit_completion_packed(task, q_idx, &used_idx,
-						  &used_wrap_counter);
+	/* First IOV must be the req head. */
+	task->req = (struct virtio_blk_outhdr *)task->iovs[0].iov_base;
+	assert(sizeof(*task->req) == task->iovs[0].iov_len);
 
-		blk_vq->last_used_idx = used_idx;
-		blk_vq->used_wrap_counter = used_wrap_counter;
-	}
+	/* Last IOV must be the status tail. */
+	task->status = (uint8_t *)task->iovs[task->iovs_cnt - 1].iov_base;
+	assert(sizeof(*task->status) == task->iovs[task->iovs_cnt - 1].iov_len);
 
-	rte_free(task);
-}
+	/* Data length: total payload minus the head and tail iovs */
+	task->data_len = payload - task->iovs[0].iov_len -
+		task->iovs[task->iovs_cnt - 1].iov_len;
 
-static struct vring_desc *
-descriptor_get_next_split(struct vring_desc *vq_desc,
-				   struct vring_desc *cur_desc)
-{
-	return &vq_desc[cur_desc->next];
-}
+	if (vhost_bdev_process_blk_commands(task->ctrlr->bdev, task))
+		/* invalid response */
+		*task->status = VIRTIO_BLK_S_IOERR;
+	else
+		/* success */
+		*task->status = VIRTIO_BLK_S_OK;
 
-static bool
-descriptor_has_next_split(struct vring_desc *cur_desc)
-{
-	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
+	if (task->vq->packed_ring)
+		enqueue_task_packed(task);
+	else
+		enqueue_task(task);
 }
 
-static bool
-descriptor_is_wr_split(struct vring_desc *cur_desc)
+static void
+blk_task_init(struct vhost_blk_task *task)
 {
-	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
+	task->iovs_cnt = 0;
+	task->data_len = 0;
+	task->req = NULL;
+	task->status = NULL;
 }
 
 static void
-vhost_process_payload_chain_split(struct vhost_blk_task *task)
+submit_inflight_vq(struct vhost_blk_queue *vq)
 {
-	void *data;
-	uint64_t chunck_len;
+	struct rte_vhost_ring_inflight *inflight_ring;
+	struct rte_vhost_resubmit_info *resubmit_inflight;
+	struct vhost_blk_task *task;
 
-	task->iovs_cnt = 0;
+	inflight_ring = &vq->inflight_ring;
+	resubmit_inflight = inflight_ring->resubmit_inflight;
 
-	do {
-		chunck_len = task->desc_split->len;
-		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-						     task->desc_split->addr,
-						     &chunck_len);
-		if (!data || chunck_len != task->desc_split->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			return;
-		}
+	if (resubmit_inflight == NULL ||
+	    resubmit_inflight->resubmit_num == 0)
+		return;
 
-		task->iovs[task->iovs_cnt].iov_base = data;
-		task->iovs[task->iovs_cnt].iov_len = task->desc_split->len;
-		task->data_len += task->desc_split->len;
-		task->iovs_cnt++;
-		task->desc_split =
-		descriptor_get_next_split(task->vq->desc, task->desc_split);
-	} while (descriptor_has_next_split(task->desc_split));
-
-	chunck_len = task->desc_split->len;
-	task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-						     task->desc_split->addr,
-						     &chunck_len);
-	if (!task->status || chunck_len != task->desc_split->len)
-		fprintf(stderr, "failed to translate desc address.\n");
-}
+	fprintf(stdout, "Resubmit inflight num is %d\n",
+		resubmit_inflight->resubmit_num);
 
-static void
-submit_completion_split(struct vhost_blk_task *task, uint32_t vid,
-	uint32_t q_idx)
-{
-	struct rte_vhost_vring *vq;
-	struct vring_used *used;
+	while (resubmit_inflight->resubmit_num-- > 0) {
+		uint16_t desc_idx;
 
-	vq = task->vq;
-	used = vq->used;
+		desc_idx = resubmit_inflight->resubmit_list[
+					resubmit_inflight->resubmit_num].index;
 
-	rte_vhost_set_last_inflight_io_split(vid, q_idx, task->req_idx);
+		if (vq->packed_ring) {
+			uint16_t task_idx;
+			struct rte_vhost_inflight_desc_packed *desc;
 
-	/* Fill out the next entry in the "used" ring.  id = the
-	 * index of the descriptor that contained the blk request.
-	 * len = the total amount of data transferred for the blk
-	 * request. We must report the correct len, for variable
-	 * length blk CDBs, where we may return less data than
-	 * allocated by the guest VM.
-	 */
-	used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
-	used->ring[used->idx & (vq->size - 1)].len = task->data_len;
-	rte_smp_mb();
-	used->idx++;
-	rte_smp_mb();
+			desc = inflight_ring->inflight_packed->desc;
+			task_idx = desc[desc[desc_idx].last].id;
+			task = &vq->tasks[task_idx];
 
-	rte_vhost_clr_inflight_desc_split(vid, q_idx, used->idx, task->req_idx);
+			task->req_idx = desc_idx;
+			task->chain_num = desc[desc_idx].num;
+			task->buffer_id = task_idx;
+			task->inflight_idx = desc_idx;
 
-	/* Send an interrupt back to the guest VM so that it knows
-	 * a completion is ready to be processed.
-	 */
-	rte_vhost_vring_call(task->bdev->vid, q_idx);
+			vq->last_avail_idx += desc[desc_idx].num;
+			if (vq->last_avail_idx >= vq->vring.size) {
+				vq->last_avail_idx -= vq->vring.size;
+				vq->avail_wrap_counter =
+					!vq->avail_wrap_counter;
+			}
+		} else
+			/* In a split ring, desc_idx equals the task's req_idx,
+			 * which was set when the task pool was allocated.
+			 */
+			task = &vq->tasks[desc_idx];
+
+		blk_task_init(task);
+		process_blk_task(task);
+	}
+
+	free(resubmit_inflight->resubmit_list);
+	resubmit_inflight->resubmit_list = NULL;
 }
 
-static void
-submit_inflight_vq_split(struct vhost_blk_ctrlr *ctrlr,
-	uint32_t q_idx)
+/* Use the buffer_id as the task_idx */
+static uint16_t
+vhost_blk_vq_get_desc_chain_buffer_id(struct vhost_blk_queue *vq,
+				      uint16_t *req_head, uint16_t *num)
 {
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_ring_inflight *inflight_vq;
-	struct rte_vhost_resubmit_info *resubmit_inflight;
-	struct rte_vhost_resubmit_desc *resubmit_list;
-	struct vhost_blk_task *task;
-	int req_idx;
-	uint64_t chunck_len;
-	int ret;
+	struct vring_packed_desc *desc = &vq->vring.desc_packed[
+						vq->last_avail_idx];
 
-	blk_vq = &ctrlr->bdev->queues[q_idx];
-	inflight_vq = &blk_vq->inflight_vq;
-	resubmit_inflight = inflight_vq->resubmit_inflight;
-	resubmit_list = resubmit_inflight->resubmit_list;
+	*req_head = vq->last_avail_idx;
+	*num = 1;
 
-	task = rte_zmalloc(NULL, sizeof(*task), 0);
-	assert(task != NULL);
+	while (descriptor_has_next_packed(desc)) {
+		vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
+		desc = &vq->vring.desc_packed[vq->last_avail_idx];
+		*num += 1;
+	}
 
-	task->ctrlr = ctrlr;
-	task->bdev = ctrlr->bdev;
-	task->vq = &blk_vq->vq;
+	/* Point to next desc */
+	vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
+	if (vq->last_avail_idx < *req_head)
+		vq->avail_wrap_counter = !vq->avail_wrap_counter;
 
-	while (resubmit_inflight->resubmit_num-- > 0) {
-		req_idx = resubmit_list[resubmit_inflight->resubmit_num].index;
-		task->req_idx = req_idx;
-		task->desc_split = &task->vq->desc[task->req_idx];
-		task->iovs_cnt = 0;
-		task->data_len = 0;
-		task->req = NULL;
-		task->status = NULL;
+	return desc->id;
+}
 
-		/* does not support indirect descriptors */
-		assert(task->desc_split != NULL);
-		assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0);
+static uint16_t
+vq_get_desc_idx(struct vhost_blk_queue *vq)
+{
+	uint16_t desc_idx;
+	uint16_t last_avail_idx;
 
-		chunck_len = task->desc_split->len;
-		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-				task->desc_split->addr, &chunck_len);
-		if (!task->req || chunck_len != task->desc_split->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			rte_free(task);
-			return;
-		}
+	last_avail_idx = vq->last_avail_idx & (vq->vring.size - 1);
+	desc_idx = vq->vring.avail->ring[last_avail_idx];
+	vq->last_avail_idx++;
 
-		task->desc_split = descriptor_get_next_split(task->vq->desc,
-							     task->desc_split);
-		if (!descriptor_has_next_split(task->desc_split)) {
-			task->dxfer_dir = BLK_DIR_NONE;
-			chunck_len = task->desc_split->len;
-			task->status = (void *)(uintptr_t)
-				       gpa_to_vva(task->bdev->vid,
-						  task->desc_split->addr,
-						  &chunck_len);
-			if (!task->status ||
-				chunck_len != task->desc_split->len) {
-				fprintf(stderr,
-					"failed to translate desc address.\n");
-				rte_free(task);
-				return;
-			}
-		} else {
-			task->readtype =
-				descriptor_is_wr_split(task->desc_split);
-			vhost_process_payload_chain_split(task);
-		}
+	return desc_idx;
+}
 
-		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
-		if (ret) {
-			/* invalid response */
-			*task->status = VIRTIO_BLK_S_IOERR;
-		} else {
-			/* successfully */
-			*task->status = VIRTIO_BLK_S_OK;
-		}
-		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
-	}
+static int
+vhost_blk_vq_is_avail(struct vhost_blk_queue *vq)
+{
+	if (vq->packed_ring) {
+		uint16_t flags = vq->vring.desc_packed[
+					vq->last_avail_idx].flags;
+		bool avail_wrap_counter = vq->avail_wrap_counter;
 
-	rte_free(task);
+		return (!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter &&
+			!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter);
+	} else {
+		if (vq->vring.avail->idx != vq->last_avail_idx)
+			return 1;
+
+		return 0;
+	}
 }
 
 static void
-process_requestq_split(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx)
+process_vq(struct vhost_blk_queue *vq)
 {
-	int ret;
-	int req_idx;
-	uint16_t last_idx;
-	uint64_t chunck_len;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_vring *vq;
 	struct vhost_blk_task *task;
 
-	blk_vq = &ctrlr->bdev->queues[q_idx];
-	vq = &blk_vq->vq;
+	if (vq->packed_ring) {
+		while (vhost_blk_vq_is_avail(vq)) {
+			uint16_t task_idx, req_idx, last_idx, chain_num;
 
-	task = rte_zmalloc(NULL, sizeof(*task), 0);
-	assert(task != NULL);
-	task->ctrlr = ctrlr;
-	task->bdev = ctrlr->bdev;
-	task->vq = vq;
+			task_idx = vhost_blk_vq_get_desc_chain_buffer_id(vq,
+					&req_idx, &chain_num);
+			task = &vq->tasks[task_idx];
 
-	while (vq->avail->idx != blk_vq->last_avail_idx) {
-		last_idx = blk_vq->last_avail_idx & (vq->size - 1);
-		req_idx = vq->avail->ring[last_idx];
-		task->req_idx = req_idx;
-		task->desc_split = &task->vq->desc[task->req_idx];
-		task->iovs_cnt = 0;
-		task->data_len = 0;
-		task->req = NULL;
-		task->status = NULL;
+			blk_task_init(task);
+			task->req_idx = req_idx;
+			task->chain_num = chain_num;
+			task->buffer_id = task_idx;
+			last_idx = (req_idx + chain_num - 1) % vq->vring.size;
 
-		rte_vhost_set_inflight_desc_split(ctrlr->bdev->vid, q_idx,
-							task->req_idx);
-
-		/* does not support indirect descriptors */
-		assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0);
+			rte_vhost_set_inflight_desc_packed(task->ctrlr->vid,
+							   vq->id,
+							   task->req_idx,
+							   last_idx,
+							   &task->inflight_idx);
 
-		chunck_len = task->desc_split->len;
-		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
-				task->desc_split->addr, &chunck_len);
-		if (!task->req || chunck_len != task->desc_split->len) {
-			fprintf(stderr, "failed to translate desc address.\n");
-			rte_free(task);
-			return;
-		}
-
-		task->desc_split = descriptor_get_next_split(task->vq->desc,
-							     task->desc_split);
-		if (!descriptor_has_next_split(task->desc_split)) {
-			task->dxfer_dir = BLK_DIR_NONE;
-			chunck_len = task->desc_split->len;
-			task->status = (void *)(uintptr_t)
-					      gpa_to_vva(task->bdev->vid,
-							 task->desc_split->addr,
-							 &chunck_len);
-			if (!task->status ||
-				chunck_len != task->desc_split->len) {
-				fprintf(stderr,
-					"failed to translate desc address.\n");
-				rte_free(task);
-				return;
-			}
-		} else {
-			task->readtype =
-				descriptor_is_wr_split(task->desc_split);
-			vhost_process_payload_chain_split(task);
+			process_blk_task(task);
 		}
-		blk_vq->last_avail_idx++;
-
-		ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task);
-		if (ret) {
-			/* invalid response */
-			*task->status = VIRTIO_BLK_S_IOERR;
-		} else {
-			/* successfully */
-			*task->status = VIRTIO_BLK_S_OK;
+	} else {
+		while (vhost_blk_vq_is_avail(vq)) {
+			uint16_t desc_idx;
+
+			desc_idx = vq_get_desc_idx(vq);
+			task = &vq->tasks[desc_idx];
+
+			blk_task_init(task);
+			rte_vhost_set_inflight_desc_split(task->ctrlr->vid,
+							  vq->id,
+							  task->req_idx);
+			process_blk_task(task);
 		}
-
-		submit_completion_split(task, ctrlr->bdev->vid, q_idx);
 	}
-
-	rte_free(task);
 }
 
 static void *
 ctrlr_worker(void *arg)
 {
 	struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_ring_inflight *inflight_vq;
 	cpu_set_t cpuset;
 	pthread_t thread;
 	int i;
@@ -774,106 +543,128 @@ ctrlr_worker(void *arg)
 	CPU_SET(0, &cpuset);
 	pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
 
+	for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
+		submit_inflight_vq(&ctrlr->queues[i]);
+
+	while (worker_thread_status != WORKER_STATE_STOP)
+		for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
+			process_vq(&ctrlr->queues[i]);
+
+	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
+	sem_post(&exit_sem);
+	return NULL;
+}
+
+static int
+alloc_task_pool(struct vhost_blk_ctrlr *ctrlr)
+{
+	struct vhost_blk_queue *vq;
+	int i, j;
+
 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
-		blk_vq = &ctrlr->bdev->queues[i];
-		inflight_vq = &blk_vq->inflight_vq;
-		if (inflight_vq->resubmit_inflight != NULL &&
-		    inflight_vq->resubmit_inflight->resubmit_num != 0) {
-			if (ctrlr->packed_ring)
-				submit_inflight_vq_packed(ctrlr, i);
-			else
-				submit_inflight_vq_split(ctrlr, i);
+		vq = &ctrlr->queues[i];
+
+		vq->tasks = rte_zmalloc(NULL,
+			sizeof(struct vhost_blk_task) * vq->vring.size, 0);
+		if (!vq->tasks) {
+			fprintf(stderr, "Failed to allocate task memory\n");
+			return -1;
 		}
-	}
 
-	while (!g_should_stop && ctrlr->bdev != NULL) {
-		for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
-			if (ctrlr->packed_ring)
-				process_requestq_packed(ctrlr, i);
-			else
-				process_requestq_split(ctrlr, i);
+		for (j = 0; j < vq->vring.size; j++) {
+			vq->tasks[j].req_idx = j;
+			vq->tasks[j].ctrlr = ctrlr;
+			vq->tasks[j].vq = vq;
 		}
 	}
 
-	g_should_stop = 2;
-	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
-	sem_post(&exit_sem);
-	return NULL;
+	return 0;
+}
+
+static void
+free_task_pool(struct vhost_blk_ctrlr *ctrlr)
+{
+	int i;
+
+	for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
+		rte_free(ctrlr->queues[i].tasks);
 }
 
 static int
 new_device(int vid)
 {
 	struct vhost_blk_ctrlr *ctrlr;
-	struct vhost_blk_queue *blk_vq;
-	struct rte_vhost_vring *vq;
+	struct vhost_blk_queue *vq;
+	char path[PATH_MAX];
 	uint64_t features;
 	pthread_t tid;
 	int i, ret;
+	bool packed_ring;
 
-	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
+	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
+	if (ret) {
+		fprintf(stderr, "Failed to get the socket path\n");
+		return -1;
+	}
+
+	ctrlr = vhost_blk_ctrlr_find(path);
 	if (!ctrlr) {
-		fprintf(stderr, "Controller is not ready\n");
+		fprintf(stderr, "Failed to find controller\n");
 		return -1;
 	}
 
 	if (ctrlr->started)
 		return 0;
 
-	ctrlr->bdev->vid = vid;
+	ctrlr->vid = vid;
 	ret = rte_vhost_get_negotiated_features(vid, &features);
 	if (ret) {
-		fprintf(stderr, "failed to get the negotiated features\n");
+		fprintf(stderr, "Failed to get the negotiated features\n");
 		return -1;
 	}
-	ctrlr->packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));
-
-	ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
-	if (ret)
-		fprintf(stderr, "Get Controller memory region failed\n");
-	assert(ctrlr->mem != NULL);
+	packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));
 
 	/* Disable Notifications and init last idx */
 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
-		blk_vq = &ctrlr->bdev->queues[i];
-		vq = &blk_vq->vq;
-
-		ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
-		assert(ret == 0);
-
-		ret = rte_vhost_get_vring_base(ctrlr->bdev->vid, i,
-					       &blk_vq->last_avail_idx,
-					       &blk_vq->last_used_idx);
-		assert(ret == 0);
-
-		ret = rte_vhost_get_vhost_ring_inflight(ctrlr->bdev->vid, i,
-							&blk_vq->inflight_vq);
-		assert(ret == 0);
-
-		if (ctrlr->packed_ring) {
+		vq = &ctrlr->queues[i];
+		vq->id = i;
+
+		assert(rte_vhost_get_vhost_vring(ctrlr->vid, i,
+						 &vq->vring) == 0);
+		assert(rte_vhost_get_vring_base(ctrlr->vid, i,
+					       &vq->last_avail_idx,
+					       &vq->last_used_idx) == 0);
+		assert(rte_vhost_get_vhost_ring_inflight(ctrlr->vid, i,
+						&vq->inflight_ring) == 0);
+
+		if (packed_ring) {
 			/* for the reconnection */
-			ret = rte_vhost_get_vring_base_from_inflight(
-				ctrlr->bdev->vid, i,
-				&blk_vq->last_avail_idx,
-				&blk_vq->last_used_idx);
-			assert(ret == 0);
+			assert(rte_vhost_get_vring_base_from_inflight(
+				ctrlr->vid, i,
+				&vq->last_avail_idx,
+				&vq->last_used_idx) == 0);
 
-			blk_vq->avail_wrap_counter = blk_vq->last_avail_idx &
+			vq->avail_wrap_counter = vq->last_avail_idx &
 				(1 << 15);
-			blk_vq->last_avail_idx = blk_vq->last_avail_idx &
+			vq->last_avail_idx = vq->last_avail_idx &
 				0x7fff;
-			blk_vq->used_wrap_counter = blk_vq->last_used_idx &
+			vq->used_wrap_counter = vq->last_used_idx &
 				(1 << 15);
-			blk_vq->last_used_idx = blk_vq->last_used_idx &
+			vq->last_used_idx = vq->last_used_idx &
 				0x7fff;
 		}
 
+		vq->packed_ring = packed_ring;
 		rte_vhost_enable_guest_notification(vid, i, 0);
 	}
 
+	assert(rte_vhost_get_mem_table(vid, &ctrlr->mem) == 0);
+	assert(ctrlr->mem != NULL);
+	assert(alloc_task_pool(ctrlr) == 0);
+
 	/* start polling vring */
-	g_should_stop = 0;
-	fprintf(stdout, "New Device %s, Device ID %d\n", dev_pathname, vid);
+	worker_thread_status = WORKER_STATE_START;
+	fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
 	if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
 		fprintf(stderr, "Worker Thread Started Failed\n");
 		return -1;
@@ -890,7 +681,7 @@ destroy_device(int vid)
 {
 	char path[PATH_MAX];
 	struct vhost_blk_ctrlr *ctrlr;
-	struct vhost_blk_queue *blk_vq;
+	struct vhost_blk_queue *vq;
 	int i, ret;
 
 	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
@@ -909,27 +700,27 @@ destroy_device(int vid)
 	if (!ctrlr->started)
 		return;
 
-	g_should_stop = 1;
-	while (g_should_stop != 2)
-		;
+	worker_thread_status = WORKER_STATE_STOP;
+	sem_wait(&exit_sem);
 
 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
-		blk_vq = &ctrlr->bdev->queues[i];
-		if (ctrlr->packed_ring) {
-			blk_vq->last_avail_idx |= (blk_vq->avail_wrap_counter <<
+		vq = &ctrlr->queues[i];
+		if (vq->packed_ring) {
+			vq->last_avail_idx |= (vq->avail_wrap_counter <<
 				15);
-			blk_vq->last_used_idx |= (blk_vq->used_wrap_counter <<
+			vq->last_used_idx |= (vq->used_wrap_counter <<
 				15);
 		}
-		rte_vhost_set_vring_base(ctrlr->bdev->vid, i,
-					 blk_vq->last_avail_idx,
-					 blk_vq->last_used_idx);
+
+		rte_vhost_set_vring_base(ctrlr->vid, i,
+					 vq->last_avail_idx,
+					 vq->last_used_idx);
 	}
 
+	free_task_pool(ctrlr);
 	free(ctrlr->mem);
 
 	ctrlr->started = 0;
-	sem_wait(&exit_sem);
 }
 
 static int
@@ -964,13 +755,13 @@ vhost_blk_bdev_construct(const char *bdev_name,
 	bdev->blockcnt = blk_cnt;
 	bdev->write_cache = wce_enable;
 
-	fprintf(stdout, "blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen,
+	fprintf(stdout, "Blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen,
 		bdev->blockcnt);
 
 	/* use memory as disk storage space */
 	bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
 	if (!bdev->data) {
-		fprintf(stderr, "no enough reserved huge memory for disk\n");
+		fprintf(stderr, "No enough reserved huge memory for disk\n");
 		free(bdev);
 		return NULL;
 	}
@@ -997,7 +788,7 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name)
 	unlink(dev_pathname);
 
 	if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
-		fprintf(stderr, "socket %s already exists\n", dev_pathname);
+		fprintf(stderr, "Socket %s already exists\n", dev_pathname);
 		return NULL;
 	}
 
@@ -1008,7 +799,7 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name)
 		return NULL;
 	}
 
-	/* set proper features */
+	/* set vhost user protocol features */
 	vhost_dev_install_rte_compat_hooks(dev_pathname);
 
 	ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
@@ -1033,28 +824,32 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name)
 }
 
 static void
-signal_handler(__rte_unused int signum)
+vhost_blk_ctrlr_destroy(struct vhost_blk_ctrlr *ctrlr)
 {
-	struct vhost_blk_ctrlr *ctrlr;
-
-	unlink(dev_pathname);
+	if (ctrlr->bdev != NULL) {
+		if (ctrlr->bdev->data != NULL)
+			rte_free(ctrlr->bdev->data);
 
-	if (g_should_stop != -1) {
-		g_should_stop = 1;
-		while (g_should_stop != 2)
-			;
+		rte_free(ctrlr->bdev);
 	}
+	rte_free(ctrlr);
+
+	rte_vhost_driver_unregister(dev_pathname);
+}
+
+static void
+signal_handler(__rte_unused int signum)
+{
+	struct vhost_blk_ctrlr *ctrlr;
 
 	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
-	if (ctrlr != NULL) {
-		if (ctrlr->bdev != NULL) {
-			rte_free(ctrlr->bdev->data);
-			rte_free(ctrlr->bdev);
-		}
-		rte_free(ctrlr);
-	}
+	if (ctrlr == NULL)
+		return;
 
-	rte_vhost_driver_unregister(dev_pathname);
+	if (ctrlr->started)
+		destroy_device(ctrlr->vid);
+
+	vhost_blk_ctrlr_destroy(ctrlr);
 	exit(0);
 }
 
@@ -1062,14 +857,12 @@ int main(int argc, char *argv[])
 {
 	int ret;
 
-	signal(SIGINT, signal_handler);
-
 	/* init EAL */
 	ret = rte_eal_init(argc, argv);
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
 
-	g_vhost_ctrlr = vhost_blk_ctrlr_construct("vhost.socket");
+	g_vhost_ctrlr = vhost_blk_ctrlr_construct(CTRLR_NAME);
 	if (g_vhost_ctrlr == NULL) {
 		fprintf(stderr, "Construct vhost blk controller failed\n");
 		return 0;
@@ -1080,6 +873,8 @@ int main(int argc, char *argv[])
 		return -1;
 	}
 
+	signal(SIGINT, signal_handler);
+
 	rte_vhost_driver_start(dev_pathname);
 
 	/* loop for exit the application */
diff --git a/examples/vhost_blk/vhost_blk.h b/examples/vhost_blk/vhost_blk.h
index 933e2b7c5..4a8040588 100644
--- a/examples/vhost_blk/vhost_blk.h
+++ b/examples/vhost_blk/vhost_blk.h
@@ -30,12 +30,18 @@ struct vring_packed_desc {
 #endif
 
 struct vhost_blk_queue {
-	struct rte_vhost_vring vq;
-	struct rte_vhost_ring_inflight inflight_vq;
+	struct rte_vhost_vring vring;
+	struct rte_vhost_ring_inflight inflight_ring;
+
 	uint16_t last_avail_idx;
 	uint16_t last_used_idx;
+	uint16_t id;
+
 	bool avail_wrap_counter;
 	bool used_wrap_counter;
+	bool packed_ring;
+
+	struct vhost_blk_task *tasks;
 };
 
 #define NUM_OF_BLK_QUEUES 1
@@ -43,10 +49,6 @@ struct vhost_blk_queue {
 #define min(a, b) (((a) < (b)) ? (a) : (b))
 
 struct vhost_block_dev {
-	/** ID for vhost library. */
-	int vid;
-	/** Queues for the block device */
-	struct vhost_blk_queue queues[NUM_OF_BLK_QUEUES];
 	/** Unique name for this block device. */
 	char name[64];
 
@@ -68,8 +70,10 @@ struct vhost_block_dev {
 
 struct vhost_blk_ctrlr {
 	uint8_t started;
-	uint8_t packed_ring;
-	uint8_t need_restart;
+	/** ID for vhost library. */
+	int vid;
+	/** Queues for the block device */
+	struct vhost_blk_queue queues[NUM_OF_BLK_QUEUES];
 	/** Only support 1 LUN for the example */
 	struct vhost_block_dev *bdev;
 	/** VM memory region */
@@ -85,31 +89,20 @@ enum blk_data_dir {
 };
 
 struct vhost_blk_task {
-	uint8_t readtype;
 	uint8_t req_idx;
-	uint16_t head_idx;
-	uint16_t last_idx;
+	uint16_t chain_num;
 	uint16_t inflight_idx;
 	uint16_t buffer_id;
 	uint32_t dxfer_dir;
 	uint32_t data_len;
-	struct virtio_blk_outhdr *req;
 
+	struct virtio_blk_outhdr *req;
 	volatile uint8_t *status;
-
 	struct iovec iovs[VHOST_BLK_MAX_IOVS];
 	uint32_t iovs_cnt;
-	struct vring_packed_desc *desc_packed;
-	struct vring_desc *desc_split;
-	struct rte_vhost_vring *vq;
-	struct vhost_block_dev *bdev;
-	struct vhost_blk_ctrlr *ctrlr;
-};
 
-struct inflight_blk_task {
-	struct vhost_blk_task blk_task;
-	struct rte_vhost_inflight_desc_packed *inflight_desc;
-	struct rte_vhost_inflight_info_packed *inflight_packed;
+	struct vhost_blk_queue *vq;
+	struct vhost_blk_ctrlr *ctrlr;
 };
 
 struct vhost_blk_ctrlr *g_vhost_ctrlr;
-- 
2.17.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH v2] examples/vhost_blk: refactor vhost-blk example
  2020-04-30 17:20 ` [dpdk-dev] [PATCH v2] " Jin Yu
@ 2020-04-30 20:41   ` Maxime Coquelin
  2020-04-30 20:53   ` Maxime Coquelin
  1 sibling, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2020-04-30 20:41 UTC (permalink / raw)
  To: Jin Yu, Tiwei Bie, Zhihong Wang, John McNamara, Marko Kovacevic; +Cc: dev



On 4/30/20 7:20 PM, Jin Yu wrote:
> Decrease the code and make it easier to read. It's
> useful for understanding the inflight APIs and how
> packed ring works. Update the RST because the packed
> ring patch has been merged to QEMU master and ring_packed
> parameter changes to packed.
> 
> Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage sample")

I don't think the Fixes tag makes sense, so I removed it.

> Signed-off-by: Jin Yu <jin.yu@intel.com>

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Next time, please rebase your patch on top of next-virtio/master or
net-next/master before posting.

> ---
> V2 - fix build error in 32-bit
> ---
>  doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
>  examples/vhost_blk/blk.c               |   13 +-
>  examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
>  examples/vhost_blk/vhost_blk.h         |   39 +-
>  4 files changed, 494 insertions(+), 705 deletions(-)
> 

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [dpdk-dev] [PATCH v2] examples/vhost_blk: refactor vhost-blk example
  2020-04-30 17:20 ` [dpdk-dev] [PATCH v2] " Jin Yu
  2020-04-30 20:41   ` Maxime Coquelin
@ 2020-04-30 20:53   ` Maxime Coquelin
  1 sibling, 0 replies; 10+ messages in thread
From: Maxime Coquelin @ 2020-04-30 20:53 UTC (permalink / raw)
  To: Jin Yu, Tiwei Bie, Zhihong Wang, John McNamara, Marko Kovacevic; +Cc: dev



On 4/30/20 7:20 PM, Jin Yu wrote:
> Decrease the code and make it easier to read. It's
> useful for understanding the inflight APIs and how
> packed ring works. Update the RST because the packed
> ring patch has been merged to QEMU master and ring_packed
> parameter changes to packed.
> 
> Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage sample")
> 
> Signed-off-by: Jin Yu <jin.yu@intel.com>
> ---
> V2 - fix build error in 32-bit
> ---
>  doc/guides/sample_app_ug/vhost_blk.rst |    8 +-
>  examples/vhost_blk/blk.c               |   13 +-
>  examples/vhost_blk/vhost_blk.c         | 1139 ++++++++++--------------
>  examples/vhost_blk/vhost_blk.h         |   39 +-
>  4 files changed, 494 insertions(+), 705 deletions(-)

Applied to dpdk-next-virtio/master.

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-04-30 20:53 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-28 15:32 [dpdk-dev] [PATCH] examples/vhost_blk: refactor vhost-blk example Jin Yu
2020-04-27  8:01 ` Maxime Coquelin
2020-04-28 16:05 ` Maxime Coquelin
2020-04-29 17:53 ` Ferruh Yigit
2020-04-30  1:42   ` Yu, Jin
2020-04-30  9:08     ` Maxime Coquelin
2020-04-30  9:42       ` Yu, Jin
2020-04-30 17:20 ` [dpdk-dev] [PATCH v2] " Jin Yu
2020-04-30 20:41   ` Maxime Coquelin
2020-04-30 20:53   ` Maxime Coquelin

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git