From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 1B2B0A00C5; Thu, 30 Apr 2020 11:41:12 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 7EA851DA5D; Thu, 30 Apr 2020 11:41:10 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id 8DB8B1C29A for ; Thu, 30 Apr 2020 11:41:07 +0200 (CEST) IronPort-SDR: dtQx8/Drhugt5QafsGFvOz+hkZMCJg0rTd5JWvV5gjUoPPLafas+U986YwIQEhgUulzerfHb0S nkt8oVDGWkWQ== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2020 02:41:05 -0700 IronPort-SDR: WxJvrlIS28H4XTa8cHBNn3TVzt50iu4tDkDj2F9NnhIGFfmuJqcNqfwJQgVqmoyK3vzbdE1D5W 9Ba3LMgI3rhw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.73,334,1583222400"; d="scan'208";a="282805929" Received: from storage36.sh.intel.com ([10.67.110.177]) by fmsmga004.fm.intel.com with ESMTP; 30 Apr 2020 02:41:02 -0700 From: Jin Yu To: Maxime Coquelin , Tiwei Bie , Zhihong Wang , John McNamara , Marko Kovacevic Cc: dev@dpdk.org, Jin Yu Date: Fri, 1 May 2020 01:20:08 +0800 Message-Id: <20200430172008.52657-1-jin.yu@intel.com> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20200228153235.31419-1-jin.yu@intel.com> References: <20200228153235.31419-1-jin.yu@intel.com> Subject: [dpdk-dev] [PATCH v2] examples/vhost_blk: refactor vhost-blk example X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Decrease the code and make it easier to read. It's useful for understanding the inflight APIs and how packed ring works. 
Update the RST because the packed ring patch has been merged to QEMU master and the ring_packed parameter has been renamed to packed. Fixes: c19beb3f38cd ("examples/vhost_blk: introduce vhost storage sample") Signed-off-by: Jin Yu --- V2 - fix build error on 32-bit --- doc/guides/sample_app_ug/vhost_blk.rst | 8 +- examples/vhost_blk/blk.c | 13 +- examples/vhost_blk/vhost_blk.c | 1139 ++++++++++-------------- examples/vhost_blk/vhost_blk.h | 39 +- 4 files changed, 494 insertions(+), 705 deletions(-) diff --git a/doc/guides/sample_app_ug/vhost_blk.rst b/doc/guides/sample_app_ug/vhost_blk.rst index 39096e2e4..681de6f3e 100644 --- a/doc/guides/sample_app_ug/vhost_blk.rst +++ b/doc/guides/sample_app_ug/vhost_blk.rst @@ -51,7 +51,7 @@ Start the VM -drive file=os.img,if=none,id=disk \ -device ide-hd,drive=disk,bootindex=0 \ -chardev socket,id=char0,reconnect=1,path=/tmp/vhost.socket \ - -device vhost-user-blk-pci,ring_packed=1,chardev=char0,num-queues=1 \ + -device vhost-user-blk-pci,packed=on,chardev=char0,num-queues=1 \ ... .. note:: @@ -59,5 +59,7 @@ Start the VM Qemu v4.0 or newer version is required. reconnect=1 means live recovery support that qemu can reconnect vhost_blk after we restart vhost_blk example. - ring_packed=1 means the device support packed ring but need the guest kernel - version >= 5.0 + packed=on means the device supports packed ring, which requires the guest kernel + version >= 5.0. + As of Qemu commit 9bb73502321d46f4d320fa17aa38201445783fc4, both + vhost-blk reconnect and packed ring are supported. 
diff --git a/examples/vhost_blk/blk.c b/examples/vhost_blk/blk.c index 1b0b764b2..9048e2f8a 100644 --- a/examples/vhost_blk/blk.c +++ b/examples/vhost_blk/blk.c @@ -50,7 +50,10 @@ vhost_bdev_blk_readwrite(struct vhost_block_dev *bdev, offset = lba_512 * 512; - for (i = 0; i < task->iovs_cnt; i++) { + /* iovs[0] is the head and iovs[iovs_cnt - 1] is the tail + * Middle is the data range + */ + for (i = 1; i < task->iovs_cnt - 1; i++) { if (task->dxfer_dir == BLK_DIR_TO_DEV) memcpy(bdev->data + offset, task->iovs[i].iov_base, task->iovs[i].iov_len); @@ -83,7 +86,7 @@ vhost_bdev_process_blk_commands(struct vhost_block_dev *bdev, "%s - passed IO buffer is not multiple of 512b" "(req_idx = %"PRIu16").\n", task->req->type ? "WRITE" : "READ", - task->head_idx); + task->req_idx); return VIRTIO_BLK_S_UNSUPP; } @@ -98,14 +101,10 @@ vhost_bdev_process_blk_commands(struct vhost_block_dev *bdev, "%s - passed IO buffer is not multiple of 512b" "(req_idx = %"PRIu16").\n", task->req->type ? "WRITE" : "READ", - task->head_idx); + task->req_idx); return VIRTIO_BLK_S_UNSUPP; } - if (task->readtype) { - fprintf(stderr, "type isn't right\n"); - return VIRTIO_BLK_S_IOERR; - } task->dxfer_dir = BLK_DIR_TO_DEV; vhost_bdev_blk_readwrite(bdev, task, task->req->sector, task->data_len); diff --git a/examples/vhost_blk/vhost_blk.c b/examples/vhost_blk/vhost_blk.c index 74c82a900..82037ea9e 100644 --- a/examples/vhost_blk/vhost_blk.c +++ b/examples/vhost_blk/vhost_blk.c @@ -26,15 +26,22 @@ #define MAX_TASK 12 -#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \ +#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) |\ (1ULL << VIRTIO_F_VERSION_1) |\ (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) +#define CTRLR_NAME "vhost.socket" + +enum CTRLR_WORKER_STATUS { + WORKER_STATE_START = 0, + WORKER_STATE_STOP, +}; + /* Path to folder where character device will be created. Can be set by user. 
*/ static char dev_pathname[PATH_MAX] = ""; static sem_t exit_sem; -static int g_should_stop = -1; +static enum CTRLR_WORKER_STATUS worker_thread_status; struct vhost_blk_ctrlr * vhost_blk_ctrlr_find(const char *ctrlr_name) @@ -46,716 +53,478 @@ vhost_blk_ctrlr_find(const char *ctrlr_name) return g_vhost_ctrlr; } -static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len) +static uint64_t +gpa_to_vva(struct vhost_blk_ctrlr *ctrlr, uint64_t gpa, uint64_t *len) { - char path[PATH_MAX]; - struct vhost_blk_ctrlr *ctrlr; - int ret = 0; - - ret = rte_vhost_get_ifname(vid, path, PATH_MAX); - if (ret) { - fprintf(stderr, "Cannot get socket name\n"); - assert(ret != 0); - } - - ctrlr = vhost_blk_ctrlr_find(path); - if (!ctrlr) { - fprintf(stderr, "Controller is not ready\n"); - assert(ctrlr != NULL); - } - assert(ctrlr->mem != NULL); return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len); } -static struct vring_packed_desc * -descriptor_get_next_packed(struct rte_vhost_vring *vq, - uint16_t *idx) +static void +enqueue_task(struct vhost_blk_task *task) { - if (vq->desc_packed[*idx % vq->size].flags & VIRTQ_DESC_F_NEXT) { - *idx += 1; - return &vq->desc_packed[*idx % vq->size]; - } + struct vhost_blk_queue *vq = task->vq; + struct vring_used *used = vq->vring.used; - return NULL; -} + rte_vhost_set_last_inflight_io_split(task->ctrlr->vid, + vq->id, task->req_idx); -static bool -descriptor_has_next_packed(struct vring_packed_desc *cur_desc) -{ - return !!(cur_desc->flags & VRING_DESC_F_NEXT); -} + /* Fill out the next entry in the "used" ring. id = the + * index of the descriptor that contained the blk request. + * len = the total amount of data transferred for the blk + * request. We must report the correct len, for variable + * length blk CDBs, where we may return less data than + * allocated by the guest VM. 
+ */ + used->ring[used->idx & (vq->vring.size - 1)].id = task->req_idx; + used->ring[used->idx & (vq->vring.size - 1)].len = task->data_len; + rte_smp_mb(); + used->idx++; + rte_smp_mb(); -static bool -descriptor_is_wr_packed(struct vring_packed_desc *cur_desc) -{ - return !!(cur_desc->flags & VRING_DESC_F_WRITE); + rte_vhost_clr_inflight_desc_split(task->ctrlr->vid, + vq->id, used->idx, task->req_idx); + + /* Send an interrupt back to the guest VM so that it knows + * a completion is ready to be processed. + */ + rte_vhost_vring_call(task->ctrlr->vid, vq->id); } -static struct rte_vhost_inflight_desc_packed * -inflight_desc_get_next(struct rte_vhost_inflight_info_packed *inflight_packed, - struct rte_vhost_inflight_desc_packed *cur_desc) +static void +enqueue_task_packed(struct vhost_blk_task *task) { - if (!!(cur_desc->flags & VIRTQ_DESC_F_NEXT)) - return &inflight_packed->desc[cur_desc->next]; + struct vhost_blk_queue *vq = task->vq; + struct vring_packed_desc *desc; - return NULL; + rte_vhost_set_last_inflight_io_packed(task->ctrlr->vid, vq->id, + task->inflight_idx); + + desc = &vq->vring.desc_packed[vq->last_used_idx]; + desc->id = task->buffer_id; + desc->addr = 0; + + rte_smp_mb(); + if (vq->used_wrap_counter) + desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED; + else + desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED); + rte_smp_mb(); + + rte_vhost_clr_inflight_desc_packed(task->ctrlr->vid, vq->id, + task->inflight_idx); + + vq->last_used_idx += task->chain_num; + if (vq->last_used_idx >= vq->vring.size) { + vq->last_used_idx -= vq->vring.size; + vq->used_wrap_counter = !vq->used_wrap_counter; + } + + /* Send an interrupt back to the guest VM so that it knows + * a completion is ready to be processed. 
+ */ + rte_vhost_vring_call(task->ctrlr->vid, vq->id); } static bool -inflight_desc_has_next(struct rte_vhost_inflight_desc_packed *cur_desc) +descriptor_has_next_packed(struct vring_packed_desc *cur_desc) { return !!(cur_desc->flags & VRING_DESC_F_NEXT); } static bool -inflight_desc_is_wr(struct rte_vhost_inflight_desc_packed *cur_desc) +descriptor_has_next_split(struct vring_desc *cur_desc) { - return !!(cur_desc->flags & VRING_DESC_F_WRITE); + return !!(cur_desc->flags & VRING_DESC_F_NEXT); } -static void -inflight_process_payload_chain_packed(struct inflight_blk_task *task) +static int +desc_payload_to_iovs(struct vhost_blk_ctrlr *ctrlr, struct iovec *iovs, + uint32_t *iov_index, uintptr_t payload, uint64_t remaining) { - void *data; - uint64_t chunck_len; - struct vhost_blk_task *blk_task; - struct rte_vhost_inflight_desc_packed *desc; - - blk_task = &task->blk_task; - blk_task->iovs_cnt = 0; + void *vva; + uint64_t len; do { - desc = task->inflight_desc; - chunck_len = desc->len; - data = (void *)(uintptr_t)gpa_to_vva(blk_task->bdev->vid, - desc->addr, - &chunck_len); - if (!data || chunck_len != desc->len) { + if (*iov_index >= VHOST_BLK_MAX_IOVS) { + fprintf(stderr, "VHOST_BLK_MAX_IOVS reached\n"); + return -1; + } + len = remaining; + vva = (void *)(uintptr_t)gpa_to_vva(ctrlr, + payload, &len); + if (!vva || !len) { fprintf(stderr, "failed to translate desc address.\n"); - return; + return -1; } - blk_task->iovs[blk_task->iovs_cnt].iov_base = data; - blk_task->iovs[blk_task->iovs_cnt].iov_len = desc->len; - blk_task->data_len += desc->len; - blk_task->iovs_cnt++; - task->inflight_desc = inflight_desc_get_next( - task->inflight_packed, desc); - } while (inflight_desc_has_next(task->inflight_desc)); - - chunck_len = task->inflight_desc->len; - blk_task->status = (void *)(uintptr_t)gpa_to_vva( - blk_task->bdev->vid, task->inflight_desc->addr, &chunck_len); - if (!blk_task->status || chunck_len != task->inflight_desc->len) - fprintf(stderr, "failed to 
translate desc address.\n"); + iovs[*iov_index].iov_base = vva; + iovs[*iov_index].iov_len = len; + payload += len; + remaining -= len; + (*iov_index)++; + } while (remaining); + + return 0; } -static void -inflight_submit_completion_packed(struct inflight_blk_task *task, - uint32_t q_idx, uint16_t *used_id, - bool *used_wrap_counter) +static struct vring_desc * +vring_get_next_desc(struct vhost_blk_queue *vq, struct vring_desc *desc) { - struct vhost_blk_ctrlr *ctrlr; - struct rte_vhost_vring *vq; - struct vring_packed_desc *desc; - int ret; + if (descriptor_has_next_split(desc)) + return &vq->vring.desc[desc->next]; - ctrlr = vhost_blk_ctrlr_find(dev_pathname); - vq = task->blk_task.vq; - - ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx, - task->blk_task.head_idx); - if (ret != 0) - fprintf(stderr, "failed to set last inflight io\n"); - - desc = &vq->desc_packed[*used_id]; - desc->id = task->blk_task.buffer_id; - rte_smp_mb(); - if (*used_wrap_counter) - desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED; - else - desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED); - rte_smp_mb(); + return NULL; +} - *used_id += task->blk_task.iovs_cnt + 2; - if (*used_id >= vq->size) { - *used_id -= vq->size; - *used_wrap_counter = !(*used_wrap_counter); +static struct vring_packed_desc * +vring_get_next_desc_packed(struct vhost_blk_queue *vq, uint16_t *req_idx) +{ + if (descriptor_has_next_packed(&vq->vring.desc_packed[*req_idx])) { + *req_idx = (*req_idx + 1) % vq->vring.size; + return &vq->vring.desc_packed[*req_idx]; } - ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx, - task->blk_task.head_idx); - if (ret != 0) - fprintf(stderr, "failed to clear inflight io\n"); + return NULL; +} + +static struct rte_vhost_inflight_desc_packed * +vring_get_next_inflight_desc(struct vhost_blk_queue *vq, + struct rte_vhost_inflight_desc_packed *desc) +{ + if (!!(desc->flags & VRING_DESC_F_NEXT)) + return 
&vq->inflight_ring.inflight_packed->desc[desc->next]; - /* Send an interrupt back to the guest VM so that it knows - * a completion is ready to be processed. - */ - rte_vhost_vring_call(task->blk_task.bdev->vid, q_idx); + return NULL; } -static void -submit_completion_packed(struct vhost_blk_task *task, uint32_t q_idx, - uint16_t *used_id, bool *used_wrap_counter) +static int +setup_iovs_from_descs_split(struct vhost_blk_ctrlr *ctrlr, + struct vhost_blk_queue *vq, uint16_t req_idx, + struct iovec *iovs, uint32_t *iovs_idx, + uint32_t *payload) { - struct vhost_blk_ctrlr *ctrlr; - struct rte_vhost_vring *vq; - struct vring_packed_desc *desc; - int ret; + struct vring_desc *desc = &vq->vring.desc[req_idx]; - ctrlr = vhost_blk_ctrlr_find(dev_pathname); - vq = task->vq; + do { + /* does not support indirect descriptors */ + assert((desc->flags & VRING_DESC_F_INDIRECT) == 0); - ret = rte_vhost_set_last_inflight_io_packed(ctrlr->bdev->vid, q_idx, - task->inflight_idx); - if (ret != 0) - fprintf(stderr, "failed to set last inflight io\n"); + if (*iovs_idx >= VHOST_BLK_MAX_IOVS) { + fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n"); + return -1; + } - desc = &vq->desc_packed[*used_id]; - desc->id = task->buffer_id; - rte_smp_mb(); - if (*used_wrap_counter) - desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED; - else - desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED); - rte_smp_mb(); + if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx, + desc->addr, desc->len) != 0) { + fprintf(stderr, "Failed to convert desc payload to iovs\n"); + return -1; + } - *used_id += task->iovs_cnt + 2; - if (*used_id >= vq->size) { - *used_id -= vq->size; - *used_wrap_counter = !(*used_wrap_counter); - } + *payload += desc->len; - ret = rte_vhost_clr_inflight_desc_packed(ctrlr->bdev->vid, q_idx, - task->inflight_idx); - if (ret != 0) - fprintf(stderr, "failed to clear inflight io\n"); + desc = vring_get_next_desc(vq, desc); + } while (desc != NULL); - /* Send an interrupt back to the guest VM 
so that it knows - * a completion is ready to be processed. - */ - rte_vhost_vring_call(task->bdev->vid, q_idx); + return 0; } -static void -vhost_process_payload_chain_packed(struct vhost_blk_task *task, - uint16_t *idx) +static int +setup_iovs_from_descs_packed(struct vhost_blk_ctrlr *ctrlr, + struct vhost_blk_queue *vq, uint16_t req_idx, + struct iovec *iovs, uint32_t *iovs_idx, + uint32_t *payload) { - void *data; - uint64_t chunck_len; - - task->iovs_cnt = 0; + struct vring_packed_desc *desc = &vq->vring.desc_packed[req_idx]; do { - chunck_len = task->desc_packed->len; - data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, - task->desc_packed->addr, - &chunck_len); - if (!data || chunck_len != task->desc_packed->len) { - fprintf(stderr, "failed to translate desc address.\n"); - return; + /* does not support indirect descriptors */ + assert((desc->flags & VRING_DESC_F_INDIRECT) == 0); + + if (*iovs_idx >= VHOST_BLK_MAX_IOVS) { + fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n"); + return -1; } - task->iovs[task->iovs_cnt].iov_base = data; - task->iovs[task->iovs_cnt].iov_len = task->desc_packed->len; - task->data_len += task->desc_packed->len; - task->iovs_cnt++; - task->desc_packed = descriptor_get_next_packed(task->vq, idx); - } while (descriptor_has_next_packed(task->desc_packed)); - - task->last_idx = *idx % task->vq->size; - chunck_len = task->desc_packed->len; - task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, - task->desc_packed->addr, - &chunck_len); - if (!task->status || chunck_len != task->desc_packed->len) - fprintf(stderr, "failed to translate desc address.\n"); -} + if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx, + desc->addr, desc->len) != 0) { + fprintf(stderr, "Failed to convert desc payload to iovs\n"); + return -1; + } + *payload += desc->len; -static int -descriptor_is_available(struct rte_vhost_vring *vring, uint16_t idx, - bool avail_wrap_counter) -{ - uint16_t flags = vring->desc_packed[idx].flags; + desc = 
vring_get_next_desc_packed(vq, &req_idx); + } while (desc != NULL); - return ((!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter) && - (!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter)); + return 0; } -static void -process_requestq_packed(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx) +static int +setup_iovs_from_inflight_desc(struct vhost_blk_ctrlr *ctrlr, + struct vhost_blk_queue *vq, uint16_t req_idx, + struct iovec *iovs, uint32_t *iovs_idx, + uint32_t *payload) { - bool avail_wrap_counter, used_wrap_counter; - uint16_t avail_idx, used_idx; - int ret; - uint64_t chunck_len; - struct vhost_blk_queue *blk_vq; - struct rte_vhost_vring *vq; - struct vhost_blk_task *task; - - blk_vq = &ctrlr->bdev->queues[q_idx]; - vq = &blk_vq->vq; - - avail_idx = blk_vq->last_avail_idx; - avail_wrap_counter = blk_vq->avail_wrap_counter; - used_idx = blk_vq->last_used_idx; - used_wrap_counter = blk_vq->used_wrap_counter; - - task = rte_zmalloc(NULL, sizeof(*task), 0); - assert(task != NULL); - task->vq = vq; - task->bdev = ctrlr->bdev; + struct rte_vhost_ring_inflight *inflight_vq; + struct rte_vhost_inflight_desc_packed *desc; - while (descriptor_is_available(vq, avail_idx, avail_wrap_counter)) { - task->head_idx = avail_idx; - task->desc_packed = &task->vq->desc_packed[task->head_idx]; - task->iovs_cnt = 0; - task->data_len = 0; - task->req = NULL; - task->status = NULL; + inflight_vq = &vq->inflight_ring; + desc = &inflight_vq->inflight_packed->desc[req_idx]; + do { /* does not support indirect descriptors */ - assert((task->desc_packed->flags & VRING_DESC_F_INDIRECT) == 0); + assert((desc->flags & VRING_DESC_F_INDIRECT) == 0); - chunck_len = task->desc_packed->len; - task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, - task->desc_packed->addr, &chunck_len); - if (!task->req || chunck_len != task->desc_packed->len) { - fprintf(stderr, "failed to translate desc address.\n"); - rte_free(task); - return; + if (*iovs_idx >= VHOST_BLK_MAX_IOVS) { + fprintf(stderr, 
"Reach VHOST_BLK_MAX_IOVS\n"); + return -1; } - task->desc_packed = descriptor_get_next_packed(task->vq, - &avail_idx); - assert(task->desc_packed != NULL); - if (!descriptor_has_next_packed(task->desc_packed)) { - task->dxfer_dir = BLK_DIR_NONE; - task->last_idx = avail_idx % vq->size; - chunck_len = task->desc_packed->len; - task->status = (void *)(uintptr_t) - gpa_to_vva(task->bdev->vid, - task->desc_packed->addr, - &chunck_len); - if (!task->status || - chunck_len != task->desc_packed->len) { - fprintf(stderr, - "failed to translate desc address.\n"); - rte_free(task); - return; - } - } else { - task->readtype = descriptor_is_wr_packed( - task->desc_packed); - vhost_process_payload_chain_packed(task, &avail_idx); - } - task->buffer_id = vq->desc_packed[task->last_idx].id; - rte_vhost_set_inflight_desc_packed(ctrlr->bdev->vid, q_idx, - task->head_idx, - task->last_idx, - &task->inflight_idx); - - if (++avail_idx >= vq->size) { - avail_idx -= vq->size; - avail_wrap_counter = !avail_wrap_counter; + if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx, + desc->addr, desc->len) != 0) { + fprintf(stderr, "Failed to convert desc payload to iovs\n"); + return -1; } - blk_vq->last_avail_idx = avail_idx; - blk_vq->avail_wrap_counter = avail_wrap_counter; - ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task); - if (ret) { - /* invalid response */ - *task->status = VIRTIO_BLK_S_IOERR; - } else { - /* successfully */ - *task->status = VIRTIO_BLK_S_OK; - } + *payload += desc->len; - submit_completion_packed(task, q_idx, &used_idx, - &used_wrap_counter); - blk_vq->last_used_idx = used_idx; - blk_vq->used_wrap_counter = used_wrap_counter; - } + desc = vring_get_next_inflight_desc(vq, desc); + } while (desc != NULL); - rte_free(task); + return 0; } static void -submit_inflight_vq_packed(struct vhost_blk_ctrlr *ctrlr, - uint16_t q_idx) +process_blk_task(struct vhost_blk_task *task) { - bool used_wrap_counter; - int req_idx, ret; - uint16_t used_idx; - uint64_t chunck_len; - 
struct vhost_blk_queue *blk_vq; - struct rte_vhost_ring_inflight *inflight_vq; - struct rte_vhost_resubmit_info *resubmit_info; - struct rte_vhost_vring *vq; - struct inflight_blk_task *task; - struct vhost_blk_task *blk_task; - struct rte_vhost_inflight_info_packed *inflight_info; - - blk_vq = &ctrlr->bdev->queues[q_idx]; - vq = &blk_vq->vq; - inflight_vq = &blk_vq->inflight_vq; - resubmit_info = inflight_vq->resubmit_inflight; - inflight_info = inflight_vq->inflight_packed; - used_idx = blk_vq->last_used_idx; - used_wrap_counter = blk_vq->used_wrap_counter; - - task = rte_malloc(NULL, sizeof(*task), 0); - if (!task) { - fprintf(stderr, "failed to allocate memory\n"); - return; - } - blk_task = &task->blk_task; - blk_task->vq = vq; - blk_task->bdev = ctrlr->bdev; - task->inflight_packed = inflight_vq->inflight_packed; - - while (resubmit_info->resubmit_num-- > 0) { - req_idx = resubmit_info->resubmit_num; - blk_task->head_idx = - resubmit_info->resubmit_list[req_idx].index; - task->inflight_desc = - &inflight_info->desc[blk_task->head_idx]; - task->blk_task.iovs_cnt = 0; - task->blk_task.data_len = 0; - task->blk_task.req = NULL; - task->blk_task.status = NULL; - - /* update the avail idx too - * as it's initial value equals to used idx - */ - blk_vq->last_avail_idx += task->inflight_desc->num; - if (blk_vq->last_avail_idx >= vq->size) { - blk_vq->last_avail_idx -= vq->size; - blk_vq->avail_wrap_counter = - !blk_vq->avail_wrap_counter; - } + uint32_t payload = 0; - /* does not support indirect descriptors */ - assert(task->inflight_desc != NULL); - assert((task->inflight_desc->flags & - VRING_DESC_F_INDIRECT) == 0); - - chunck_len = task->inflight_desc->len; - blk_task->req = (void *)(uintptr_t) - gpa_to_vva(blk_task->bdev->vid, - task->inflight_desc->addr, - &chunck_len); - if (!blk_task->req || - chunck_len != task->inflight_desc->len) { - fprintf(stderr, "failed to translate desc address.\n"); - rte_free(task); - return; - } + if (task->vq->packed_ring) { + 
struct rte_vhost_ring_inflight *inflight_ring; + struct rte_vhost_resubmit_info *resubmit_inflight; - task->inflight_desc = inflight_desc_get_next( - task->inflight_packed, task->inflight_desc); - assert(task->inflight_desc != NULL); - if (!inflight_desc_has_next(task->inflight_desc)) { - blk_task->dxfer_dir = BLK_DIR_NONE; - chunck_len = task->inflight_desc->len; - blk_task->status = (void *)(uintptr_t) - gpa_to_vva(blk_task->bdev->vid, - task->inflight_desc->addr, - &chunck_len); - if (!blk_task->status || - chunck_len != task->inflight_desc->len) { - fprintf(stderr, - "failed to translate desc address.\n"); - rte_free(task); + inflight_ring = &task->vq->inflight_ring; + resubmit_inflight = inflight_ring->resubmit_inflight; + + if (resubmit_inflight != NULL && + resubmit_inflight->resubmit_list != NULL) { + if (setup_iovs_from_inflight_desc(task->ctrlr, task->vq, + task->req_idx, task->iovs, &task->iovs_cnt, + &payload)) { + fprintf(stderr, "Failed to setup iovs\n"); return; } } else { - blk_task->readtype = - inflight_desc_is_wr(task->inflight_desc); - inflight_process_payload_chain_packed(task); + if (setup_iovs_from_descs_packed(task->ctrlr, task->vq, + task->req_idx, task->iovs, &task->iovs_cnt, + &payload)) { + fprintf(stderr, "Failed to setup iovs\n"); + return; + } } + } else { + if (setup_iovs_from_descs_split(task->ctrlr, task->vq, + task->req_idx, task->iovs, &task->iovs_cnt, &payload)) { + fprintf(stderr, "Failed to setup iovs\n"); + return; + } + } - blk_task->buffer_id = task->inflight_desc->id; - - ret = vhost_bdev_process_blk_commands(ctrlr->bdev, blk_task); - if (ret) - /* invalid response */ - *blk_task->status = VIRTIO_BLK_S_IOERR; - else - /* successfully */ - *blk_task->status = VIRTIO_BLK_S_OK; - - inflight_submit_completion_packed(task, q_idx, &used_idx, - &used_wrap_counter); + /* First IOV must be the req head. 
*/ + task->req = (struct virtio_blk_outhdr *)task->iovs[0].iov_base; + assert(sizeof(*task->req) == task->iovs[0].iov_len); - blk_vq->last_used_idx = used_idx; - blk_vq->used_wrap_counter = used_wrap_counter; - } + /* Last IOV must be the status tail. */ + task->status = (uint8_t *)task->iovs[task->iovs_cnt - 1].iov_base; + assert(sizeof(*task->status) == task->iovs[task->iovs_cnt - 1].iov_len); - rte_free(task); -} + /* Transport data len */ + task->data_len = payload - task->iovs[0].iov_len - + task->iovs[task->iovs_cnt - 1].iov_len; -static struct vring_desc * -descriptor_get_next_split(struct vring_desc *vq_desc, - struct vring_desc *cur_desc) -{ - return &vq_desc[cur_desc->next]; -} + if (vhost_bdev_process_blk_commands(task->ctrlr->bdev, task)) + /* invalid response */ + *task->status = VIRTIO_BLK_S_IOERR; + else + /* successfully */ + *task->status = VIRTIO_BLK_S_OK; -static bool -descriptor_has_next_split(struct vring_desc *cur_desc) -{ - return !!(cur_desc->flags & VRING_DESC_F_NEXT); + if (task->vq->packed_ring) + enqueue_task_packed(task); + else + enqueue_task(task); } -static bool -descriptor_is_wr_split(struct vring_desc *cur_desc) +static void +blk_task_init(struct vhost_blk_task *task) { - return !!(cur_desc->flags & VRING_DESC_F_WRITE); + task->iovs_cnt = 0; + task->data_len = 0; + task->req = NULL; + task->status = NULL; } static void -vhost_process_payload_chain_split(struct vhost_blk_task *task) +submit_inflight_vq(struct vhost_blk_queue *vq) { - void *data; - uint64_t chunck_len; + struct rte_vhost_ring_inflight *inflight_ring; + struct rte_vhost_resubmit_info *resubmit_inflight; + struct vhost_blk_task *task; - task->iovs_cnt = 0; + inflight_ring = &vq->inflight_ring; + resubmit_inflight = inflight_ring->resubmit_inflight; - do { - chunck_len = task->desc_split->len; - data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, - task->desc_split->addr, - &chunck_len); - if (!data || chunck_len != task->desc_split->len) { - fprintf(stderr, "failed 
to translate desc address.\n"); - return; - } + if (resubmit_inflight == NULL || + resubmit_inflight->resubmit_num == 0) + return; - task->iovs[task->iovs_cnt].iov_base = data; - task->iovs[task->iovs_cnt].iov_len = task->desc_split->len; - task->data_len += task->desc_split->len; - task->iovs_cnt++; - task->desc_split = - descriptor_get_next_split(task->vq->desc, task->desc_split); - } while (descriptor_has_next_split(task->desc_split)); - - chunck_len = task->desc_split->len; - task->status = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, - task->desc_split->addr, - &chunck_len); - if (!task->status || chunck_len != task->desc_split->len) - fprintf(stderr, "failed to translate desc address.\n"); -} + fprintf(stdout, "Resubmit inflight num is %d\n", + resubmit_inflight->resubmit_num); -static void -submit_completion_split(struct vhost_blk_task *task, uint32_t vid, - uint32_t q_idx) -{ - struct rte_vhost_vring *vq; - struct vring_used *used; + while (resubmit_inflight->resubmit_num-- > 0) { + uint16_t desc_idx; - vq = task->vq; - used = vq->used; + desc_idx = resubmit_inflight->resubmit_list[ + resubmit_inflight->resubmit_num].index; - rte_vhost_set_last_inflight_io_split(vid, q_idx, task->req_idx); + if (vq->packed_ring) { + uint16_t task_idx; + struct rte_vhost_inflight_desc_packed *desc; - /* Fill out the next entry in the "used" ring. id = the - * index of the descriptor that contained the blk request. - * len = the total amount of data transferred for the blk - * request. We must report the correct len, for variable - * length blk CDBs, where we may return less data than - * allocated by the guest VM. 
- */ - used->ring[used->idx & (vq->size - 1)].id = task->req_idx; - used->ring[used->idx & (vq->size - 1)].len = task->data_len; - rte_smp_mb(); - used->idx++; - rte_smp_mb(); + desc = inflight_ring->inflight_packed->desc; + task_idx = desc[desc[desc_idx].last].id; + task = &vq->tasks[task_idx]; - rte_vhost_clr_inflight_desc_split(vid, q_idx, used->idx, task->req_idx); + task->req_idx = desc_idx; + task->chain_num = desc[desc_idx].num; + task->buffer_id = task_idx; + task->inflight_idx = desc_idx; - /* Send an interrupt back to the guest VM so that it knows - * a completion is ready to be processed. - */ - rte_vhost_vring_call(task->bdev->vid, q_idx); + vq->last_avail_idx += desc[desc_idx].num; + if (vq->last_avail_idx >= vq->vring.size) { + vq->last_avail_idx -= vq->vring.size; + vq->avail_wrap_counter = + !vq->avail_wrap_counter; + } + } else + /* In split ring, the desc_idx is the req_id + * which was initialized when allocated the task pool. + */ + task = &vq->tasks[desc_idx]; + + blk_task_init(task); + process_blk_task(task); + } + + free(resubmit_inflight->resubmit_list); + resubmit_inflight->resubmit_list = NULL; } -static void -submit_inflight_vq_split(struct vhost_blk_ctrlr *ctrlr, - uint32_t q_idx) +/* Use the buffer_id as the task_idx */ +static uint16_t +vhost_blk_vq_get_desc_chain_buffer_id(struct vhost_blk_queue *vq, + uint16_t *req_head, uint16_t *num) { - struct vhost_blk_queue *blk_vq; - struct rte_vhost_ring_inflight *inflight_vq; - struct rte_vhost_resubmit_info *resubmit_inflight; - struct rte_vhost_resubmit_desc *resubmit_list; - struct vhost_blk_task *task; - int req_idx; - uint64_t chunck_len; - int ret; + struct vring_packed_desc *desc = &vq->vring.desc_packed[ + vq->last_avail_idx]; - blk_vq = &ctrlr->bdev->queues[q_idx]; - inflight_vq = &blk_vq->inflight_vq; - resubmit_inflight = inflight_vq->resubmit_inflight; - resubmit_list = resubmit_inflight->resubmit_list; + *req_head = vq->last_avail_idx; + *num = 1; - task = rte_zmalloc(NULL, 
sizeof(*task), 0); - assert(task != NULL); + while (descriptor_has_next_packed(desc)) { + vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size; + desc = &vq->vring.desc_packed[vq->last_avail_idx]; + *num += 1; + } - task->ctrlr = ctrlr; - task->bdev = ctrlr->bdev; - task->vq = &blk_vq->vq; + /* Point to next desc */ + vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size; + if (vq->last_avail_idx < *req_head) + vq->avail_wrap_counter = !vq->avail_wrap_counter; - while (resubmit_inflight->resubmit_num-- > 0) { - req_idx = resubmit_list[resubmit_inflight->resubmit_num].index; - task->req_idx = req_idx; - task->desc_split = &task->vq->desc[task->req_idx]; - task->iovs_cnt = 0; - task->data_len = 0; - task->req = NULL; - task->status = NULL; + return desc->id; +} - /* does not support indirect descriptors */ - assert(task->desc_split != NULL); - assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0); +static uint16_t +vq_get_desc_idx(struct vhost_blk_queue *vq) +{ + uint16_t desc_idx; + uint16_t last_avail_idx; - chunck_len = task->desc_split->len; - task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, - task->desc_split->addr, &chunck_len); - if (!task->req || chunck_len != task->desc_split->len) { - fprintf(stderr, "failed to translate desc address.\n"); - rte_free(task); - return; - } + last_avail_idx = vq->last_avail_idx & (vq->vring.size - 1); + desc_idx = vq->vring.avail->ring[last_avail_idx]; + vq->last_avail_idx++; - task->desc_split = descriptor_get_next_split(task->vq->desc, - task->desc_split); - if (!descriptor_has_next_split(task->desc_split)) { - task->dxfer_dir = BLK_DIR_NONE; - chunck_len = task->desc_split->len; - task->status = (void *)(uintptr_t) - gpa_to_vva(task->bdev->vid, - task->desc_split->addr, - &chunck_len); - if (!task->status || - chunck_len != task->desc_split->len) { - fprintf(stderr, - "failed to translate desc address.\n"); - rte_free(task); - return; - } - } else { - task->readtype = - 
descriptor_is_wr_split(task->desc_split); - vhost_process_payload_chain_split(task); - } + return desc_idx; +} - ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task); - if (ret) { - /* invalid response */ - *task->status = VIRTIO_BLK_S_IOERR; - } else { - /* successfully */ - *task->status = VIRTIO_BLK_S_OK; - } - submit_completion_split(task, ctrlr->bdev->vid, q_idx); - } +static int +vhost_blk_vq_is_avail(struct vhost_blk_queue *vq) +{ + if (vq->packed_ring) { + uint16_t flags = vq->vring.desc_packed[ + vq->last_avail_idx].flags; + bool avail_wrap_counter = vq->avail_wrap_counter; - rte_free(task); + return (!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter && + !!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter); + } else { + if (vq->vring.avail->idx != vq->last_avail_idx) + return 1; + + return 0; + } } static void -process_requestq_split(struct vhost_blk_ctrlr *ctrlr, uint32_t q_idx) +process_vq(struct vhost_blk_queue *vq) { - int ret; - int req_idx; - uint16_t last_idx; - uint64_t chunck_len; - struct vhost_blk_queue *blk_vq; - struct rte_vhost_vring *vq; struct vhost_blk_task *task; - blk_vq = &ctrlr->bdev->queues[q_idx]; - vq = &blk_vq->vq; + if (vq->packed_ring) { + while (vhost_blk_vq_is_avail(vq)) { + uint16_t task_idx, req_idx, last_idx, chain_num; - task = rte_zmalloc(NULL, sizeof(*task), 0); - assert(task != NULL); - task->ctrlr = ctrlr; - task->bdev = ctrlr->bdev; - task->vq = vq; + task_idx = vhost_blk_vq_get_desc_chain_buffer_id(vq, + &req_idx, &chain_num); + task = &vq->tasks[task_idx]; - while (vq->avail->idx != blk_vq->last_avail_idx) { - last_idx = blk_vq->last_avail_idx & (vq->size - 1); - req_idx = vq->avail->ring[last_idx]; - task->req_idx = req_idx; - task->desc_split = &task->vq->desc[task->req_idx]; - task->iovs_cnt = 0; - task->data_len = 0; - task->req = NULL; - task->status = NULL; + blk_task_init(task); + task->req_idx = req_idx; + task->chain_num = chain_num; + task->buffer_id = task_idx; + last_idx = (req_idx + chain_num - 
1) % vq->vring.size; - rte_vhost_set_inflight_desc_split(ctrlr->bdev->vid, q_idx, - task->req_idx); - - /* does not support indirect descriptors */ - assert((task->desc_split->flags & VRING_DESC_F_INDIRECT) == 0); + rte_vhost_set_inflight_desc_packed(task->ctrlr->vid, + vq->id, + task->req_idx, + last_idx, + &task->inflight_idx); - chunck_len = task->desc_split->len; - task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, - task->desc_split->addr, &chunck_len); - if (!task->req || chunck_len != task->desc_split->len) { - fprintf(stderr, "failed to translate desc address.\n"); - rte_free(task); - return; - } - - task->desc_split = descriptor_get_next_split(task->vq->desc, - task->desc_split); - if (!descriptor_has_next_split(task->desc_split)) { - task->dxfer_dir = BLK_DIR_NONE; - chunck_len = task->desc_split->len; - task->status = (void *)(uintptr_t) - gpa_to_vva(task->bdev->vid, - task->desc_split->addr, - &chunck_len); - if (!task->status || - chunck_len != task->desc_split->len) { - fprintf(stderr, - "failed to translate desc address.\n"); - rte_free(task); - return; - } - } else { - task->readtype = - descriptor_is_wr_split(task->desc_split); - vhost_process_payload_chain_split(task); + process_blk_task(task); } - blk_vq->last_avail_idx++; - - ret = vhost_bdev_process_blk_commands(ctrlr->bdev, task); - if (ret) { - /* invalid response */ - *task->status = VIRTIO_BLK_S_IOERR; - } else { - /* successfully */ - *task->status = VIRTIO_BLK_S_OK; + } else { + while (vhost_blk_vq_is_avail(vq)) { + uint16_t desc_idx; + + desc_idx = vq_get_desc_idx(vq); + task = &vq->tasks[desc_idx]; + + blk_task_init(task); + rte_vhost_set_inflight_desc_split(task->ctrlr->vid, + vq->id, + task->req_idx); + process_blk_task(task); } - - submit_completion_split(task, ctrlr->bdev->vid, q_idx); } - - rte_free(task); } static void * ctrlr_worker(void *arg) { struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg; - struct vhost_blk_queue *blk_vq; - struct 
rte_vhost_ring_inflight *inflight_vq; cpu_set_t cpuset; pthread_t thread; int i; @@ -774,106 +543,128 @@ ctrlr_worker(void *arg) CPU_SET(0, &cpuset); pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + for (i = 0; i < NUM_OF_BLK_QUEUES; i++) + submit_inflight_vq(&ctrlr->queues[i]); + + while (worker_thread_status != WORKER_STATE_STOP) + for (i = 0; i < NUM_OF_BLK_QUEUES; i++) + process_vq(&ctrlr->queues[i]); + + fprintf(stdout, "Ctrlr Worker Thread Exiting\n"); + sem_post(&exit_sem); + return NULL; +} + +static int +alloc_task_pool(struct vhost_blk_ctrlr *ctrlr) +{ + struct vhost_blk_queue *vq; + int i, j; + for (i = 0; i < NUM_OF_BLK_QUEUES; i++) { - blk_vq = &ctrlr->bdev->queues[i]; - inflight_vq = &blk_vq->inflight_vq; - if (inflight_vq->resubmit_inflight != NULL && - inflight_vq->resubmit_inflight->resubmit_num != 0) { - if (ctrlr->packed_ring) - submit_inflight_vq_packed(ctrlr, i); - else - submit_inflight_vq_split(ctrlr, i); + vq = &ctrlr->queues[i]; + + vq->tasks = rte_zmalloc(NULL, + sizeof(struct vhost_blk_task) * vq->vring.size, 0); + if (!vq->tasks) { + fprintf(stderr, "Failed to allocate task memory\n"); + return -1; } - } - while (!g_should_stop && ctrlr->bdev != NULL) { - for (i = 0; i < NUM_OF_BLK_QUEUES; i++) { - if (ctrlr->packed_ring) - process_requestq_packed(ctrlr, i); - else - process_requestq_split(ctrlr, i); + for (j = 0; j < vq->vring.size; j++) { + vq->tasks[j].req_idx = j; + vq->tasks[j].ctrlr = ctrlr; + vq->tasks[j].vq = vq; } } - g_should_stop = 2; - fprintf(stdout, "Ctrlr Worker Thread Exiting\n"); - sem_post(&exit_sem); - return NULL; + return 0; +} + +static void +free_task_pool(struct vhost_blk_ctrlr *ctrlr) +{ + int i; + + for (i = 0; i < NUM_OF_BLK_QUEUES; i++) + rte_free(ctrlr->queues[i].tasks); } static int new_device(int vid) { struct vhost_blk_ctrlr *ctrlr; - struct vhost_blk_queue *blk_vq; - struct rte_vhost_vring *vq; + struct vhost_blk_queue *vq; + char path[PATH_MAX]; uint64_t features; pthread_t tid; int i, ret; 
+ bool packed_ring; - ctrlr = vhost_blk_ctrlr_find(dev_pathname); + ret = rte_vhost_get_ifname(vid, path, PATH_MAX); + if (ret) { + fprintf(stderr, "Failed to get the socket path\n"); + return -1; + } + + ctrlr = vhost_blk_ctrlr_find(path); if (!ctrlr) { - fprintf(stderr, "Controller is not ready\n"); + fprintf(stderr, "Failed to find controller\n"); return -1; } if (ctrlr->started) return 0; - ctrlr->bdev->vid = vid; + ctrlr->vid = vid; ret = rte_vhost_get_negotiated_features(vid, &features); if (ret) { - fprintf(stderr, "failed to get the negotiated features\n"); + fprintf(stderr, "Failed to get the negotiated features\n"); return -1; } - ctrlr->packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED)); - - ret = rte_vhost_get_mem_table(vid, &ctrlr->mem); - if (ret) - fprintf(stderr, "Get Controller memory region failed\n"); - assert(ctrlr->mem != NULL); + packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED)); /* Disable Notifications and init last idx */ for (i = 0; i < NUM_OF_BLK_QUEUES; i++) { - blk_vq = &ctrlr->bdev->queues[i]; - vq = &blk_vq->vq; - - ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq); - assert(ret == 0); - - ret = rte_vhost_get_vring_base(ctrlr->bdev->vid, i, - &blk_vq->last_avail_idx, - &blk_vq->last_used_idx); - assert(ret == 0); - - ret = rte_vhost_get_vhost_ring_inflight(ctrlr->bdev->vid, i, - &blk_vq->inflight_vq); - assert(ret == 0); - - if (ctrlr->packed_ring) { + vq = &ctrlr->queues[i]; + vq->id = i; + + assert(rte_vhost_get_vhost_vring(ctrlr->vid, i, + &vq->vring) == 0); + assert(rte_vhost_get_vring_base(ctrlr->vid, i, + &vq->last_avail_idx, + &vq->last_used_idx) == 0); + assert(rte_vhost_get_vhost_ring_inflight(ctrlr->vid, i, + &vq->inflight_ring) == 0); + + if (packed_ring) { /* for the reconnection */ - ret = rte_vhost_get_vring_base_from_inflight( - ctrlr->bdev->vid, i, - &blk_vq->last_avail_idx, - &blk_vq->last_used_idx); - assert(ret == 0); + assert(rte_vhost_get_vring_base_from_inflight( + ctrlr->vid, i, + 
&vq->last_avail_idx, + &vq->last_used_idx) == 0); - blk_vq->avail_wrap_counter = blk_vq->last_avail_idx & + vq->avail_wrap_counter = vq->last_avail_idx & (1 << 15); - blk_vq->last_avail_idx = blk_vq->last_avail_idx & + vq->last_avail_idx = vq->last_avail_idx & 0x7fff; - blk_vq->used_wrap_counter = blk_vq->last_used_idx & + vq->used_wrap_counter = vq->last_used_idx & (1 << 15); - blk_vq->last_used_idx = blk_vq->last_used_idx & + vq->last_used_idx = vq->last_used_idx & 0x7fff; } + vq->packed_ring = packed_ring; rte_vhost_enable_guest_notification(vid, i, 0); } + assert(rte_vhost_get_mem_table(vid, &ctrlr->mem) == 0); + assert(ctrlr->mem != NULL); + assert(alloc_task_pool(ctrlr) == 0); + /* start polling vring */ - g_should_stop = 0; - fprintf(stdout, "New Device %s, Device ID %d\n", dev_pathname, vid); + worker_thread_status = WORKER_STATE_START; + fprintf(stdout, "New Device %s, Device ID %d\n", path, vid); if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) { fprintf(stderr, "Worker Thread Started Failed\n"); return -1; @@ -890,7 +681,7 @@ destroy_device(int vid) { char path[PATH_MAX]; struct vhost_blk_ctrlr *ctrlr; - struct vhost_blk_queue *blk_vq; + struct vhost_blk_queue *vq; int i, ret; ret = rte_vhost_get_ifname(vid, path, PATH_MAX); @@ -909,27 +700,27 @@ destroy_device(int vid) if (!ctrlr->started) return; - g_should_stop = 1; - while (g_should_stop != 2) - ; + worker_thread_status = WORKER_STATE_STOP; + sem_wait(&exit_sem); for (i = 0; i < NUM_OF_BLK_QUEUES; i++) { - blk_vq = &ctrlr->bdev->queues[i]; - if (ctrlr->packed_ring) { - blk_vq->last_avail_idx |= (blk_vq->avail_wrap_counter << + vq = &ctrlr->queues[i]; + if (vq->packed_ring) { + vq->last_avail_idx |= (vq->avail_wrap_counter << 15); - blk_vq->last_used_idx |= (blk_vq->used_wrap_counter << + vq->last_used_idx |= (vq->used_wrap_counter << 15); } - rte_vhost_set_vring_base(ctrlr->bdev->vid, i, - blk_vq->last_avail_idx, - blk_vq->last_used_idx); + + rte_vhost_set_vring_base(ctrlr->vid, i, + 
vq->last_avail_idx, + vq->last_used_idx); } + free_task_pool(ctrlr); free(ctrlr->mem); ctrlr->started = 0; - sem_wait(&exit_sem); } static int @@ -964,13 +755,13 @@ vhost_blk_bdev_construct(const char *bdev_name, bdev->blockcnt = blk_cnt; bdev->write_cache = wce_enable; - fprintf(stdout, "blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen, + fprintf(stdout, "Blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen, bdev->blockcnt); /* use memory as disk storage space */ bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0); if (!bdev->data) { - fprintf(stderr, "no enough reserved huge memory for disk\n"); + fprintf(stderr, "No enough reserved huge memory for disk\n"); free(bdev); return NULL; } @@ -997,7 +788,7 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name) unlink(dev_pathname); if (rte_vhost_driver_register(dev_pathname, 0) != 0) { - fprintf(stderr, "socket %s already exists\n", dev_pathname); + fprintf(stderr, "Socket %s already exists\n", dev_pathname); return NULL; } @@ -1008,7 +799,7 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name) return NULL; } - /* set proper features */ + /* set vhost user protocol features */ vhost_dev_install_rte_compat_hooks(dev_pathname); ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE); @@ -1033,28 +824,32 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name) } static void -signal_handler(__rte_unused int signum) +vhost_blk_ctrlr_destroy(struct vhost_blk_ctrlr *ctrlr) { - struct vhost_blk_ctrlr *ctrlr; - - unlink(dev_pathname); + if (ctrlr->bdev != NULL) { + if (ctrlr->bdev->data != NULL) + rte_free(ctrlr->bdev->data); - if (g_should_stop != -1) { - g_should_stop = 1; - while (g_should_stop != 2) - ; + rte_free(ctrlr->bdev); } + rte_free(ctrlr); + + rte_vhost_driver_unregister(dev_pathname); +} + +static void +signal_handler(__rte_unused int signum) +{ + struct vhost_blk_ctrlr *ctrlr; ctrlr = vhost_blk_ctrlr_find(dev_pathname); - if (ctrlr != NULL) { - if (ctrlr->bdev != NULL) { - rte_free(ctrlr->bdev->data); - 
rte_free(ctrlr->bdev); - } - rte_free(ctrlr); - } + if (ctrlr == NULL) + return; - rte_vhost_driver_unregister(dev_pathname); + if (ctrlr->started) + destroy_device(ctrlr->vid); + + vhost_blk_ctrlr_destroy(ctrlr); exit(0); } @@ -1062,14 +857,12 @@ int main(int argc, char *argv[]) { int ret; - signal(SIGINT, signal_handler); - /* init EAL */ ret = rte_eal_init(argc, argv); if (ret < 0) rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); - g_vhost_ctrlr = vhost_blk_ctrlr_construct("vhost.socket"); + g_vhost_ctrlr = vhost_blk_ctrlr_construct(CTRLR_NAME); if (g_vhost_ctrlr == NULL) { fprintf(stderr, "Construct vhost blk controller failed\n"); return 0; @@ -1080,6 +873,8 @@ int main(int argc, char *argv[]) return -1; } + signal(SIGINT, signal_handler); + rte_vhost_driver_start(dev_pathname); /* loop for exit the application */ diff --git a/examples/vhost_blk/vhost_blk.h b/examples/vhost_blk/vhost_blk.h index 933e2b7c5..4a8040588 100644 --- a/examples/vhost_blk/vhost_blk.h +++ b/examples/vhost_blk/vhost_blk.h @@ -30,12 +30,18 @@ struct vring_packed_desc { #endif struct vhost_blk_queue { - struct rte_vhost_vring vq; - struct rte_vhost_ring_inflight inflight_vq; + struct rte_vhost_vring vring; + struct rte_vhost_ring_inflight inflight_ring; + uint16_t last_avail_idx; uint16_t last_used_idx; + uint16_t id; + bool avail_wrap_counter; bool used_wrap_counter; + bool packed_ring; + + struct vhost_blk_task *tasks; }; #define NUM_OF_BLK_QUEUES 1 @@ -43,10 +49,6 @@ struct vhost_blk_queue { #define min(a, b) (((a) < (b)) ? (a) : (b)) struct vhost_block_dev { - /** ID for vhost library. */ - int vid; - /** Queues for the block device */ - struct vhost_blk_queue queues[NUM_OF_BLK_QUEUES]; /** Unique name for this block device. */ char name[64]; @@ -68,8 +70,10 @@ struct vhost_block_dev { struct vhost_blk_ctrlr { uint8_t started; - uint8_t packed_ring; - uint8_t need_restart; + /** ID for vhost library. 
*/ + int vid; + /** Queues for the block device */ + struct vhost_blk_queue queues[NUM_OF_BLK_QUEUES]; /** Only support 1 LUN for the example */ struct vhost_block_dev *bdev; /** VM memory region */ @@ -85,31 +89,20 @@ enum blk_data_dir { }; struct vhost_blk_task { - uint8_t readtype; uint8_t req_idx; - uint16_t head_idx; - uint16_t last_idx; + uint16_t chain_num; uint16_t inflight_idx; uint16_t buffer_id; uint32_t dxfer_dir; uint32_t data_len; - struct virtio_blk_outhdr *req; + struct virtio_blk_outhdr *req; volatile uint8_t *status; - struct iovec iovs[VHOST_BLK_MAX_IOVS]; uint32_t iovs_cnt; - struct vring_packed_desc *desc_packed; - struct vring_desc *desc_split; - struct rte_vhost_vring *vq; - struct vhost_block_dev *bdev; - struct vhost_blk_ctrlr *ctrlr; -}; -struct inflight_blk_task { - struct vhost_blk_task blk_task; - struct rte_vhost_inflight_desc_packed *inflight_desc; - struct rte_vhost_inflight_info_packed *inflight_packed; + struct vhost_blk_queue *vq; + struct vhost_blk_ctrlr *ctrlr; }; struct vhost_blk_ctrlr *g_vhost_ctrlr; -- 2.17.2