* [dpdk-dev] [PATCH] [v1]vhost: support inflight share memory protocol feature @ 2019-06-26 16:38 JinYu 2019-07-05 8:47 ` Maxime Coquelin 2019-07-08 17:53 ` [dpdk-dev] [PATCH] [v2, 1/2]vhost: " JinYu 0 siblings, 2 replies; 13+ messages in thread From: JinYu @ 2019-06-26 16:38 UTC (permalink / raw) To: dev; +Cc: changpeng.liu, JinYu, LinLi, XunNi, YuZhang This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared buffer between qemu and backend. Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the shared buffer from backend. Then qemu should send it back through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. This shared buffer is used to process inflight I/O when backend reconnect. Signed-off-by: LinLi <lilin24@baidu.com> Signed-off-by: XunNi <nixun@baidu.com> Signed-off-by: YuZhang <zhangyu31@baidu.com> Signed-off-by: JinYu <jin.yu@intel.com> --- V1 - specify the APIs are split-ring only --- lib/librte_vhost/rte_vhost.h | 99 +++++++++ lib/librte_vhost/rte_vhost_version.map | 4 + lib/librte_vhost/vhost.c | 135 ++++++++++++ lib/librte_vhost/vhost.h | 12 + lib/librte_vhost/vhost_user.c | 292 +++++++++++++++++++++++++ lib/librte_vhost/vhost_user.h | 13 +- 6 files changed, 554 insertions(+), 1 deletion(-) diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 0226b3eff..c9959c15a 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -71,6 +71,10 @@ extern "C" { #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 #endif +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 +#endif + /** Indicate whether protocol features negotiation is supported. */ #ifndef VHOST_USER_F_PROTOCOL_FEATURES #define VHOST_USER_F_PROTOCOL_FEATURES 30 @@ -98,12 +102,41 @@ struct rte_vhost_memory { struct rte_vhost_mem_region regions[]; }; +struct inflight_desc_split { + uint8_t inflight; + uint8_t padding[5]; + uint16_t next; + uint64_t counter; +}; + +struct inflight_info_split { + uint64_t features; + uint16_t version; + uint16_t desc_num; + uint16_t last_inflight_io; + uint16_t used_idx; + struct inflight_desc_split desc[0]; +}; + +struct resubmit_desc { + uint16_t index; + uint64_t counter; +}; + +struct resubmit_info { + struct resubmit_desc *resubmit_list; + uint16_t resubmit_num; +}; + struct rte_vhost_vring { struct vring_desc *desc; struct vring_avail *avail; struct vring_used *used; uint64_t log_guest_addr; + struct inflight_info_split *inflight; + struct resubmit_info *resubmit_inflight; + /** Deprecated, use rte_vhost_vring_call() instead. */ int callfd; @@ -603,6 +636,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, */ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); +/** + * Get guest vring info, including the vring address, vring size, inflight, etc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param vring + * the structure to hold the requested vring info + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_vhost_vring_with_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_vring *vring); + /** * Get guest vring info, including the vring address, vring size, etc. * @@ -631,6 +680,56 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, */ int rte_vhost_vring_call(int vid, uint16_t vring_idx); +/** + * set inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t idx); + +/** + * clear inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param last_used_idx + * next free used_idx + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx); + +/** + * set last inflight io index. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, + uint16_t idx); + /** * Get vhost RX queue avail count. * diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 5f1d4a75c..709593701 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -87,4 +87,8 @@ EXPERIMENTAL { rte_vdpa_relay_vring_used; rte_vhost_extern_callback_register; rte_vhost_driver_set_protocol_features; + rte_vhost_set_inflight_desc_split; + rte_vhost_clr_inflight_desc_split; + rte_vhost_set_last_inflight_io_split; + rte_vhost_get_vhost_vring_with_inflight_split; }; diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 981837b5d..ed40b5b58 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -240,6 +240,16 @@ cleanup_vq(struct vhost_virtqueue *vq, int destroy) close(vq->callfd); if (vq->kickfd >= 0) close(vq->kickfd); + if (vq->inflight) + vq->inflight = NULL; + if (vq->resubmit_inflight->resubmit_list) { + free(vq->resubmit_inflight->resubmit_list); + vq->resubmit_inflight->resubmit_list = NULL; + } + if (vq->resubmit_inflight) { + free(vq->resubmit_inflight); + vq->resubmit_inflight = NULL; + } } /* @@ -726,6 +736,39 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) return 0; } +int __rte_experimental +rte_vhost_get_vhost_vring_with_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_vring *vring) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (dev == NULL || vring == NULL) + return -1; + + if (vring_idx >= VHOST_MAX_VRING) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + vring->desc = vq->desc; + vring->avail = vq->avail; + vring->used = vq->used; + vring->log_guest_addr = vq->log_guest_addr; + + vring->callfd = vq->callfd; + vring->kickfd = vq->kickfd; + vring->size = vq->size; + + vring->inflight = vq->inflight; + vring->resubmit_inflight = vq->resubmit_inflight; + + return 0; +} + int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct rte_vhost_vring *vring) @@ -781,6 +824,98 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) return 0; } +int +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight)) + return -1; + + vq->inflight->desc[idx].counter = vq->counter++; + vq->inflight->desc[idx].inflight = 1; + return 0; +} + +int +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight)) + return -1; + + rte_compiler_barrier(); + + vq->inflight->desc[idx].inflight = 0; + + rte_compiler_barrier(); + + vq->inflight->used_idx = last_used_idx; + return 0; +} + +int +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + if (unlikely(!vq->inflight)) + return -1; + + vq->inflight->last_inflight_io = idx; + return 0; +} + uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id) { diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 884befa85..c18356c15 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -128,6 +128,11 @@ struct vhost_virtqueue { /* Physical address of used ring, for logging */ uint64_t log_guest_addr; + /* Inflight share memory info */ + struct inflight_info_split *inflight; + struct resubmit_info *resubmit_inflight; + uint64_t counter; + uint16_t nr_zmbuf; uint16_t zmbuf_size; uint16_t last_zmbuf_idx; @@ -286,6 +291,12 @@ struct guest_page { uint64_t size; }; +struct inflight_mem_info { + int fd; + void *addr; + uint64_t size; +}; + /** * Device structure contains all configuration information relating * to the device. @@ -303,6 +314,7 @@ struct virtio_net { uint32_t nr_vring; int dequeue_zero_copy; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; + struct inflight_mem_info inflight_info; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) char ifname[IF_NAME_SZ]; uint64_t log_size; diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index c9e29ece8..41dfeb3b2 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -31,6 +31,8 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <assert.h> +#include <sys/syscall.h> +#include <asm/unistd.h> #ifdef RTE_LIBRTE_VHOST_NUMA #include <numaif.h> #endif @@ -49,6 +51,15 @@ #define VIRTIO_MIN_MTU 68 #define VIRTIO_MAX_MTU 65535 +#define INFLIGHT_ALIGNMENT 64 +#define INFLIGHT_VERSION 0xabcd +#define VIRTQUEUE_MAX_SIZE 1024 + +#define CLOEXEC 0x0001U + +#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) + static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NONE] = "VHOST_USER_NONE", [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", @@ -78,6 +89,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE", [VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN", [VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END", + [VHOST_USER_GET_INFLIGHT_FD] = "VHOST_USER_GET_INFLIGHT_FD", + [VHOST_USER_SET_INFLIGHT_FD] = "VHOST_USER_SET_INFLIGHT_FD", }; static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg); @@ -160,6 +173,16 @@ vhost_backend_cleanup(struct virtio_net *dev) dev->log_addr = 0; } + if (dev->inflight_info.addr) { + munmap(dev->inflight_info.addr, dev->inflight_info.size); + dev->inflight_info.addr = NULL; + } + + if (dev->inflight_info.fd > 0) { + close(dev->inflight_info.fd); + dev->inflight_info.fd = -1; + } + if (dev->slave_req_fd >= 0) { close(dev->slave_req_fd); dev->slave_req_fd = -1; @@ -1165,6 +1188,184 @@ virtio_is_ready(struct virtio_net *dev) return 1; } +static int mem_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + return -1; +#endif +} + +void *inflight_mem_alloc(const char *name, size_t size, int *fd) +{ + void *ptr; + int mfd = -1; + char fname[20] = "/tmp/memfd-XXXXXX"; + + *fd = -1; + mfd = mem_create(name, CLOEXEC); + if (mfd != -1) { + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } else { + mfd = mkstemp(fname); + unlink(fname); + + if (mfd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "mkstemp fail for alloc inflight buffer\n"); + return NULL; + } + + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +static uint32_t get_pervq_shm_size_split(uint16_t queue_size) +{ + return ALIGN_UP(sizeof(struct inflight_desc_split) * queue_size + + sizeof(uint64_t) * 1 + sizeof(uint16_t) * 4, INFLIGHT_ALIGNMENT); +} + +static int +vhost_user_get_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd; + uint64_t mmap_size; + void *addr; + uint16_t num_queues, queue_size; + struct virtio_net *dev = *pdev; + + if (msg->size != sizeof(msg->payload.inflight)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Invalid get_inflight_fd message size is %d", + msg->size); + return RTE_VHOST_MSG_RESULT_ERR; + } + + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n", + msg->payload.inflight.num_queues); + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n", + msg->payload.inflight.queue_size); + mmap_size = num_queues * get_pervq_shm_size_split(queue_size); + + addr = inflight_mem_alloc("vhost-inflight", mmap_size, &fd); + if (!addr) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to alloc vhost inflight area"); + msg->payload.inflight.mmap_size = 0; + return RTE_VHOST_MSG_RESULT_ERR; + } + memset(addr, 0, mmap_size); + + dev->inflight_info.addr = addr; + dev->inflight_info.size = msg->payload.inflight.mmap_size = mmap_size; + dev->inflight_info.fd = msg->fds[0] = fd; + msg->payload.inflight.mmap_offset = 0; + msg->fd_num = 1; + + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_size: %lu\n", + msg->payload.inflight.mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_offset: %lu\n", + msg->payload.inflight.mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight fd: %d\n", msg->fds[0]); + + return RTE_VHOST_MSG_RESULT_REPLY; +} + +static int +vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd, i; + uint64_t mmap_size, mmap_offset; + uint16_t num_queues, queue_size; + uint32_t pervq_inflight_size; + void *addr; + struct vhost_virtqueue *vq; + struct virtio_net *dev = *pdev; + + fd = msg->fds[0]; + if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "Invalid set_inflight_fd message size is %d,fd is %d\n", + msg->size, fd); + return RTE_VHOST_MSG_RESULT_ERR; + } + + mmap_size = msg->payload.inflight.mmap_size; + mmap_offset = msg->payload.inflight.mmap_offset; + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + pervq_inflight_size = get_pervq_shm_size_split(queue_size); + + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_size: %lu\n", mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_offset: %lu\n", mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd num_queues: %u\n", num_queues); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd queue_size: %u\n", queue_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd fd: %d\n", fd); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd pervq_inflight_size: %d\n", + pervq_inflight_size); + + if (dev->inflight_info.addr) + munmap(dev->inflight_info.addr, dev->inflight_info.size); + + addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, mmap_offset); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + if (dev->inflight_info.fd) + close(dev->inflight_info.fd); + + dev->inflight_info.fd = fd; + dev->inflight_info.addr = addr; + dev->inflight_info.size = mmap_size; + + for (i = 0; i < num_queues; i++) { + vq = dev->virtqueue[i]; + vq->inflight = (struct inflight_info_split *)addr; + vq->inflight->desc_num = queue_size; + addr = (void *)((char *)addr + pervq_inflight_size); + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1201,6 +1402,89 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused, return RTE_VHOST_MSG_RESULT_OK; } +static int +resubmit_desc_compare(const void *a, const void *b) +{ + const struct resubmit_desc *desc0 = (const struct resubmit_desc *)a; + const struct resubmit_desc *desc1 = (const struct resubmit_desc *)b; + + if (desc1->counter > desc0->counter && + (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) + return 1; + + return -1; +} + +static int +vhost_check_queue_inflights_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + struct vring_used *used = vq->used; + uint16_t i = 0; + uint16_t resubmit_num = 0; + struct resubmit_info *resubmit = NULL; + + if (!(dev->features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return RTE_VHOST_MSG_RESULT_OK; + + if ((!vq->inflight)) + return RTE_VHOST_MSG_RESULT_ERR; + + if (!vq->inflight->version) { + vq->inflight->version = INFLIGHT_VERSION; + return RTE_VHOST_MSG_RESULT_OK; + } + + vq->resubmit_inflight = NULL; + vq->counter = 0; + + if (vq->inflight->used_idx != used->idx) { + vq->inflight->desc[vq->inflight->last_inflight_io].inflight = 0; + rte_compiler_barrier(); + vq->inflight->used_idx = used->idx; + } + + for (i = 0; i < vq->inflight->desc_num; i++) { + if (vq->inflight->desc[i].inflight == 1) + resubmit_num++; + } + + vq->last_avail_idx += resubmit_num; + + if (resubmit_num) { + resubmit = calloc(1, sizeof(struct resubmit_info)); + if (!resubmit) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for resubmit info.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + resubmit->resubmit_list = calloc(resubmit_num, + sizeof(struct resubmit_desc)); + if (!resubmit->resubmit_list) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for inflight desc.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + for (i = 0; i < vq->inflight->desc_num; i++) { + if (vq->inflight->desc[i].inflight == 1) { + resubmit->resubmit_list[resubmit->resubmit_num].index = i; + resubmit->resubmit_list[resubmit->resubmit_num].counter = + vq->inflight->desc[i].counter; + resubmit->resubmit_num++; + } + } + + if (resubmit->resubmit_num > 1) + qsort(resubmit->resubmit_list, resubmit->resubmit_num, + sizeof(struct resubmit_desc), resubmit_desc_compare); + + vq->counter = resubmit->resubmit_list[0].counter + 1; + vq->resubmit_inflight = resubmit; + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1242,6 +1526,12 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, close(vq->kickfd); vq->kickfd = file.fd; + if (vhost_check_queue_inflights_split(dev, vq)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to inflights for vq: %d\n", file.index); + return RTE_VHOST_MSG_RESULT_ERR; + } + return RTE_VHOST_MSG_RESULT_OK; } @@ -1762,6 +2052,8 @@ static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise, [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen, [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, + [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd, + [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd, }; diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index 2a650fe4b..99a773910 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -54,7 +54,9 @@ typedef enum VhostUserRequest { VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_POSTCOPY_END = 30, - VHOST_USER_MAX = 31 + VHOST_USER_GET_INFLIGHT_FD = 31, + VHOST_USER_SET_INFLIGHT_FD = 32, + VHOST_USER_MAX = 33 } VhostUserRequest; typedef enum VhostUserSlaveRequest { @@ -112,6 +114,13 @@ typedef struct VhostUserVringArea { uint64_t offset; } VhostUserVringArea; +typedef struct VhostUserInflight { + uint64_t mmap_size; + uint64_t mmap_offset; + uint16_t num_queues; + uint16_t queue_size; +} VhostUserInflight; + typedef struct VhostUserMsg { union { uint32_t master; /* a VhostUserRequest value */ @@ -131,6 +140,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + VhostUserInflight inflight; struct vhost_iotlb_msg iotlb; VhostUserCryptoSessionParam crypto_session; VhostUserVringArea area; @@ -148,6 +158,7 @@ typedef struct VhostUserMsg { /* vhost_user.c */ int vhost_user_msg_handler(int vid, int fd); int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); +void *inflight_mem_alloc(const char *name, size_t size, int *fd); /* socket.c */ int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, -- 2.17.2 ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] [v1]vhost: support inflight share memory protocol feature 2019-06-26 16:38 [dpdk-dev] [PATCH] [v1]vhost: support inflight share memory protocol feature JinYu @ 2019-07-05 8:47 ` Maxime Coquelin 2019-07-08 7:23 ` Yu, Jin 2019-07-08 17:53 ` [dpdk-dev] [PATCH] [v2, 1/2]vhost: " JinYu 1 sibling, 1 reply; 13+ messages in thread From: Maxime Coquelin @ 2019-07-05 8:47 UTC (permalink / raw) To: JinYu, dev; +Cc: changpeng.liu, LinLi, XunNi, YuZhang, Tiwei Bie Hi Jin, On 6/26/19 6:38 PM, JinYu wrote: > This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD > and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared > buffer between qemu and backend. > > Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the > shared buffer from backend. Then qemu should send it back > through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. > > This shared buffer is used to process inflight I/O when backend > reconnect. > > Signed-off-by: LinLi <lilin24@baidu.com> > Signed-off-by: XunNi <nixun@baidu.com> > Signed-off-by: YuZhang <zhangyu31@baidu.com> > Signed-off-by: JinYu <jin.yu@intel.com> > --- > V1 - specify the APIs are split-ring only > --- > lib/librte_vhost/rte_vhost.h | 99 +++++++++ > lib/librte_vhost/rte_vhost_version.map | 4 + > lib/librte_vhost/vhost.c | 135 ++++++++++++ > lib/librte_vhost/vhost.h | 12 + > lib/librte_vhost/vhost_user.c | 292 +++++++++++++++++++++++++ > lib/librte_vhost/vhost_user.h | 13 +- > 6 files changed, 554 insertions(+), 1 deletion(-) > > diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h > index 0226b3eff..c9959c15a 100644 > --- a/lib/librte_vhost/rte_vhost.h > +++ b/lib/librte_vhost/rte_vhost.h > @@ -71,6 +71,10 @@ extern "C" { > #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 > #endif > > +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD > +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 > +#endif > + > /** Indicate whether protocol features negotiation is supported. */ > #ifndef VHOST_USER_F_PROTOCOL_FEATURES > #define VHOST_USER_F_PROTOCOL_FEATURES 30 > @@ -98,12 +102,41 @@ struct rte_vhost_memory { > struct rte_vhost_mem_region regions[]; > }; > > +struct inflight_desc_split { > + uint8_t inflight; > + uint8_t padding[5]; > + uint16_t next; > + uint64_t counter; > +}; > + > +struct inflight_info_split { > + uint64_t features; > + uint16_t version; > + uint16_t desc_num; > + uint16_t last_inflight_io; > + uint16_t used_idx; > + struct inflight_desc_split desc[0]; > +}; > + > +struct resubmit_desc { > + uint16_t index; > + uint64_t counter; > +}; > + > +struct resubmit_info { > + struct resubmit_desc *resubmit_list; > + uint16_t resubmit_num; > +}; > + > struct rte_vhost_vring { > struct vring_desc *desc; > struct vring_avail *avail; > struct vring_used *used; > uint64_t log_guest_addr; > > + struct inflight_info_split *inflight; > + struct resubmit_info *resubmit_inflight; > + This is breaking the ABI, and discussing with Tiwei, we think it could be avoided by defining its own structure of inflight and inflight resubmit pointers. More below: > /** Deprecated, use rte_vhost_vring_call() instead. */ > int callfd; > > @@ -603,6 +636,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, > */ > int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); > > +/** > + * Get guest vring info, including the vring address, vring size, inflight, etc. > + * > + * @param vid > + * vhost device ID > + * @param vring_idx > + * vring index > + * @param vring > + * the structure to hold the requested vring info > + * @return > + * 0 on success, -1 on failure > + */ > +int __rte_experimental > +rte_vhost_get_vhost_vring_with_inflight_split(int vid, uint16_t vring_idx, > + struct rte_vhost_vring *vring); Here a new API is introduced to get vting info with inflight. It could be just changed to get only the inflight info. It means the caller would first call rte_vhost_get_vhost_vring() and then the new function, which I guess may be renamed to: rte_vhost_get_vhost_vring_inflight_split Other than that, the patch looks good to me. (Next time, please run get_maintainer.sh script so that all the maintainers are in Cc:). Thanks, Maxime ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH] [v1]vhost: support inflight share memory protocol feature 2019-07-05 8:47 ` Maxime Coquelin @ 2019-07-08 7:23 ` Yu, Jin 0 siblings, 0 replies; 13+ messages in thread From: Yu, Jin @ 2019-07-08 7:23 UTC (permalink / raw) To: Maxime Coquelin, dev; +Cc: Liu, Changpeng, LinLi, XunNi, YuZhang, Bie, Tiwei > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com] > Sent: Friday, July 5, 2019 4:47 PM > To: Yu, Jin <jin.yu@intel.com>; dev@dpdk.org > Cc: Liu, Changpeng <changpeng.liu@intel.com>; LinLi <lilin24@baidu.com>; > XunNi <nixun@baidu.com>; YuZhang <zhangyu31@baidu.com>; Bie, Tiwei > <tiwei.bie@intel.com> > Subject: Re: [dpdk-dev] [PATCH] [v1]vhost: support inflight share memory > protocol feature > > Hi Jin, > > On 6/26/19 6:38 PM, JinYu wrote: > > This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD > and > > VHOST_USER_SET_INFLIGHT_FD to support transferring a shared buffer > > between qemu and backend. > > > > Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the shared buffer > > from backend. Then qemu should send it back through > > VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. > > > > This shared buffer is used to process inflight I/O when backend > > reconnect. > > > > Signed-off-by: LinLi <lilin24@baidu.com> > > Signed-off-by: XunNi <nixun@baidu.com> > > Signed-off-by: YuZhang <zhangyu31@baidu.com> > > Signed-off-by: JinYu <jin.yu@intel.com> > > --- > > V1 - specify the APIs are split-ring only > > --- > > lib/librte_vhost/rte_vhost.h | 99 +++++++++ > > lib/librte_vhost/rte_vhost_version.map | 4 + > > lib/librte_vhost/vhost.c | 135 ++++++++++++ > > lib/librte_vhost/vhost.h | 12 + > > lib/librte_vhost/vhost_user.c | 292 +++++++++++++++++++++++++ > > lib/librte_vhost/vhost_user.h | 13 +- > > 6 files changed, 554 insertions(+), 1 deletion(-) > > > > diff --git a/lib/librte_vhost/rte_vhost.h > > b/lib/librte_vhost/rte_vhost.h index 0226b3eff..c9959c15a 100644 > > --- a/lib/librte_vhost/rte_vhost.h > > +++ b/lib/librte_vhost/rte_vhost.h > > @@ -71,6 +71,10 @@ extern "C" { > > #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 > > #endif > > > > +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD > > +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 #endif > > + > > /** Indicate whether protocol features negotiation is supported. */ > > #ifndef VHOST_USER_F_PROTOCOL_FEATURES > > #define VHOST_USER_F_PROTOCOL_FEATURES 30 > > @@ -98,12 +102,41 @@ struct rte_vhost_memory { > > struct rte_vhost_mem_region regions[]; > > }; > > > > +struct inflight_desc_split { > > + uint8_t inflight; > > + uint8_t padding[5]; > > + uint16_t next; > > + uint64_t counter; > > +}; > > + > > +struct inflight_info_split { > > + uint64_t features; > > + uint16_t version; > > + uint16_t desc_num; > > + uint16_t last_inflight_io; > > + uint16_t used_idx; > > + struct inflight_desc_split desc[0]; > > +}; > > + > > +struct resubmit_desc { > > + uint16_t index; > > + uint64_t counter; > > +}; > > + > > +struct resubmit_info { > > + struct resubmit_desc *resubmit_list; > > + uint16_t resubmit_num; > > +}; > > + > > struct rte_vhost_vring { > > struct vring_desc *desc; > > struct vring_avail *avail; > > struct vring_used *used; > > uint64_t log_guest_addr; > > > > + struct inflight_info_split *inflight; > > + struct resubmit_info *resubmit_inflight; > > + > > This is breaking the ABI, and discussing with Tiwei, we think it could be > avoided by defining its own structure of inflight and inflight resubmit > pointers. More below: So you means that define a new struct such as struct rte_vhost_inflight_ring? > > > /** Deprecated, use rte_vhost_vring_call() instead. */ > > int callfd; > > > > @@ -603,6 +636,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, > uint16_t queue_id, > > */ > > int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); > > > > +/** > > + * Get guest vring info, including the vring address, vring size, inflight, etc. > > + * > > + * @param vid > > + * vhost device ID > > + * @param vring_idx > > + * vring index > > + * @param vring > > + * the structure to hold the requested vring info > > + * @return > > + * 0 on success, -1 on failure > > + */ > > +int __rte_experimental > > +rte_vhost_get_vhost_vring_with_inflight_split(int vid, uint16_t vring_idx, > > + struct rte_vhost_vring *vring); > > Here a new API is introduced to get vting info with inflight. > It could be just changed to get only the inflight info. It means the caller would > first call rte_vhost_get_vhost_vring() and then the new function, which I > guess may be renamed to: > > rte_vhost_get_vhost_vring_inflight_split Got It. > > Other than that, the patch looks good to me. > > (Next time, please run get_maintainer.sh script so that all the maintainers > are in Cc:). > > Thanks, > Maxime Thanks Maxime, also I will submit an example code of vhost_user_blk which support the inflight feature. It can show how these APIs work. Thanks Jin ^ permalink raw reply [flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH] [v2, 1/2]vhost: support inflight share memory protocol feature 2019-06-26 16:38 [dpdk-dev] [PATCH] [v1]vhost: support inflight share memory protocol feature JinYu 2019-07-05 8:47 ` Maxime Coquelin @ 2019-07-08 17:53 ` JinYu 2019-07-08 18:00 ` JinYu 2019-07-08 18:16 ` [dpdk-dev] [PATCH v2] [1/2]vhost: " JinYu 1 sibling, 2 replies; 13+ messages in thread From: JinYu @ 2019-07-08 17:53 UTC (permalink / raw) To: dev; +Cc: changpeng.liu, JinYu, LinLi, XunNi, YuZhang This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared buffer between qemu and backend. Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the shared buffer from backend. Then qemu should send it back through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. This shared buffer is used to process inflight I/O when backend reconnect. Signed-off-by: LinLi <lilin24@baidu.com> Signed-off-by: XunNi <nixun@baidu.com> Signed-off-by: YuZhang <zhangyu31@baidu.com> Signed-off-by: JinYu <jin.yu@intel.com> --- v1 - specify the APIs are split-ring only v2 - fix APIs and judge split or packed --- lib/librte_vhost/rte_vhost.h | 105 +++++++++ lib/librte_vhost/rte_vhost_version.map | 4 + lib/librte_vhost/vhost.c | 158 ++++++++++++- lib/librte_vhost/vhost.h | 16 ++ lib/librte_vhost/vhost_user.c | 313 +++++++++++++++++++++++++ lib/librte_vhost/vhost_user.h | 13 +- 6 files changed, 607 insertions(+), 2 deletions(-) diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 0226b3eff..a17aff876 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -71,6 +71,10 @@ extern "C" { #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 #endif +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 +#endif + /** Indicate whether protocol features negotiation is supported. */ #ifndef VHOST_USER_F_PROTOCOL_FEATURES #define VHOST_USER_F_PROTOCOL_FEATURES 30 @@ -98,6 +102,41 @@ struct rte_vhost_memory { struct rte_vhost_mem_region regions[]; }; +struct inflight_desc_split { + uint8_t inflight; + uint8_t padding[5]; + uint16_t next; + uint64_t counter; +}; + +struct inflight_info_split { + uint64_t features; + uint16_t version; + uint16_t desc_num; + uint16_t last_inflight_io; + uint16_t used_idx; + struct inflight_desc_split desc[0]; +}; + +struct resubmit_desc { + uint16_t index; + uint64_t counter; +}; + +struct resubmit_info { + struct resubmit_desc *resubmit_list; + uint16_t resubmit_num; +}; + +struct rte_vhost_ring_inflight_split { + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + + struct resubmit_info *resubmit_inflight_split; +}; + struct rte_vhost_vring { struct vring_desc *desc; struct vring_avail *avail; @@ -603,6 +642,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, */ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); +/** + * Get guest inflight vring info, including inflight ring and resubmit list. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param vring + * the structure to hold the requested inflight vring info + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring); + /** * Get guest vring info, including the vring address, vring size, etc. * @@ -631,6 +686,56 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, */ int rte_vhost_vring_call(int vid, uint16_t vring_idx); +/** + * set inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t idx); + +/** + * clear inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param last_used_idx + * next free used_idx + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx); + +/** + * set last inflight io index. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, + uint16_t idx); + /** * Get vhost RX queue avail count. * diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 5f1d4a75c..709593701 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -87,4 +87,8 @@ EXPERIMENTAL { rte_vdpa_relay_vring_used; rte_vhost_extern_callback_register; rte_vhost_driver_set_protocol_features; + rte_vhost_set_inflight_desc_split; + rte_vhost_clr_inflight_desc_split; + rte_vhost_set_last_inflight_io_split; + rte_vhost_get_vhost_vring_with_inflight_split; }; diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 981837b5d..ecd580cc5 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -242,6 +242,30 @@ cleanup_vq(struct vhost_virtqueue *vq, int destroy) close(vq->kickfd); } +void +cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + if (!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "function only be used for split inflight.\n"); + } else { + if (vq->inflight_split) + vq->inflight_split = NULL; + if (vq->resubmit_inflight_split) { + if (vq->resubmit_inflight_split->resubmit_list) { + free(vq->resubmit_inflight_split->resubmit_list); + vq->resubmit_inflight_split->resubmit_list = NULL; + } + free(vq->resubmit_inflight_split); + vq->resubmit_inflight_split = NULL; + } + } +} + /* * Unmap any memory, close any file descriptors and * free any memory owned by a device. @@ -253,8 +277,10 @@ cleanup_device(struct virtio_net *dev, int destroy) vhost_backend_cleanup(dev); - for (i = 0; i < dev->nr_vring; i++) + for (i = 0; i < dev->nr_vring; i++) { cleanup_vq(dev->virtqueue[i], destroy); + cleanup_vq_inflight_split(dev, dev->virtqueue[i]); + } } void @@ -726,6 +752,35 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) return 0; } +int __rte_experimental +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring) +{ + struct virtio_net *dev; + struct inflight_info_split *inflight_vring; + + dev = get_device(vid); + if (dev == NULL || vring == NULL) + return -1; + + if (vring_idx >= VHOST_MAX_VRING) + return -1; + + inflight_vring = dev->virtqueue[vring_idx]->inflight_split; + if (!inflight_vring) + return -1; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "Can't be packed inflight ring.\n"); + } else { + vring->inflight_split = inflight_vring; + vring->resubmit_inflight_split = dev->virtqueue[vring_idx]->resubmit_inflight_split; + } + + return 0; +} + int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct rte_vhost_vring *vring) @@ -781,6 +836,107 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) return 0; } +int +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->desc[idx].counter = vq->global_counter++; + vq->inflight_split->desc[idx].inflight = 1; + return 0; +} + +int +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + rte_compiler_barrier(); + + vq->inflight_split->desc[idx].inflight = 0; + + rte_compiler_barrier(); + + vq->inflight_split->used_idx = last_used_idx; + return 0; +} + +int +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->last_inflight_io = idx; + return 0; +} + uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id) { diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 884befa85..22925586d 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -128,6 +128,14 @@ struct vhost_virtqueue { /* Physical address of used ring, for logging */ uint64_t log_guest_addr; + /* inflight share memory info */ + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + struct resubmit_info *resubmit_inflight_split; + uint64_t global_counter; + uint16_t nr_zmbuf; uint16_t zmbuf_size; uint16_t last_zmbuf_idx; @@ -286,6 +294,12 @@ struct guest_page { uint64_t size; }; +struct inflight_mem_info { + int fd; + void *addr; + uint64_t size; +}; + /** * Device structure contains all configuration information relating * to the device. @@ -303,6 +317,7 @@ struct virtio_net { uint32_t nr_vring; int dequeue_zero_copy; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; + struct inflight_mem_info inflight_info; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) char ifname[IF_NAME_SZ]; uint64_t log_size; @@ -467,6 +482,7 @@ void vhost_destroy_device(int); void vhost_destroy_device_notify(struct virtio_net *dev); void cleanup_vq(struct vhost_virtqueue *vq, int destroy); +void cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq); void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq); int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index c9e29ece8..feb642ad7 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -31,6 +31,8 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <assert.h> +#include <sys/syscall.h> +#include <asm/unistd.h> #ifdef RTE_LIBRTE_VHOST_NUMA #include <numaif.h> #endif @@ -49,6 +51,15 @@ #define VIRTIO_MIN_MTU 68 #define VIRTIO_MAX_MTU 65535 +#define INFLIGHT_ALIGNMENT 64 +#define INFLIGHT_VERSION 0xabcd +#define VIRTQUEUE_MAX_SIZE 1024 + +#define CLOEXEC 0x0001U + +#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) + static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NONE] = "VHOST_USER_NONE", [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", @@ -78,6 +89,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE", [VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN", [VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END", + [VHOST_USER_GET_INFLIGHT_FD] = "VHOST_USER_GET_INFLIGHT_FD", + [VHOST_USER_SET_INFLIGHT_FD] = "VHOST_USER_SET_INFLIGHT_FD", }; static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg); @@ -160,6 +173,16 @@ vhost_backend_cleanup(struct virtio_net *dev) dev->log_addr = 0; } + if (dev->inflight_info.addr) { + munmap(dev->inflight_info.addr, dev->inflight_info.size); + dev->inflight_info.addr = NULL; + } + + if (dev->inflight_info.fd > 0) { + close(dev->inflight_info.fd); + dev->inflight_info.fd = -1; + } + if (dev->slave_req_fd >= 0) { close(dev->slave_req_fd); dev->slave_req_fd = -1; @@ -306,6 +329,7 @@ vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg, dev->virtqueue[dev->nr_vring] = NULL; cleanup_vq(vq, 1); + cleanup_vq_inflight_split(dev, vq); free_vq(dev, vq); } } @@ -1165,6 +1189,204 @@ virtio_is_ready(struct virtio_net *dev) return 1; } +static int mem_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + return -1; +#endif +} + +void *inflight_mem_alloc(const char *name, size_t size, int *fd) +{ + void *ptr; + int mfd = -1; + char fname[20] = "/tmp/memfd-XXXXXX"; + + *fd = -1; + mfd = mem_create(name, CLOEXEC); + if (mfd != -1) { + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } else { + mfd = mkstemp(fname); + unlink(fname); + + if (mfd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "mkstemp fail for alloc inflight buffer\n"); + return NULL; + } + + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +static uint32_t get_pervq_shm_size_split(uint16_t queue_size) +{ + return ALIGN_UP(sizeof(struct inflight_desc_split) * queue_size + + sizeof(uint64_t) * 1 + sizeof(uint16_t) * 4, INFLIGHT_ALIGNMENT); +} + +static int +vhost_user_get_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd; + uint64_t mmap_size; + void *addr; + uint16_t num_queues, queue_size; + struct virtio_net *dev = *pdev; + + if (msg->size != sizeof(msg->payload.inflight)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Invalid get_inflight_fd message size is %d", + msg->size); + return RTE_VHOST_MSG_RESULT_ERR; + } + + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n", + msg->payload.inflight.num_queues); + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n", + msg->payload.inflight.queue_size); + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } else { + mmap_size = num_queues * get_pervq_shm_size_split(queue_size); + } + + addr = inflight_mem_alloc("vhost-inflight", mmap_size, &fd); + if (!addr) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to alloc vhost inflight area"); + msg->payload.inflight.mmap_size = 0; + return RTE_VHOST_MSG_RESULT_ERR; + } + memset(addr, 0, mmap_size); + + dev->inflight_info.addr = addr; + dev->inflight_info.size = msg->payload.inflight.mmap_size = mmap_size; + dev->inflight_info.fd = msg->fds[0] = fd; + msg->payload.inflight.mmap_offset = 0; + msg->fd_num = 1; + + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_size: %lu\n", + msg->payload.inflight.mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_offset: %lu\n", + msg->payload.inflight.mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight fd: %d\n", msg->fds[0]); + + return RTE_VHOST_MSG_RESULT_REPLY; +} + +static int +vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd, i; + uint64_t mmap_size, mmap_offset; + uint16_t num_queues, queue_size; + uint32_t pervq_inflight_size; + void *addr; + struct vhost_virtqueue *vq; + struct virtio_net *dev = *pdev; + + fd = msg->fds[0]; + if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "Invalid set_inflight_fd message size is %d,fd is %d\n", + msg->size, fd); + return RTE_VHOST_MSG_RESULT_ERR; + } + + mmap_size = msg->payload.inflight.mmap_size; + mmap_offset = msg->payload.inflight.mmap_offset; + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } else { + pervq_inflight_size = get_pervq_shm_size_split(queue_size); + } + + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_size: %lu\n", mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_offset: %lu\n", mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd num_queues: %u\n", num_queues); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd queue_size: %u\n", queue_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd fd: %d\n", fd); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd pervq_inflight_size: %d\n", + pervq_inflight_size); + + if (dev->inflight_info.addr) + munmap(dev->inflight_info.addr, dev->inflight_info.size); + + addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, mmap_offset); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + if (dev->inflight_info.fd) + close(dev->inflight_info.fd); + + dev->inflight_info.fd = fd; + dev->inflight_info.addr = addr; + dev->inflight_info.size = mmap_size; + + for (i = 0; i < num_queues; i++) { + vq = dev->virtqueue[i]; + if (vq_is_packed(dev)) { + /* TODO */ + } else { + vq->inflight_split = (struct inflight_info_split *)addr; + vq->inflight_split->desc_num = queue_size; + } + addr = (void *)((char *)addr + pervq_inflight_size); + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1201,6 +1423,89 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused, return RTE_VHOST_MSG_RESULT_OK; } +static int +resubmit_desc_compare(const void *a, const void *b) +{ + const struct resubmit_desc *desc0 = (const struct resubmit_desc *)a; + const struct resubmit_desc *desc1 = (const struct resubmit_desc *)b; + + if (desc1->counter > desc0->counter && + (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) + return 1; + + return -1; +} + +static int +vhost_check_queue_inflights_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + struct vring_used *used = vq->used; + uint16_t i = 0; + uint16_t resubmit_num = 0; + struct resubmit_info *resubmit = NULL; + + if (!(dev->features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return RTE_VHOST_MSG_RESULT_OK; + + if ((!vq->inflight_split)) + return RTE_VHOST_MSG_RESULT_ERR; + + if (!vq->inflight_split->version) { + vq->inflight_split->version = INFLIGHT_VERSION; + return RTE_VHOST_MSG_RESULT_OK; + } + + vq->resubmit_inflight_split = NULL; + vq->global_counter = 0; + + if (vq->inflight_split->used_idx != used->idx) { + vq->inflight_split->desc[vq->inflight_split->last_inflight_io].inflight = 0; + rte_compiler_barrier(); + vq->inflight_split->used_idx = used->idx; + } + + for (i = 0; i < vq->inflight_split->desc_num; i++) { + if (vq->inflight_split->desc[i].inflight == 1) + resubmit_num++; + } + + vq->last_avail_idx += resubmit_num; + + if (resubmit_num) { + resubmit = calloc(1, sizeof(struct resubmit_info)); + if (!resubmit) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for resubmit info.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + resubmit->resubmit_list = calloc(resubmit_num, + sizeof(struct resubmit_desc)); + if (!resubmit->resubmit_list) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for inflight desc.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + for (i = 0; i < vq->inflight_split->desc_num; i++) { + if (vq->inflight_split->desc[i].inflight == 1) { + resubmit->resubmit_list[resubmit->resubmit_num].index = i; + resubmit->resubmit_list[resubmit->resubmit_num].counter = + vq->inflight_split->desc[i].counter; + resubmit->resubmit_num++; + } + } + + if (resubmit->resubmit_num > 1) + qsort(resubmit->resubmit_list, resubmit->resubmit_num, + sizeof(struct resubmit_desc), resubmit_desc_compare); + + vq->global_counter = resubmit->resubmit_list[0].counter + 1; + vq->resubmit_inflight_split = resubmit; + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1242,6 +1547,12 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, close(vq->kickfd); vq->kickfd = file.fd; + if (vhost_check_queue_inflights_split(dev, vq)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to inflights for vq: %d\n", file.index); + return RTE_VHOST_MSG_RESULT_ERR; + } + return RTE_VHOST_MSG_RESULT_OK; } @@ -1762,6 +2073,8 @@ static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise, [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen, [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, + [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd, + [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd, }; diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index 2a650fe4b..99a773910 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -54,7 +54,9 @@ typedef enum VhostUserRequest { VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_POSTCOPY_END = 30, - VHOST_USER_MAX = 31 + VHOST_USER_GET_INFLIGHT_FD = 31, + VHOST_USER_SET_INFLIGHT_FD = 32, + VHOST_USER_MAX = 33 } VhostUserRequest; typedef enum VhostUserSlaveRequest { @@ -112,6 +114,13 @@ typedef struct VhostUserVringArea { uint64_t offset; } VhostUserVringArea; +typedef struct VhostUserInflight { + uint64_t mmap_size; + uint64_t mmap_offset; + uint16_t num_queues; + uint16_t queue_size; +} VhostUserInflight; + typedef struct VhostUserMsg { union { uint32_t master; /* a VhostUserRequest value */ @@ -131,6 +140,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + VhostUserInflight inflight; struct vhost_iotlb_msg iotlb; VhostUserCryptoSessionParam crypto_session; VhostUserVringArea area; @@ -148,6 +158,7 @@ typedef struct VhostUserMsg { /* vhost_user.c */ int vhost_user_msg_handler(int vid, int fd); int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); +void *inflight_mem_alloc(const char *name, size_t size, int *fd); /* socket.c */ int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, -- 2.17.2 ^ permalink raw reply [flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH] [v2, 1/2]vhost: support inflight share memory protocol feature 2019-07-08 17:53 ` [dpdk-dev] [PATCH] [v2, 1/2]vhost: " JinYu @ 2019-07-08 18:00 ` JinYu 2019-07-08 18:00 ` [dpdk-dev] [PATCH v2] " JinYu 2019-07-08 18:16 ` [dpdk-dev] [PATCH v2] [1/2]vhost: " JinYu 1 sibling, 1 reply; 13+ messages in thread From: JinYu @ 2019-07-08 18:00 UTC (permalink / raw) To: dev Cc: changpeng.liu, maxime.coquelin, tiwei.bie, zhihong.wang, JinYu, LinLi, XunNi, YuZhang This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared buffer between qemu and backend. Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the shared buffer from backend. Then qemu should send it back through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. This shared buffer is used to process inflight I/O when backend reconnect. Signed-off-by: LinLi <lilin24@baidu.com> Signed-off-by: XunNi <nixun@baidu.com> Signed-off-by: YuZhang <zhangyu31@baidu.com> Signed-off-by: JinYu <jin.yu@intel.com> --- v1 - specify the APIs are split-ring only v2 - fix APIs and judge split or packed --- lib/librte_vhost/rte_vhost.h | 105 +++++++++ lib/librte_vhost/rte_vhost_version.map | 4 + lib/librte_vhost/vhost.c | 158 ++++++++++++- lib/librte_vhost/vhost.h | 16 ++ lib/librte_vhost/vhost_user.c | 313 +++++++++++++++++++++++++ lib/librte_vhost/vhost_user.h | 13 +- 6 files changed, 607 insertions(+), 2 deletions(-) diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 0226b3eff..a17aff876 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -71,6 +71,10 @@ extern "C" { #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 #endif +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 +#endif + /** Indicate whether protocol features negotiation is supported. */ #ifndef VHOST_USER_F_PROTOCOL_FEATURES #define VHOST_USER_F_PROTOCOL_FEATURES 30 @@ -98,6 +102,41 @@ struct rte_vhost_memory { struct rte_vhost_mem_region regions[]; }; +struct inflight_desc_split { + uint8_t inflight; + uint8_t padding[5]; + uint16_t next; + uint64_t counter; +}; + +struct inflight_info_split { + uint64_t features; + uint16_t version; + uint16_t desc_num; + uint16_t last_inflight_io; + uint16_t used_idx; + struct inflight_desc_split desc[0]; +}; + +struct resubmit_desc { + uint16_t index; + uint64_t counter; +}; + +struct resubmit_info { + struct resubmit_desc *resubmit_list; + uint16_t resubmit_num; +}; + +struct rte_vhost_ring_inflight_split { + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + + struct resubmit_info *resubmit_inflight_split; +}; + struct rte_vhost_vring { struct vring_desc *desc; struct vring_avail *avail; @@ -603,6 +642,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, */ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); +/** + * Get guest inflight vring info, including inflight ring and resubmit list. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param vring + * the structure to hold the requested inflight vring info + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring); + /** * Get guest vring info, including the vring address, vring size, etc. * @@ -631,6 +686,56 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, */ int rte_vhost_vring_call(int vid, uint16_t vring_idx); +/** + * set inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t idx); + +/** + * clear inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param last_used_idx + * next free used_idx + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx); + +/** + * set last inflight io index. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, + uint16_t idx); + /** * Get vhost RX queue avail count. * diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 5f1d4a75c..709593701 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -87,4 +87,8 @@ EXPERIMENTAL { rte_vdpa_relay_vring_used; rte_vhost_extern_callback_register; rte_vhost_driver_set_protocol_features; + rte_vhost_set_inflight_desc_split; + rte_vhost_clr_inflight_desc_split; + rte_vhost_set_last_inflight_io_split; + rte_vhost_get_vhost_vring_with_inflight_split; }; diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 981837b5d..ecd580cc5 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -242,6 +242,30 @@ cleanup_vq(struct vhost_virtqueue *vq, int destroy) close(vq->kickfd); } +void +cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + if (!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "function only be used for split inflight.\n"); + } else { + if (vq->inflight_split) + vq->inflight_split = NULL; + if (vq->resubmit_inflight_split) { + if (vq->resubmit_inflight_split->resubmit_list) { + free(vq->resubmit_inflight_split->resubmit_list); + vq->resubmit_inflight_split->resubmit_list = NULL; + } + free(vq->resubmit_inflight_split); + vq->resubmit_inflight_split = NULL; + } + } +} + /* * Unmap any memory, close any file descriptors and * free any memory owned by a device. @@ -253,8 +277,10 @@ cleanup_device(struct virtio_net *dev, int destroy) vhost_backend_cleanup(dev); - for (i = 0; i < dev->nr_vring; i++) + for (i = 0; i < dev->nr_vring; i++) { cleanup_vq(dev->virtqueue[i], destroy); + cleanup_vq_inflight_split(dev, dev->virtqueue[i]); + } } void @@ -726,6 +752,35 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) return 0; } +int __rte_experimental +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring) +{ + struct virtio_net *dev; + struct inflight_info_split *inflight_vring; + + dev = get_device(vid); + if (dev == NULL || vring == NULL) + return -1; + + if (vring_idx >= VHOST_MAX_VRING) + return -1; + + inflight_vring = dev->virtqueue[vring_idx]->inflight_split; + if (!inflight_vring) + return -1; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "Can't be packed inflight ring.\n"); + } else { + vring->inflight_split = inflight_vring; + vring->resubmit_inflight_split = dev->virtqueue[vring_idx]->resubmit_inflight_split; + } + + return 0; +} + int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct rte_vhost_vring *vring) @@ -781,6 +836,107 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) return 0; } +int +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->desc[idx].counter = vq->global_counter++; + vq->inflight_split->desc[idx].inflight = 1; + return 0; +} + +int +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + rte_compiler_barrier(); + + vq->inflight_split->desc[idx].inflight = 0; + + rte_compiler_barrier(); + + vq->inflight_split->used_idx = last_used_idx; + return 0; +} + +int +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->last_inflight_io = idx; + return 0; +} + uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id) { diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 884befa85..22925586d 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -128,6 +128,14 @@ struct vhost_virtqueue { /* Physical address of used ring, for logging */ uint64_t log_guest_addr; + /* inflight share memory info */ + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + struct resubmit_info *resubmit_inflight_split; + uint64_t global_counter; + uint16_t nr_zmbuf; uint16_t zmbuf_size; uint16_t last_zmbuf_idx; @@ -286,6 +294,12 @@ struct guest_page { uint64_t size; }; +struct inflight_mem_info { + int fd; + void *addr; + uint64_t size; +}; + /** * Device structure contains all configuration information relating * to the device. @@ -303,6 +317,7 @@ struct virtio_net { uint32_t nr_vring; int dequeue_zero_copy; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; + struct inflight_mem_info inflight_info; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) char ifname[IF_NAME_SZ]; uint64_t log_size; @@ -467,6 +482,7 @@ void vhost_destroy_device(int); void vhost_destroy_device_notify(struct virtio_net *dev); void cleanup_vq(struct vhost_virtqueue *vq, int destroy); +void cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq); void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq); int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index c9e29ece8..feb642ad7 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -31,6 +31,8 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <assert.h> +#include <sys/syscall.h> +#include <asm/unistd.h> #ifdef RTE_LIBRTE_VHOST_NUMA #include <numaif.h> #endif @@ -49,6 +51,15 @@ #define VIRTIO_MIN_MTU 68 #define VIRTIO_MAX_MTU 65535 +#define INFLIGHT_ALIGNMENT 64 +#define INFLIGHT_VERSION 0xabcd +#define VIRTQUEUE_MAX_SIZE 1024 + +#define CLOEXEC 0x0001U + +#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) + static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NONE] = "VHOST_USER_NONE", [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", @@ -78,6 +89,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE", [VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN", [VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END", + [VHOST_USER_GET_INFLIGHT_FD] = "VHOST_USER_GET_INFLIGHT_FD", + [VHOST_USER_SET_INFLIGHT_FD] = "VHOST_USER_SET_INFLIGHT_FD", }; static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg); @@ -160,6 +173,16 @@ vhost_backend_cleanup(struct virtio_net *dev) dev->log_addr = 0; } + if (dev->inflight_info.addr) { + munmap(dev->inflight_info.addr, dev->inflight_info.size); + dev->inflight_info.addr = NULL; + } + + if (dev->inflight_info.fd > 0) { + close(dev->inflight_info.fd); + dev->inflight_info.fd = -1; + } + if (dev->slave_req_fd >= 0) { close(dev->slave_req_fd); dev->slave_req_fd = -1; @@ -306,6 +329,7 @@ vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg, dev->virtqueue[dev->nr_vring] = NULL; cleanup_vq(vq, 1); + cleanup_vq_inflight_split(dev, vq); free_vq(dev, vq); } } @@ -1165,6 +1189,204 @@ virtio_is_ready(struct virtio_net *dev) return 1; } +static int mem_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + return -1; +#endif +} + +void *inflight_mem_alloc(const char *name, size_t size, int *fd) +{ + void *ptr; + int mfd = -1; + char fname[20] = "/tmp/memfd-XXXXXX"; + + *fd = -1; + mfd = mem_create(name, CLOEXEC); + if (mfd != -1) { + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } else { + mfd = mkstemp(fname); + unlink(fname); + + if (mfd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "mkstemp fail for alloc inflight buffer\n"); + return NULL; + } + + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +static uint32_t get_pervq_shm_size_split(uint16_t queue_size) +{ + return ALIGN_UP(sizeof(struct inflight_desc_split) * queue_size + + sizeof(uint64_t) * 1 + sizeof(uint16_t) * 4, INFLIGHT_ALIGNMENT); +} + +static int +vhost_user_get_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd; + uint64_t mmap_size; + void *addr; + uint16_t num_queues, queue_size; + struct virtio_net *dev = *pdev; + + if (msg->size != sizeof(msg->payload.inflight)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Invalid get_inflight_fd message size is %d", + msg->size); + return RTE_VHOST_MSG_RESULT_ERR; + } + + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n", + msg->payload.inflight.num_queues); + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n", + msg->payload.inflight.queue_size); + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } else { + mmap_size = num_queues * get_pervq_shm_size_split(queue_size); + } + + addr = inflight_mem_alloc("vhost-inflight", mmap_size, &fd); + if (!addr) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to alloc vhost inflight area"); + msg->payload.inflight.mmap_size = 0; + return RTE_VHOST_MSG_RESULT_ERR; + } + memset(addr, 0, mmap_size); + + dev->inflight_info.addr = addr; + dev->inflight_info.size = msg->payload.inflight.mmap_size = mmap_size; + dev->inflight_info.fd = msg->fds[0] = fd; + msg->payload.inflight.mmap_offset = 0; + msg->fd_num = 1; + + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_size: %lu\n", + msg->payload.inflight.mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_offset: %lu\n", + msg->payload.inflight.mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight fd: %d\n", msg->fds[0]); + + return RTE_VHOST_MSG_RESULT_REPLY; +} + +static int +vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd, i; + uint64_t mmap_size, mmap_offset; + uint16_t num_queues, queue_size; + uint32_t pervq_inflight_size; + void *addr; + struct vhost_virtqueue *vq; + struct virtio_net *dev = *pdev; + + fd = msg->fds[0]; + if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "Invalid set_inflight_fd message size is %d,fd is %d\n", + msg->size, fd); + return RTE_VHOST_MSG_RESULT_ERR; + } + + mmap_size = msg->payload.inflight.mmap_size; + mmap_offset = msg->payload.inflight.mmap_offset; + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } else { + pervq_inflight_size = get_pervq_shm_size_split(queue_size); + } + + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_size: %lu\n", mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_offset: %lu\n", mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd num_queues: %u\n", num_queues); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd queue_size: %u\n", queue_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd fd: %d\n", fd); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd pervq_inflight_size: %d\n", + pervq_inflight_size); + + if (dev->inflight_info.addr) + munmap(dev->inflight_info.addr, dev->inflight_info.size); + + addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, mmap_offset); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + if (dev->inflight_info.fd) + close(dev->inflight_info.fd); + + dev->inflight_info.fd = fd; + dev->inflight_info.addr = addr; + dev->inflight_info.size = mmap_size; + + for (i = 0; i < num_queues; i++) { + vq = dev->virtqueue[i]; + if (vq_is_packed(dev)) { + /* TODO */ + } else { + vq->inflight_split = (struct inflight_info_split *)addr; + vq->inflight_split->desc_num = queue_size; + } + addr = (void *)((char *)addr + pervq_inflight_size); + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1201,6 +1423,89 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused, return RTE_VHOST_MSG_RESULT_OK; } +static int +resubmit_desc_compare(const void *a, const void *b) +{ + const struct resubmit_desc *desc0 = (const struct resubmit_desc *)a; + const struct resubmit_desc *desc1 = (const struct resubmit_desc *)b; + + if (desc1->counter > desc0->counter && + (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) + return 1; + + return -1; +} + +static int +vhost_check_queue_inflights_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + struct vring_used *used = vq->used; + uint16_t i = 0; + uint16_t resubmit_num = 0; + struct resubmit_info *resubmit = NULL; + + if (!(dev->features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return RTE_VHOST_MSG_RESULT_OK; + + if ((!vq->inflight_split)) + return RTE_VHOST_MSG_RESULT_ERR; + + if (!vq->inflight_split->version) { + vq->inflight_split->version = INFLIGHT_VERSION; + return RTE_VHOST_MSG_RESULT_OK; + } + + vq->resubmit_inflight_split = NULL; + vq->global_counter = 0; + + if (vq->inflight_split->used_idx != used->idx) { + vq->inflight_split->desc[vq->inflight_split->last_inflight_io].inflight = 0; + rte_compiler_barrier(); + vq->inflight_split->used_idx = used->idx; + } + + for (i = 0; i < vq->inflight_split->desc_num; i++) { + if (vq->inflight_split->desc[i].inflight == 1) + resubmit_num++; + } + + vq->last_avail_idx += resubmit_num; + + if (resubmit_num) { + resubmit = calloc(1, sizeof(struct resubmit_info)); + if (!resubmit) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for resubmit info.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + resubmit->resubmit_list = calloc(resubmit_num, + sizeof(struct resubmit_desc)); + if (!resubmit->resubmit_list) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for inflight desc.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + for (i = 0; i < vq->inflight_split->desc_num; i++) { + if (vq->inflight_split->desc[i].inflight == 1) { + resubmit->resubmit_list[resubmit->resubmit_num].index = i; + resubmit->resubmit_list[resubmit->resubmit_num].counter = + vq->inflight_split->desc[i].counter; + resubmit->resubmit_num++; + } + } + + if (resubmit->resubmit_num > 1) + qsort(resubmit->resubmit_list, resubmit->resubmit_num, + sizeof(struct resubmit_desc), resubmit_desc_compare); + + vq->global_counter = resubmit->resubmit_list[0].counter + 1; + vq->resubmit_inflight_split = resubmit; + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1242,6 +1547,12 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, close(vq->kickfd); vq->kickfd = file.fd; + if (vhost_check_queue_inflights_split(dev, vq)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to inflights for vq: %d\n", file.index); + return RTE_VHOST_MSG_RESULT_ERR; + } + return RTE_VHOST_MSG_RESULT_OK; } @@ -1762,6 +2073,8 @@ static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise, [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen, [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, + [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd, + [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd, }; diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index 2a650fe4b..99a773910 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -54,7 +54,9 @@ typedef enum VhostUserRequest { VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_POSTCOPY_END = 30, - VHOST_USER_MAX = 31 + VHOST_USER_GET_INFLIGHT_FD = 31, + VHOST_USER_SET_INFLIGHT_FD = 32, + VHOST_USER_MAX = 33 } VhostUserRequest; typedef enum VhostUserSlaveRequest { @@ -112,6 +114,13 @@ typedef struct VhostUserVringArea { uint64_t offset; } VhostUserVringArea; +typedef struct VhostUserInflight { + uint64_t mmap_size; + uint64_t mmap_offset; + uint16_t num_queues; + uint16_t queue_size; +} VhostUserInflight; + typedef struct VhostUserMsg { union { uint32_t master; /* a VhostUserRequest value */ @@ -131,6 +140,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + VhostUserInflight inflight; struct vhost_iotlb_msg iotlb; VhostUserCryptoSessionParam crypto_session; VhostUserVringArea area; @@ -148,6 +158,7 @@ typedef struct VhostUserMsg { /* vhost_user.c */ int vhost_user_msg_handler(int vid, int fd); int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); +void *inflight_mem_alloc(const char *name, size_t size, int *fd); /* socket.c */ int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, -- 2.17.2 ^ permalink raw reply [flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH v2] [v2, 1/2]vhost: support inflight share memory protocol feature 2019-07-08 18:00 ` JinYu @ 2019-07-08 18:00 ` JinYu 0 siblings, 0 replies; 13+ messages in thread From: JinYu @ 2019-07-08 18:00 UTC (permalink / raw) To: dev Cc: changpeng.liu, maxime.coquelin, tiwei.bie, zhihong.wang, JinYu, LinLi, XunNi, YuZhang This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared buffer between qemu and backend. Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the shared buffer from backend. Then qemu should send it back through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. This shared buffer is used to process inflight I/O when backend reconnect. Signed-off-by: LinLi <lilin24@baidu.com> Signed-off-by: XunNi <nixun@baidu.com> Signed-off-by: YuZhang <zhangyu31@baidu.com> Signed-off-by: JinYu <jin.yu@intel.com> --- v1 - specify the APIs are split-ring only v2 - fix APIs and judge split or packed --- lib/librte_vhost/rte_vhost.h | 105 +++++++++ lib/librte_vhost/rte_vhost_version.map | 4 + lib/librte_vhost/vhost.c | 158 ++++++++++++- lib/librte_vhost/vhost.h | 16 ++ lib/librte_vhost/vhost_user.c | 313 +++++++++++++++++++++++++ lib/librte_vhost/vhost_user.h | 13 +- 6 files changed, 607 insertions(+), 2 deletions(-) diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 0226b3eff..a17aff876 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -71,6 +71,10 @@ extern "C" { #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 #endif +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 +#endif + /** Indicate whether protocol features negotiation is supported. */ #ifndef VHOST_USER_F_PROTOCOL_FEATURES #define VHOST_USER_F_PROTOCOL_FEATURES 30 @@ -98,6 +102,41 @@ struct rte_vhost_memory { struct rte_vhost_mem_region regions[]; }; +struct inflight_desc_split { + uint8_t inflight; + uint8_t padding[5]; + uint16_t next; + uint64_t counter; +}; + +struct inflight_info_split { + uint64_t features; + uint16_t version; + uint16_t desc_num; + uint16_t last_inflight_io; + uint16_t used_idx; + struct inflight_desc_split desc[0]; +}; + +struct resubmit_desc { + uint16_t index; + uint64_t counter; +}; + +struct resubmit_info { + struct resubmit_desc *resubmit_list; + uint16_t resubmit_num; +}; + +struct rte_vhost_ring_inflight_split { + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + + struct resubmit_info *resubmit_inflight_split; +}; + struct rte_vhost_vring { struct vring_desc *desc; struct vring_avail *avail; @@ -603,6 +642,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, */ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); +/** + * Get guest inflight vring info, including inflight ring and resubmit list. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param vring + * the structure to hold the requested inflight vring info + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring); + /** * Get guest vring info, including the vring address, vring size, etc. * @@ -631,6 +686,56 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, */ int rte_vhost_vring_call(int vid, uint16_t vring_idx); +/** + * set inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t idx); + +/** + * clear inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param last_used_idx + * next free used_idx + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx); + +/** + * set last inflight io index. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, + uint16_t idx); + /** * Get vhost RX queue avail count. * diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 5f1d4a75c..709593701 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -87,4 +87,8 @@ EXPERIMENTAL { rte_vdpa_relay_vring_used; rte_vhost_extern_callback_register; rte_vhost_driver_set_protocol_features; + rte_vhost_set_inflight_desc_split; + rte_vhost_clr_inflight_desc_split; + rte_vhost_set_last_inflight_io_split; + rte_vhost_get_vhost_vring_with_inflight_split; }; diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 981837b5d..ecd580cc5 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -242,6 +242,30 @@ cleanup_vq(struct vhost_virtqueue *vq, int destroy) close(vq->kickfd); } +void +cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + if (!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "function only be used for split inflight.\n"); + } else { + if (vq->inflight_split) + vq->inflight_split = NULL; + if (vq->resubmit_inflight_split) { + if (vq->resubmit_inflight_split->resubmit_list) { + free(vq->resubmit_inflight_split->resubmit_list); + vq->resubmit_inflight_split->resubmit_list = NULL; + } + free(vq->resubmit_inflight_split); + vq->resubmit_inflight_split = NULL; + } + } +} + /* * Unmap any memory, close any file descriptors and * free any memory owned by a device. @@ -253,8 +277,10 @@ cleanup_device(struct virtio_net *dev, int destroy) vhost_backend_cleanup(dev); - for (i = 0; i < dev->nr_vring; i++) + for (i = 0; i < dev->nr_vring; i++) { cleanup_vq(dev->virtqueue[i], destroy); + cleanup_vq_inflight_split(dev, dev->virtqueue[i]); + } } void @@ -726,6 +752,35 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) return 0; } +int __rte_experimental +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring) +{ + struct virtio_net *dev; + struct inflight_info_split *inflight_vring; + + dev = get_device(vid); + if (dev == NULL || vring == NULL) + return -1; + + if (vring_idx >= VHOST_MAX_VRING) + return -1; + + inflight_vring = dev->virtqueue[vring_idx]->inflight_split; + if (!inflight_vring) + return -1; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "Can't be packed inflight ring.\n"); + } else { + vring->inflight_split = inflight_vring; + vring->resubmit_inflight_split = dev->virtqueue[vring_idx]->resubmit_inflight_split; + } + + return 0; +} + int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct rte_vhost_vring *vring) @@ -781,6 +836,107 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) return 0; } +int +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->desc[idx].counter = vq->global_counter++; + vq->inflight_split->desc[idx].inflight = 1; + return 0; +} + +int +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + rte_compiler_barrier(); + + vq->inflight_split->desc[idx].inflight = 0; + + rte_compiler_barrier(); + + vq->inflight_split->used_idx = last_used_idx; + return 0; +} + +int +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->last_inflight_io = idx; + return 0; +} + uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id) { diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 884befa85..22925586d 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -128,6 +128,14 @@ struct vhost_virtqueue { /* Physical address of used ring, for logging */ uint64_t log_guest_addr; + /* inflight share memory info */ + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + struct resubmit_info *resubmit_inflight_split; + uint64_t global_counter; + uint16_t nr_zmbuf; uint16_t zmbuf_size; uint16_t last_zmbuf_idx; @@ -286,6 +294,12 @@ struct guest_page { uint64_t size; }; +struct inflight_mem_info { + int fd; + void *addr; + uint64_t size; +}; + /** * Device structure contains all configuration information relating * to the device. @@ -303,6 +317,7 @@ struct virtio_net { uint32_t nr_vring; int dequeue_zero_copy; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; + struct inflight_mem_info inflight_info; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) char ifname[IF_NAME_SZ]; uint64_t log_size; @@ -467,6 +482,7 @@ void vhost_destroy_device(int); void vhost_destroy_device_notify(struct virtio_net *dev); void cleanup_vq(struct vhost_virtqueue *vq, int destroy); +void cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq); void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq); int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index c9e29ece8..feb642ad7 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -31,6 +31,8 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <assert.h> +#include <sys/syscall.h> +#include <asm/unistd.h> #ifdef RTE_LIBRTE_VHOST_NUMA #include <numaif.h> #endif @@ -49,6 +51,15 @@ #define VIRTIO_MIN_MTU 68 #define VIRTIO_MAX_MTU 65535 +#define INFLIGHT_ALIGNMENT 64 +#define INFLIGHT_VERSION 0xabcd +#define VIRTQUEUE_MAX_SIZE 1024 + +#define CLOEXEC 0x0001U + +#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) + static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NONE] = "VHOST_USER_NONE", [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", @@ -78,6 +89,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE", [VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN", [VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END", + [VHOST_USER_GET_INFLIGHT_FD] = "VHOST_USER_GET_INFLIGHT_FD", + [VHOST_USER_SET_INFLIGHT_FD] = "VHOST_USER_SET_INFLIGHT_FD", }; static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg); @@ -160,6 +173,16 @@ vhost_backend_cleanup(struct virtio_net *dev) dev->log_addr = 0; } + if (dev->inflight_info.addr) { + munmap(dev->inflight_info.addr, dev->inflight_info.size); + dev->inflight_info.addr = NULL; + } + + if (dev->inflight_info.fd > 0) { + close(dev->inflight_info.fd); + dev->inflight_info.fd = -1; + } + if (dev->slave_req_fd >= 0) { close(dev->slave_req_fd); dev->slave_req_fd = -1; @@ -306,6 +329,7 @@ vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg, dev->virtqueue[dev->nr_vring] = NULL; cleanup_vq(vq, 1); + cleanup_vq_inflight_split(dev, vq); free_vq(dev, vq); } } @@ -1165,6 +1189,204 @@ virtio_is_ready(struct virtio_net *dev) return 1; } +static int mem_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + return -1; +#endif +} + +void *inflight_mem_alloc(const char *name, size_t size, int *fd) +{ + void *ptr; + int mfd = -1; + char fname[20] = "/tmp/memfd-XXXXXX"; + + *fd = -1; + mfd = mem_create(name, CLOEXEC); + if (mfd != -1) { + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } else { + mfd = mkstemp(fname); + unlink(fname); + + if (mfd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "mkstemp fail for alloc inflight buffer\n"); + return NULL; + } + + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +static uint32_t get_pervq_shm_size_split(uint16_t queue_size) +{ + return ALIGN_UP(sizeof(struct inflight_desc_split) * queue_size + + sizeof(uint64_t) * 1 + sizeof(uint16_t) * 4, INFLIGHT_ALIGNMENT); +} + +static int +vhost_user_get_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd; + uint64_t mmap_size; + void *addr; + uint16_t num_queues, queue_size; + struct virtio_net *dev = *pdev; + + if (msg->size != sizeof(msg->payload.inflight)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Invalid get_inflight_fd message size is %d", + msg->size); + return RTE_VHOST_MSG_RESULT_ERR; + } + + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n", + msg->payload.inflight.num_queues); + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n", + msg->payload.inflight.queue_size); + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } else { + mmap_size = num_queues * get_pervq_shm_size_split(queue_size); + } + + addr = inflight_mem_alloc("vhost-inflight", mmap_size, &fd); + if (!addr) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to alloc vhost inflight area"); + msg->payload.inflight.mmap_size = 0; + return RTE_VHOST_MSG_RESULT_ERR; + } + memset(addr, 0, mmap_size); + + dev->inflight_info.addr = addr; + dev->inflight_info.size = msg->payload.inflight.mmap_size = mmap_size; + dev->inflight_info.fd = msg->fds[0] = fd; + msg->payload.inflight.mmap_offset = 0; + msg->fd_num = 1; + + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_size: %lu\n", + msg->payload.inflight.mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_offset: %lu\n", + msg->payload.inflight.mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight fd: %d\n", msg->fds[0]); + + return RTE_VHOST_MSG_RESULT_REPLY; +} + +static int +vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd, i; + uint64_t mmap_size, mmap_offset; + uint16_t num_queues, queue_size; + uint32_t pervq_inflight_size; + void *addr; + struct vhost_virtqueue *vq; + struct virtio_net *dev = *pdev; + + fd = msg->fds[0]; + if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "Invalid set_inflight_fd message size is %d,fd is %d\n", + msg->size, fd); + return RTE_VHOST_MSG_RESULT_ERR; + } + + mmap_size = msg->payload.inflight.mmap_size; + mmap_offset = msg->payload.inflight.mmap_offset; + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } else { + pervq_inflight_size = get_pervq_shm_size_split(queue_size); + } + + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_size: %lu\n", mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_offset: %lu\n", mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd num_queues: %u\n", num_queues); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd queue_size: %u\n", queue_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd fd: %d\n", fd); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd pervq_inflight_size: %d\n", + pervq_inflight_size); + + if (dev->inflight_info.addr) + munmap(dev->inflight_info.addr, dev->inflight_info.size); + + addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, mmap_offset); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + if (dev->inflight_info.fd) + close(dev->inflight_info.fd); + + dev->inflight_info.fd = fd; + dev->inflight_info.addr = addr; + dev->inflight_info.size = mmap_size; + + for (i = 0; i < num_queues; i++) { + vq = dev->virtqueue[i]; + if (vq_is_packed(dev)) { + /* TODO */ + } else { + vq->inflight_split = (struct inflight_info_split *)addr; + vq->inflight_split->desc_num = queue_size; + } + addr = (void *)((char *)addr + pervq_inflight_size); + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1201,6 +1423,89 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused, return RTE_VHOST_MSG_RESULT_OK; } +static int +resubmit_desc_compare(const void *a, const void *b) +{ + const struct resubmit_desc *desc0 = (const struct resubmit_desc *)a; + const struct resubmit_desc *desc1 = (const struct resubmit_desc *)b; + + if (desc1->counter > desc0->counter && + (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) + return 1; + + return -1; +} + +static int +vhost_check_queue_inflights_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + struct vring_used *used = vq->used; + uint16_t i = 0; + uint16_t resubmit_num = 0; + struct resubmit_info *resubmit = NULL; + + if (!(dev->features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return RTE_VHOST_MSG_RESULT_OK; + + if ((!vq->inflight_split)) + return RTE_VHOST_MSG_RESULT_ERR; + + if (!vq->inflight_split->version) { + vq->inflight_split->version = INFLIGHT_VERSION; + return RTE_VHOST_MSG_RESULT_OK; + } + + vq->resubmit_inflight_split = NULL; + vq->global_counter = 0; + + if (vq->inflight_split->used_idx != used->idx) { + vq->inflight_split->desc[vq->inflight_split->last_inflight_io].inflight = 0; + rte_compiler_barrier(); + vq->inflight_split->used_idx = used->idx; + } + + for (i = 0; i < vq->inflight_split->desc_num; i++) { + if (vq->inflight_split->desc[i].inflight == 1) + resubmit_num++; + } + + vq->last_avail_idx += resubmit_num; + + if (resubmit_num) { + resubmit = calloc(1, sizeof(struct resubmit_info)); + if (!resubmit) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for resubmit info.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + resubmit->resubmit_list = calloc(resubmit_num, + sizeof(struct resubmit_desc)); + if (!resubmit->resubmit_list) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for inflight desc.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + for (i = 0; i < vq->inflight_split->desc_num; i++) { + if (vq->inflight_split->desc[i].inflight == 1) { + resubmit->resubmit_list[resubmit->resubmit_num].index = i; + resubmit->resubmit_list[resubmit->resubmit_num].counter = + vq->inflight_split->desc[i].counter; + resubmit->resubmit_num++; + } + } + + if (resubmit->resubmit_num > 1) + qsort(resubmit->resubmit_list, resubmit->resubmit_num, + sizeof(struct resubmit_desc), resubmit_desc_compare); + + vq->global_counter = resubmit->resubmit_list[0].counter + 1; + vq->resubmit_inflight_split = resubmit; + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1242,6 +1547,12 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, close(vq->kickfd); vq->kickfd = file.fd; + if (vhost_check_queue_inflights_split(dev, vq)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to inflights for vq: %d\n", file.index); + return RTE_VHOST_MSG_RESULT_ERR; + } + return RTE_VHOST_MSG_RESULT_OK; } @@ -1762,6 +2073,8 @@ static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise, [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen, [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, + [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd, + [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd, }; diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index 2a650fe4b..99a773910 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -54,7 +54,9 @@ typedef enum VhostUserRequest { VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_POSTCOPY_END = 30, - VHOST_USER_MAX = 31 + VHOST_USER_GET_INFLIGHT_FD = 31, + VHOST_USER_SET_INFLIGHT_FD = 32, + VHOST_USER_MAX = 33 } VhostUserRequest; typedef enum VhostUserSlaveRequest { @@ -112,6 +114,13 @@ typedef struct VhostUserVringArea { uint64_t offset; } VhostUserVringArea; +typedef struct VhostUserInflight { + uint64_t mmap_size; + uint64_t mmap_offset; + uint16_t num_queues; + uint16_t queue_size; +} VhostUserInflight; + typedef struct VhostUserMsg { union { uint32_t master; /* a VhostUserRequest value */ @@ -131,6 +140,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + VhostUserInflight inflight; struct vhost_iotlb_msg iotlb; VhostUserCryptoSessionParam crypto_session; VhostUserVringArea area; @@ -148,6 +158,7 @@ typedef struct VhostUserMsg { /* vhost_user.c */ int vhost_user_msg_handler(int vid, int fd); int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); +void *inflight_mem_alloc(const char *name, size_t size, int *fd); /* socket.c */ int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, -- 2.17.2 ^ permalink raw reply [flat|nested] 13+ messages in thread
* [dpdk-dev] [PATCH v2] [1/2]vhost: support inflight share memory protocol feature 2019-07-08 17:53 ` [dpdk-dev] [PATCH] [v2, 1/2]vhost: " JinYu 2019-07-08 18:00 ` JinYu @ 2019-07-08 18:16 ` JinYu 1 sibling, 0 replies; 13+ messages in thread From: JinYu @ 2019-07-08 18:16 UTC (permalink / raw) To: dev Cc: changpeng.liu, maxime.coquelin, tiwei.bie, zhihong.wang, JinYu, LinLi, XunNi, YuZhang This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared buffer between qemu and backend. Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the shared buffer from backend. Then qemu should send it back through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. This shared buffer is used to process inflight I/O when backend reconnect. Signed-off-by: LinLi <lilin24@baidu.com> Signed-off-by: XunNi <nixun@baidu.com> Signed-off-by: YuZhang <zhangyu31@baidu.com> Signed-off-by: JinYu <jin.yu@intel.com> --- v1 - specify the APIs are split-ring only v2 - fix APIs and judge split or packed --- lib/librte_vhost/rte_vhost.h | 105 +++++++++ lib/librte_vhost/rte_vhost_version.map | 4 + lib/librte_vhost/vhost.c | 158 ++++++++++++- lib/librte_vhost/vhost.h | 16 ++ lib/librte_vhost/vhost_user.c | 313 +++++++++++++++++++++++++ lib/librte_vhost/vhost_user.h | 13 +- 6 files changed, 607 insertions(+), 2 deletions(-) diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 0226b3eff..a17aff876 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -71,6 +71,10 @@ extern "C" { #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 #endif +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 +#endif + /** Indicate whether protocol features negotiation is supported. */ #ifndef VHOST_USER_F_PROTOCOL_FEATURES #define VHOST_USER_F_PROTOCOL_FEATURES 30 @@ -98,6 +102,41 @@ struct rte_vhost_memory { struct rte_vhost_mem_region regions[]; }; +struct inflight_desc_split { + uint8_t inflight; + uint8_t padding[5]; + uint16_t next; + uint64_t counter; +}; + +struct inflight_info_split { + uint64_t features; + uint16_t version; + uint16_t desc_num; + uint16_t last_inflight_io; + uint16_t used_idx; + struct inflight_desc_split desc[0]; +}; + +struct resubmit_desc { + uint16_t index; + uint64_t counter; +}; + +struct resubmit_info { + struct resubmit_desc *resubmit_list; + uint16_t resubmit_num; +}; + +struct rte_vhost_ring_inflight_split { + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + + struct resubmit_info *resubmit_inflight_split; +}; + struct rte_vhost_vring { struct vring_desc *desc; struct vring_avail *avail; @@ -603,6 +642,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, */ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); +/** + * Get guest inflight vring info, including inflight ring and resubmit list. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param vring + * the structure to hold the requested inflight vring info + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring); + /** * Get guest vring info, including the vring address, vring size, etc. * @@ -631,6 +686,56 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, */ int rte_vhost_vring_call(int vid, uint16_t vring_idx); +/** + * set inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t idx); + +/** + * clear inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param last_used_idx + * next free used_idx + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx); + +/** + * set last inflight io index. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, + uint16_t idx); + /** * Get vhost RX queue avail count. * diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 5f1d4a75c..709593701 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -87,4 +87,8 @@ EXPERIMENTAL { rte_vdpa_relay_vring_used; rte_vhost_extern_callback_register; rte_vhost_driver_set_protocol_features; + rte_vhost_set_inflight_desc_split; + rte_vhost_clr_inflight_desc_split; + rte_vhost_set_last_inflight_io_split; + rte_vhost_get_vhost_vring_with_inflight_split; }; diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 981837b5d..ecd580cc5 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -242,6 +242,30 @@ cleanup_vq(struct vhost_virtqueue *vq, int destroy) close(vq->kickfd); } +void +cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + if (!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "function only be used for split inflight.\n"); + } else { + if (vq->inflight_split) + vq->inflight_split = NULL; + if (vq->resubmit_inflight_split) { + if (vq->resubmit_inflight_split->resubmit_list) { + free(vq->resubmit_inflight_split->resubmit_list); + vq->resubmit_inflight_split->resubmit_list = NULL; + } + free(vq->resubmit_inflight_split); + vq->resubmit_inflight_split = NULL; + } + } +} + /* * Unmap any memory, close any file descriptors and * free any memory owned by a device. @@ -253,8 +277,10 @@ cleanup_device(struct virtio_net *dev, int destroy) vhost_backend_cleanup(dev); - for (i = 0; i < dev->nr_vring; i++) + for (i = 0; i < dev->nr_vring; i++) { cleanup_vq(dev->virtqueue[i], destroy); + cleanup_vq_inflight_split(dev, dev->virtqueue[i]); + } } void @@ -726,6 +752,35 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) return 0; } +int __rte_experimental +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring) +{ + struct virtio_net *dev; + struct inflight_info_split *inflight_vring; + + dev = get_device(vid); + if (dev == NULL || vring == NULL) + return -1; + + if (vring_idx >= VHOST_MAX_VRING) + return -1; + + inflight_vring = dev->virtqueue[vring_idx]->inflight_split; + if (!inflight_vring) + return -1; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "Can't be packed inflight ring.\n"); + } else { + vring->inflight_split = inflight_vring; + vring->resubmit_inflight_split = dev->virtqueue[vring_idx]->resubmit_inflight_split; + } + + return 0; +} + int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct rte_vhost_vring *vring) @@ -781,6 +836,107 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) return 0; } +int +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->desc[idx].counter = vq->global_counter++; + vq->inflight_split->desc[idx].inflight = 1; + return 0; +} + +int +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + rte_compiler_barrier(); + + vq->inflight_split->desc[idx].inflight = 0; + + rte_compiler_barrier(); + + vq->inflight_split->used_idx = last_used_idx; + return 0; +} + +int +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->last_inflight_io = idx; + return 0; +} + uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id) { diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 884befa85..22925586d 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -128,6 +128,14 @@ struct vhost_virtqueue { /* Physical address of used ring, for logging */ uint64_t log_guest_addr; + /* inflight share memory info */ + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + struct resubmit_info *resubmit_inflight_split; + uint64_t global_counter; + uint16_t nr_zmbuf; uint16_t zmbuf_size; uint16_t last_zmbuf_idx; @@ -286,6 +294,12 @@ struct guest_page { uint64_t size; }; +struct inflight_mem_info { + int fd; + void *addr; + uint64_t size; +}; + /** * Device structure contains all configuration information relating * to the device. @@ -303,6 +317,7 @@ struct virtio_net { uint32_t nr_vring; int dequeue_zero_copy; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; + struct inflight_mem_info inflight_info; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) char ifname[IF_NAME_SZ]; uint64_t log_size; @@ -467,6 +482,7 @@ void vhost_destroy_device(int); void vhost_destroy_device_notify(struct virtio_net *dev); void cleanup_vq(struct vhost_virtqueue *vq, int destroy); +void cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq); void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq); int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index c9e29ece8..32cfcd258 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -31,6 +31,8 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <assert.h> +#include <sys/syscall.h> +#include <asm/unistd.h> #ifdef RTE_LIBRTE_VHOST_NUMA #include <numaif.h> #endif @@ -49,6 +51,15 @@ #define VIRTIO_MIN_MTU 68 #define VIRTIO_MAX_MTU 65535 +#define INFLIGHT_ALIGNMENT 64 +#define INFLIGHT_VERSION 0xabcd +#define VIRTQUEUE_MAX_SIZE 1024 + +#define CLOEXEC 0x0001U + +#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) + static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NONE] = "VHOST_USER_NONE", [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", @@ -78,6 +89,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE", [VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN", [VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END", + [VHOST_USER_GET_INFLIGHT_FD] = "VHOST_USER_GET_INFLIGHT_FD", + [VHOST_USER_SET_INFLIGHT_FD] = "VHOST_USER_SET_INFLIGHT_FD", }; static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg); @@ -160,6 +173,16 @@ vhost_backend_cleanup(struct virtio_net *dev) dev->log_addr = 0; } + if (dev->inflight_info.addr) { + munmap(dev->inflight_info.addr, dev->inflight_info.size); + dev->inflight_info.addr = NULL; + } + + if (dev->inflight_info.fd > 0) { + close(dev->inflight_info.fd); + dev->inflight_info.fd = -1; + } + if (dev->slave_req_fd >= 0) { close(dev->slave_req_fd); dev->slave_req_fd = -1; @@ -306,6 +329,7 @@ vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg, dev->virtqueue[dev->nr_vring] = NULL; cleanup_vq(vq, 1); + cleanup_vq_inflight_split(dev, vq); free_vq(dev, vq); } } @@ -1165,6 +1189,204 @@ virtio_is_ready(struct virtio_net *dev) return 1; } +static int mem_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + return -1; +#endif +} + +void *inflight_mem_alloc(const char *name, size_t size, int *fd) +{ + void *ptr; + int mfd = -1; + char fname[20] = "/tmp/memfd-XXXXXX"; + + *fd = -1; + mfd = mem_create(name, CLOEXEC); + if (mfd != -1) { + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } else { + mfd = mkstemp(fname); + unlink(fname); + + if (mfd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "mkstemp fail for alloc inflight buffer\n"); + return NULL; + } + + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +static uint32_t get_pervq_shm_size_split(uint16_t queue_size) +{ + return ALIGN_UP(sizeof(struct inflight_desc_split) * queue_size + + sizeof(uint64_t) * 1 + sizeof(uint16_t) * 4, INFLIGHT_ALIGNMENT); +} + +static int +vhost_user_get_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd; + uint64_t mmap_size; + void *addr; + uint16_t num_queues, queue_size; + struct virtio_net *dev = *pdev; + + if (msg->size != sizeof(msg->payload.inflight)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Invalid get_inflight_fd message size is %d", + msg->size); + return RTE_VHOST_MSG_RESULT_ERR; + } + + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n", + msg->payload.inflight.num_queues); + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n", + msg->payload.inflight.queue_size); + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } else { + mmap_size = num_queues * get_pervq_shm_size_split(queue_size); + } + + addr = inflight_mem_alloc("vhost-inflight", mmap_size, &fd); + if (!addr) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to alloc vhost inflight area"); + msg->payload.inflight.mmap_size = 0; + return RTE_VHOST_MSG_RESULT_ERR; + } + memset(addr, 0, mmap_size); + + dev->inflight_info.addr = addr; + dev->inflight_info.size = msg->payload.inflight.mmap_size = mmap_size; + dev->inflight_info.fd = msg->fds[0] = fd; + msg->payload.inflight.mmap_offset = 0; + msg->fd_num = 1; + + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_size: %lu\n", + msg->payload.inflight.mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_offset: %lu\n", + msg->payload.inflight.mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight fd: %d\n", msg->fds[0]); + + return RTE_VHOST_MSG_RESULT_REPLY; +} + +static int +vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd, i; + uint64_t mmap_size, mmap_offset; + uint16_t num_queues, queue_size; + uint32_t pervq_inflight_size; + void *addr; + struct vhost_virtqueue *vq; + struct virtio_net *dev = *pdev; + + fd = msg->fds[0]; + if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "Invalid set_inflight_fd message size is %d,fd is %d\n", + msg->size, fd); + return RTE_VHOST_MSG_RESULT_ERR; + } + + mmap_size = msg->payload.inflight.mmap_size; + mmap_offset = msg->payload.inflight.mmap_offset; + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } else { + pervq_inflight_size = get_pervq_shm_size_split(queue_size); + } + + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_size: %lu\n", mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_offset: %lu\n", mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd num_queues: %u\n", num_queues); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd queue_size: %u\n", queue_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd fd: %d\n", fd); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd pervq_inflight_size: %d\n", + pervq_inflight_size); + + if (dev->inflight_info.addr) + munmap(dev->inflight_info.addr, dev->inflight_info.size); + + addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, mmap_offset); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + if (dev->inflight_info.fd) + close(dev->inflight_info.fd); + + dev->inflight_info.fd = fd; + dev->inflight_info.addr = addr; + dev->inflight_info.size = mmap_size; + + for (i = 0; i < num_queues; i++) { + vq = dev->virtqueue[i]; + if (vq_is_packed(dev)) { + /* TODO */ + } else { + vq->inflight_split = (struct inflight_info_split *)addr; + vq->inflight_split->desc_num = queue_size; + } + addr = (void *)((char *)addr + pervq_inflight_size); + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1201,6 +1423,89 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused, return RTE_VHOST_MSG_RESULT_OK; } +static int +resubmit_desc_compare(const void *a, const void *b) +{ + const struct resubmit_desc *desc0 = (const struct resubmit_desc *)a; + const struct resubmit_desc *desc1 = (const struct resubmit_desc *)b; + + if (desc1->counter > desc0->counter && + (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) + return 1; + + return -1; +} + +static int +vhost_check_queue_inflights_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + struct vring_used *used = vq->used; + uint16_t i = 0; + uint16_t resubmit_num = 0; + struct resubmit_info *resubmit = NULL; + + if (!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return RTE_VHOST_MSG_RESULT_OK; + + if ((!vq->inflight_split)) + return RTE_VHOST_MSG_RESULT_ERR; + + if (!vq->inflight_split->version) { + vq->inflight_split->version = INFLIGHT_VERSION; + return RTE_VHOST_MSG_RESULT_OK; + } + + vq->resubmit_inflight_split = NULL; + vq->global_counter = 0; + + if (vq->inflight_split->used_idx != used->idx) { + vq->inflight_split->desc[vq->inflight_split->last_inflight_io].inflight = 0; + rte_compiler_barrier(); + vq->inflight_split->used_idx = used->idx; + } + + for (i = 0; i < vq->inflight_split->desc_num; i++) { + if (vq->inflight_split->desc[i].inflight == 1) + resubmit_num++; + } + + vq->last_avail_idx += resubmit_num; + + if (resubmit_num) { + resubmit = calloc(1, sizeof(struct resubmit_info)); + if (!resubmit) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for resubmit info.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + resubmit->resubmit_list = calloc(resubmit_num, + sizeof(struct resubmit_desc)); + if (!resubmit->resubmit_list) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for inflight desc.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + for (i = 0; i < vq->inflight_split->desc_num; i++) { + if (vq->inflight_split->desc[i].inflight == 1) { + resubmit->resubmit_list[resubmit->resubmit_num].index = i; + resubmit->resubmit_list[resubmit->resubmit_num].counter = + vq->inflight_split->desc[i].counter; + resubmit->resubmit_num++; + } + } + + if (resubmit->resubmit_num > 1) + qsort(resubmit->resubmit_list, resubmit->resubmit_num, + sizeof(struct resubmit_desc), resubmit_desc_compare); + + vq->global_counter = resubmit->resubmit_list[0].counter + 1; + vq->resubmit_inflight_split = resubmit; + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1242,6 +1547,12 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, close(vq->kickfd); vq->kickfd = file.fd; + if (vhost_check_queue_inflights_split(dev, vq)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to inflights for vq: %d\n", file.index); + return RTE_VHOST_MSG_RESULT_ERR; + } + return RTE_VHOST_MSG_RESULT_OK; } @@ -1762,6 +2073,8 @@ static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise, [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen, [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, + [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd, + [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd, }; diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index 2a650fe4b..99a773910 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -54,7 +54,9 @@ typedef enum VhostUserRequest { VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_POSTCOPY_END = 30, - VHOST_USER_MAX = 31 + VHOST_USER_GET_INFLIGHT_FD = 31, + VHOST_USER_SET_INFLIGHT_FD = 32, + VHOST_USER_MAX = 33 } VhostUserRequest; typedef enum VhostUserSlaveRequest { @@ -112,6 +114,13 @@ typedef struct VhostUserVringArea { uint64_t offset; } VhostUserVringArea; +typedef struct VhostUserInflight { + uint64_t mmap_size; + uint64_t mmap_offset; + uint16_t num_queues; + uint16_t queue_size; +} VhostUserInflight; + typedef struct VhostUserMsg { union { uint32_t master; /* a VhostUserRequest value */ @@ -131,6 +140,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + VhostUserInflight inflight; struct vhost_iotlb_msg iotlb; VhostUserCryptoSessionParam crypto_session; VhostUserVringArea area; @@ -148,6 +158,7 @@ typedef struct VhostUserMsg { /* vhost_user.c */ int vhost_user_msg_handler(int vid, int fd); int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); +void *inflight_mem_alloc(const char *name, size_t size, int *fd); /* socket.c */ int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, -- 2.17.2 ^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <20190708183959.50293>]
* [dpdk-dev] [PATCH v2 1/2] vhost: support inflight share memory protocol feature [not found] <20190708183959.50293> @ 2019-07-10 10:43 ` JinYu 2019-07-10 7:18 ` Tiwei Bie 0 siblings, 1 reply; 13+ messages in thread From: JinYu @ 2019-07-10 10:43 UTC (permalink / raw) To: dev Cc: changpeng.liu, maxime.coquelin, tiwei.bie, zhihong.wang, JinYu, LinLi, XunNi, YuZhang This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared buffer between qemu and backend. Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the shared buffer from backend. Then qemu should send it back through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. This shared buffer is used to process inflight I/O when backend reconnect. Signed-off-by: LinLi <lilin24@baidu.com> Signed-off-by: XunNi <nixun@baidu.com> Signed-off-by: YuZhang <zhangyu31@baidu.com> Signed-off-by: JinYu <jin.yu@intel.com> --- v1 - specify the APIs are split-ring only v2 - fix APIs and judge split or packed --- lib/librte_vhost/rte_vhost.h | 105 +++++++++ lib/librte_vhost/rte_vhost_version.map | 4 + lib/librte_vhost/vhost.c | 163 ++++++++++++- lib/librte_vhost/vhost.h | 16 ++ lib/librte_vhost/vhost_user.c | 313 +++++++++++++++++++++++++ lib/librte_vhost/vhost_user.h | 13 +- 6 files changed, 612 insertions(+), 2 deletions(-) diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 0226b3eff..a17aff876 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -71,6 +71,10 @@ extern "C" { #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 #endif +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 +#endif + /** Indicate whether protocol features negotiation is supported. */ #ifndef VHOST_USER_F_PROTOCOL_FEATURES #define VHOST_USER_F_PROTOCOL_FEATURES 30 @@ -98,6 +102,41 @@ struct rte_vhost_memory { struct rte_vhost_mem_region regions[]; }; +struct inflight_desc_split { + uint8_t inflight; + uint8_t padding[5]; + uint16_t next; + uint64_t counter; +}; + +struct inflight_info_split { + uint64_t features; + uint16_t version; + uint16_t desc_num; + uint16_t last_inflight_io; + uint16_t used_idx; + struct inflight_desc_split desc[0]; +}; + +struct resubmit_desc { + uint16_t index; + uint64_t counter; +}; + +struct resubmit_info { + struct resubmit_desc *resubmit_list; + uint16_t resubmit_num; +}; + +struct rte_vhost_ring_inflight_split { + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + + struct resubmit_info *resubmit_inflight_split; +}; + struct rte_vhost_vring { struct vring_desc *desc; struct vring_avail *avail; @@ -603,6 +642,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, */ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); +/** + * Get guest inflight vring info, including inflight ring and resubmit list. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param vring + * the structure to hold the requested inflight vring info + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring); + /** * Get guest vring info, including the vring address, vring size, etc. * @@ -631,6 +686,56 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, */ int rte_vhost_vring_call(int vid, uint16_t vring_idx); +/** + * set inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t idx); + +/** + * clear inflight flag for a desc. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param last_used_idx + * next free used_idx + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx); + +/** + * set last inflight io index. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, + uint16_t idx); + /** * Get vhost RX queue avail count. * diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 5f1d4a75c..57a166f4f 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -87,4 +87,8 @@ EXPERIMENTAL { rte_vdpa_relay_vring_used; rte_vhost_extern_callback_register; rte_vhost_driver_set_protocol_features; + rte_vhost_set_inflight_desc_split; + rte_vhost_clr_inflight_desc_split; + rte_vhost_set_last_inflight_io_split; + rte_vhost_get_vhost_ring_inflight_split; }; diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 981837b5d..011e37e39 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -242,6 +242,30 @@ cleanup_vq(struct vhost_virtqueue *vq, int destroy) close(vq->kickfd); } +void +cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + if (!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "function only be used for split inflight.\n"); + } else { + if (vq->inflight_split) + vq->inflight_split = NULL; + if (vq->resubmit_inflight_split) { + if (vq->resubmit_inflight_split->resubmit_list) { + free(vq->resubmit_inflight_split->resubmit_list); + vq->resubmit_inflight_split->resubmit_list = NULL; + } + free(vq->resubmit_inflight_split); + vq->resubmit_inflight_split = NULL; + } + } +} + /* * Unmap any memory, close any file descriptors and * free any memory owned by a device. @@ -253,8 +277,10 @@ cleanup_device(struct virtio_net *dev, int destroy) vhost_backend_cleanup(dev); - for (i = 0; i < dev->nr_vring; i++) + for (i = 0; i < dev->nr_vring; i++) { cleanup_vq(dev->virtqueue[i], destroy); + cleanup_vq_inflight_split(dev, dev->virtqueue[i]); + } } void @@ -726,6 +752,40 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) return 0; } +int +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight_split *vring) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + struct inflight_info_split *inflight_vring; + + dev = get_device(vid); + if (dev == NULL || vring == NULL) + return -1; + + if (vring_idx >= VHOST_MAX_VRING) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + inflight_vring = vq->inflight_split; + if (!inflight_vring) + return -1; + + if (!vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, "Can't be packed inflight ring.\n"); + } else { + vring->inflight_split = inflight_vring; + vring->resubmit_inflight_split = vq->resubmit_inflight_split; + } + + return 0; +} + int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct rte_vhost_vring *vring) @@ -781,6 +841,107 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) return 0; } +int +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->desc[idx].counter = vq->global_counter++; + vq->inflight_split->desc[idx].inflight = 1; + return 0; +} + +int +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (unlikely(!vq)) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + rte_compiler_barrier(); + + vq->inflight_split->desc[idx].inflight = 0; + + rte_compiler_barrier(); + + vq->inflight_split->used_idx = last_used_idx; + return 0; +} + +int +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, uint16_t idx) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (unlikely(!dev)) + return -1; + + if (unlikely(!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))) + return 0; + + if (unlikely(vq_is_packed(dev))) + return -1; + + if (unlikely(vring_idx >= VHOST_MAX_VRING)) + return -1; + + vq = dev->virtqueue[vring_idx]; + if (!vq) + return -1; + + if (unlikely(!vq->inflight_split)) + return -1; + + vq->inflight_split->last_inflight_io = idx; + return 0; +} + uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id) { diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 884befa85..22925586d 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -128,6 +128,14 @@ struct vhost_virtqueue { /* Physical address of used ring, for logging */ uint64_t log_guest_addr; + /* inflight share memory info */ + union { + struct inflight_info_split *inflight_split; + /* TODO */ + }; + struct resubmit_info *resubmit_inflight_split; + uint64_t global_counter; + uint16_t nr_zmbuf; uint16_t zmbuf_size; uint16_t last_zmbuf_idx; @@ -286,6 +294,12 @@ struct guest_page { uint64_t size; }; +struct inflight_mem_info { + int fd; + void *addr; + uint64_t size; +}; + /** * Device structure contains all configuration information relating * to the device. @@ -303,6 +317,7 @@ struct virtio_net { uint32_t nr_vring; int dequeue_zero_copy; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; + struct inflight_mem_info inflight_info; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) char ifname[IF_NAME_SZ]; uint64_t log_size; @@ -467,6 +482,7 @@ void vhost_destroy_device(int); void vhost_destroy_device_notify(struct virtio_net *dev); void cleanup_vq(struct vhost_virtqueue *vq, int destroy); +void cleanup_vq_inflight_split(struct virtio_net *dev, struct vhost_virtqueue *vq); void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq); int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index c9e29ece8..1119e11d1 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -31,6 +31,8 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <assert.h> +#include <sys/syscall.h> +#include <asm/unistd.h> #ifdef RTE_LIBRTE_VHOST_NUMA #include <numaif.h> #endif @@ -49,6 +51,15 @@ #define VIRTIO_MIN_MTU 68 #define VIRTIO_MAX_MTU 65535 +#define INFLIGHT_ALIGNMENT 64 +#define INFLIGHT_VERSION 0xabcd +#define VIRTQUEUE_MAX_SIZE 1024 + +#define CLOEXEC 0x0001U + +#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) + static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NONE] = "VHOST_USER_NONE", [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", @@ -78,6 +89,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE", [VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN", [VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END", + [VHOST_USER_GET_INFLIGHT_FD] = "VHOST_USER_GET_INFLIGHT_FD", + [VHOST_USER_SET_INFLIGHT_FD] = "VHOST_USER_SET_INFLIGHT_FD", }; static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg); @@ -160,6 +173,16 @@ vhost_backend_cleanup(struct virtio_net *dev) dev->log_addr = 0; } + if (dev->inflight_info.addr) { + munmap(dev->inflight_info.addr, dev->inflight_info.size); + dev->inflight_info.addr = NULL; + } + + if (dev->inflight_info.fd > 0) { + close(dev->inflight_info.fd); + dev->inflight_info.fd = -1; + } + if (dev->slave_req_fd >= 0) { close(dev->slave_req_fd); dev->slave_req_fd = -1; @@ -306,6 +329,7 @@ vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg, dev->virtqueue[dev->nr_vring] = NULL; cleanup_vq(vq, 1); + cleanup_vq_inflight_split(dev, vq); free_vq(dev, vq); } } @@ -1165,6 +1189,202 @@ virtio_is_ready(struct virtio_net *dev) return 1; } +static int mem_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + return -1; +#endif +} + +void *inflight_mem_alloc(const char *name, size_t size, int *fd) +{ + void *ptr; + int mfd = -1; + char fname[20] = "/tmp/memfd-XXXXXX"; + + *fd = -1; + mfd = mem_create(name, CLOEXEC); + if (mfd != -1) { + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } else { + mfd = mkstemp(fname); + unlink(fname); + + if (mfd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "mkstemp fail for alloc inflight buffer\n"); + return NULL; + } + + if (ftruncate(mfd, size) == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "ftruncate fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap fail for alloc inflight buffer\n"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +static uint32_t get_pervq_shm_size_split(uint16_t queue_size) +{ + return ALIGN_UP(sizeof(struct inflight_desc_split) * queue_size + + sizeof(uint64_t) * 1 + sizeof(uint16_t) * 4, INFLIGHT_ALIGNMENT); +} + +static int +vhost_user_get_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd; + uint64_t mmap_size; + void *addr; + uint16_t num_queues, queue_size; + struct virtio_net *dev = *pdev; + + if (msg->size != sizeof(msg->payload.inflight)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Invalid get_inflight_fd message size is %d", + msg->size); + return RTE_VHOST_MSG_RESULT_ERR; + } + + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n", + msg->payload.inflight.num_queues); + RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n", + msg->payload.inflight.queue_size); + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + mmap_size = num_queues * get_pervq_shm_size_split(queue_size); + + addr = inflight_mem_alloc("vhost-inflight", mmap_size, &fd); + if (!addr) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to alloc vhost inflight area"); + msg->payload.inflight.mmap_size = 0; + return RTE_VHOST_MSG_RESULT_ERR; + } + memset(addr, 0, mmap_size); + + dev->inflight_info.addr = addr; + dev->inflight_info.size = msg->payload.inflight.mmap_size = mmap_size; + dev->inflight_info.fd = msg->fds[0] = fd; + msg->payload.inflight.mmap_offset = 0; + msg->fd_num = 1; + + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_size: %lu\n", + msg->payload.inflight.mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight mmap_offset: %lu\n", + msg->payload.inflight.mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "send inflight fd: %d\n", msg->fds[0]); + + return RTE_VHOST_MSG_RESULT_REPLY; +} + +static int +vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int main_fd __rte_unused) +{ + int fd, i; + uint64_t mmap_size, mmap_offset; + uint16_t num_queues, queue_size; + uint32_t pervq_inflight_size; + void *addr; + struct vhost_virtqueue *vq; + struct virtio_net *dev = *pdev; + + fd = msg->fds[0]; + if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "Invalid set_inflight_fd message size is %d,fd is %d\n", + msg->size, fd); + return RTE_VHOST_MSG_RESULT_ERR; + } + + mmap_size = msg->payload.inflight.mmap_size; + mmap_offset = msg->payload.inflight.mmap_offset; + num_queues = msg->payload.inflight.num_queues; + queue_size = msg->payload.inflight.queue_size; + + if (vq_is_packed(dev)) { + /* TODO */ + RTE_LOG(ERR, VHOST_CONFIG, + "Don't support the packed ring\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + pervq_inflight_size = get_pervq_shm_size_split(queue_size); + + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_size: %lu\n", mmap_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd mmap_offset: %lu\n", mmap_offset); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd num_queues: %u\n", num_queues); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd queue_size: %u\n", queue_size); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd fd: %d\n", fd); + RTE_LOG(INFO, VHOST_CONFIG, + "set_inflight_fd pervq_inflight_size: %d\n", + pervq_inflight_size); + + if (dev->inflight_info.addr) + munmap(dev->inflight_info.addr, dev->inflight_info.size); + + addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, mmap_offset); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + if (dev->inflight_info.fd) + close(dev->inflight_info.fd); + + dev->inflight_info.fd = fd; + dev->inflight_info.addr = addr; + dev->inflight_info.size = mmap_size; + + for (i = 0; i < num_queues; i++) { + vq = dev->virtqueue[i]; + if (vq_is_packed(dev)) { + /* TODO */ + } else { + vq->inflight_split = (struct inflight_info_split *)addr; + vq->inflight_split->desc_num = queue_size; + } + addr = (void *)((char *)addr + pervq_inflight_size); + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1201,6 +1421,91 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused, return RTE_VHOST_MSG_RESULT_OK; } +static int +resubmit_desc_compare(const void *a, const void *b) +{ + const struct resubmit_desc *desc0 = (const struct resubmit_desc *)a; + const struct resubmit_desc *desc1 = (const struct resubmit_desc *)b; + + if (desc1->counter > desc0->counter && + (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) + return 1; + + return -1; +} + +static int +vhost_check_queue_inflights_split(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + struct vring_used *used = vq->used; + uint16_t i = 0; + uint16_t resubmit_num = 0; + struct resubmit_info *resubmit = NULL; + struct inflight_info_split *inflight_split; + + if (!(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return RTE_VHOST_MSG_RESULT_OK; + + if ((!vq->inflight_split)) + return RTE_VHOST_MSG_RESULT_ERR; + + if (!vq->inflight_split->version) { + vq->inflight_split->version = INFLIGHT_VERSION; + return RTE_VHOST_MSG_RESULT_OK; + } + + inflight_split = vq->inflight_split; + vq->resubmit_inflight_split = NULL; + vq->global_counter = 0; + + if (inflight_split->used_idx != used->idx) { + inflight_split->desc[inflight_split->last_inflight_io].inflight = 0; + rte_compiler_barrier(); + inflight_split->used_idx = used->idx; + } + + for (i = 0; i < inflight_split->desc_num; i++) { + if (inflight_split->desc[i].inflight == 1) + resubmit_num++; + } + + vq->last_avail_idx += resubmit_num; + + if (resubmit_num) { + resubmit = calloc(1, sizeof(struct resubmit_info)); + if (!resubmit) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for resubmit info.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + resubmit->resubmit_list = calloc(resubmit_num, + sizeof(struct resubmit_desc)); + if (!resubmit->resubmit_list) { + RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for inflight desc.\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + + for (i = 0; i < vq->inflight_split->desc_num; i++) { + if (vq->inflight_split->desc[i].inflight == 1) { + resubmit->resubmit_list[resubmit->resubmit_num].index = i; + resubmit->resubmit_list[resubmit->resubmit_num].counter = + inflight_split->desc[i].counter; + resubmit->resubmit_num++; + } + } + + if (resubmit->resubmit_num > 1) + qsort(resubmit->resubmit_list, resubmit->resubmit_num, + sizeof(struct resubmit_desc), resubmit_desc_compare); + + vq->global_counter = resubmit->resubmit_list[0].counter + 1; + vq->resubmit_inflight_split = resubmit; + } + + return RTE_VHOST_MSG_RESULT_OK; +} + static int vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, int main_fd __rte_unused) @@ -1242,6 +1547,12 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, close(vq->kickfd); vq->kickfd = file.fd; + if (vhost_check_queue_inflights_split(dev, vq)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to inflights for vq: %d\n", file.index); + return RTE_VHOST_MSG_RESULT_ERR; + } + return RTE_VHOST_MSG_RESULT_OK; } @@ -1762,6 +2073,8 @@ static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = { [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise, [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen, [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, + [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd, + [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd, }; diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index 2a650fe4b..99a773910 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -54,7 +54,9 @@ typedef enum VhostUserRequest { VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_POSTCOPY_END = 30, - VHOST_USER_MAX = 31 + VHOST_USER_GET_INFLIGHT_FD = 31, + VHOST_USER_SET_INFLIGHT_FD = 32, + VHOST_USER_MAX = 33 } VhostUserRequest; typedef enum VhostUserSlaveRequest { @@ -112,6 +114,13 @@ typedef struct VhostUserVringArea { uint64_t offset; } VhostUserVringArea; +typedef struct VhostUserInflight { + uint64_t mmap_size; + uint64_t mmap_offset; + uint16_t num_queues; + uint16_t queue_size; +} VhostUserInflight; + typedef struct VhostUserMsg { union { uint32_t master; /* a VhostUserRequest value */ @@ -131,6 +140,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + VhostUserInflight inflight; struct vhost_iotlb_msg iotlb; VhostUserCryptoSessionParam crypto_session; VhostUserVringArea area; @@ -148,6 +158,7 @@ typedef struct VhostUserMsg { /* vhost_user.c */ int vhost_user_msg_handler(int vid, int fd); int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); +void *inflight_mem_alloc(const char *name, size_t size, int *fd); /* socket.c */ int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, -- 2.17.2 ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/2] vhost: support inflight share memory protocol feature 2019-07-10 10:43 ` [dpdk-dev] [PATCH v2 1/2] vhost: " JinYu @ 2019-07-10 7:18 ` Tiwei Bie 2019-07-11 2:09 ` Yu, Jin 0 siblings, 1 reply; 13+ messages in thread From: Tiwei Bie @ 2019-07-10 7:18 UTC (permalink / raw) To: JinYu Cc: dev, changpeng.liu, maxime.coquelin, zhihong.wang, LinLi, XunNi, YuZhang On Wed, Jul 10, 2019 at 06:43:55PM +0800, JinYu wrote: > This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD > and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared > buffer between qemu and backend. > > Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the > shared buffer from backend. Then qemu should send it back > through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. > > This shared buffer is used to process inflight I/O when backend > reconnect. I saw you send several patch sets for v2. It's a bit confusing. Please bump the version when you send a new series. Besides, please keep the cover letter. > > Signed-off-by: LinLi <lilin24@baidu.com> > Signed-off-by: XunNi <nixun@baidu.com> > Signed-off-by: YuZhang <zhangyu31@baidu.com> > Signed-off-by: JinYu <jin.yu@intel.com> There should be a space between FirstName and LastName. [...] > > +struct inflight_desc_split { > + uint8_t inflight; > + uint8_t padding[5]; > + uint16_t next; > + uint64_t counter; > +}; > + > +struct inflight_info_split { > + uint64_t features; > + uint16_t version; > + uint16_t desc_num; > + uint16_t last_inflight_io; > + uint16_t used_idx; > + struct inflight_desc_split desc[0]; > +}; I haven't looked into the details yet. Is it necessary to expose above definitions to applications? It seems they are not used in your example code. > + > +struct resubmit_desc { > + uint16_t index; > + uint64_t counter; > +}; > + > +struct resubmit_info { > + struct resubmit_desc *resubmit_list; > + uint16_t resubmit_num; > +}; The rte_vhost_ prefix is needed for vhost APIs. > + > +struct rte_vhost_ring_inflight_split { > + union { > + struct inflight_info_split *inflight_split; > + /* TODO */ > + }; Ditto. Seems it's not used in example code. > + > + struct resubmit_info *resubmit_inflight_split; > +}; > + > struct rte_vhost_vring { > struct vring_desc *desc; > struct vring_avail *avail; > @@ -603,6 +642,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, > */ > int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); > > +/** > + * Get guest inflight vring info, including inflight ring and resubmit list. > + * > + * @param vid > + * vhost device ID > + * @param vring_idx > + * vring index > + * @param vring > + * the structure to hold the requested inflight vring info > + * @return > + * 0 on success, -1 on failure > + */ > +int __rte_experimental > +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, > + struct rte_vhost_ring_inflight_split *vring); > + > /** > * Get guest vring info, including the vring address, vring size, etc. > * > @@ -631,6 +686,56 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, > */ > int rte_vhost_vring_call(int vid, uint16_t vring_idx); > > +/** > + * set inflight flag for a desc. > + * > + * @param vid > + * vhost device ID > + * @param vring_idx > + * vring index > + * @param idx > + * inflight entry index > + * @return > + * 0 on success, -1 on failure > + */ > +int __rte_experimental > +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, > + uint16_t idx); [...] > +int __rte_experimental > +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, > + uint16_t last_used_idx, uint16_t idx); [...] > +int __rte_experimental > +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, > + uint16_t idx); What will be different in the function prototype when we design above three APIs for packed ring? ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/2] vhost: support inflight share memory protocol feature 2019-07-10 7:18 ` Tiwei Bie @ 2019-07-11 2:09 ` Yu, Jin 2019-07-11 2:18 ` Tiwei Bie 0 siblings, 1 reply; 13+ messages in thread From: Yu, Jin @ 2019-07-11 2:09 UTC (permalink / raw) To: Bie, Tiwei Cc: dev, Liu, Changpeng, maxime.coquelin, Wang, Zhihong, LinLi, XunNi, YuZhang Hi Tiwei, thanks. > -----Original Message----- > From: Bie, Tiwei > Sent: Wednesday, July 10, 2019 3:18 PM > To: Yu, Jin <jin.yu@intel.com> > Cc: dev@dpdk.org; Liu, Changpeng <changpeng.liu@intel.com>; > maxime.coquelin@redhat.com; Wang, Zhihong <zhihong.wang@intel.com>; > LinLi <lilin24@baidu.com>; XunNi <nixun@baidu.com>; YuZhang > <zhangyu31@baidu.com> > Subject: Re: [PATCH v2 1/2] vhost: support inflight share memory protocol > feature > > On Wed, Jul 10, 2019 at 06:43:55PM +0800, JinYu wrote: > > This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD > and > > VHOST_USER_SET_INFLIGHT_FD to support transferring a shared buffer > > between qemu and backend. > > > > Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the shared buffer > > from backend. Then qemu should send it back through > > VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user. > > > > This shared buffer is used to process inflight I/O when backend > > reconnect. > > I saw you send several patch sets for v2. It's a bit confusing. Please bump the > version when you send a new series. Besides, please keep the cover letter. Sorry for the confusion. There are some misoperations and I have set them superseded. > > > > > Signed-off-by: LinLi <lilin24@baidu.com> > > Signed-off-by: XunNi <nixun@baidu.com> > > Signed-off-by: YuZhang <zhangyu31@baidu.com> > > Signed-off-by: JinYu <jin.yu@intel.com> > > There should be a space between FirstName and LastName. Got it. > > [...] > > > > +struct inflight_desc_split { > > + uint8_t inflight; > > + uint8_t padding[5]; > > + uint16_t next; > > + uint64_t counter; > > +}; > > + > > +struct inflight_info_split { > > + uint64_t features; > > + uint16_t version; > > + uint16_t desc_num; > > + uint16_t last_inflight_io; > > + uint16_t used_idx; > > + struct inflight_desc_split desc[0]; > > +}; > > I haven't looked into the details yet. Is it necessary to expose above > definitions to applications? It seems they are not used in your example code. Will revise in the next version. > > > + > > +struct resubmit_desc { > > + uint16_t index; > > + uint64_t counter; > > +}; > > + > > +struct resubmit_info { > > + struct resubmit_desc *resubmit_list; > > + uint16_t resubmit_num; > > +}; > > The rte_vhost_ prefix is needed for vhost APIs. > > > + > > +struct rte_vhost_ring_inflight_split { > > + union { > > + struct inflight_info_split *inflight_split; > > + /* TODO */ > > + }; > > Ditto. Seems it's not used in example code. > > > + > > + struct resubmit_info *resubmit_inflight_split; }; > > + > > struct rte_vhost_vring { > > struct vring_desc *desc; > > struct vring_avail *avail; > > @@ -603,6 +642,22 @@ uint16_t rte_vhost_dequeue_burst(int vid, > uint16_t queue_id, > > */ > > int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); > > > > +/** > > + * Get guest inflight vring info, including inflight ring and resubmit list. > > + * > > + * @param vid > > + * vhost device ID > > + * @param vring_idx > > + * vring index > > + * @param vring > > + * the structure to hold the requested inflight vring info > > + * @return > > + * 0 on success, -1 on failure > > + */ > > +int __rte_experimental > > +rte_vhost_get_vhost_ring_inflight_split(int vid, uint16_t vring_idx, > > + struct rte_vhost_ring_inflight_split *vring); > > + > > /** > > * Get guest vring info, including the vring address, vring size, etc. > > * > > @@ -631,6 +686,56 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t > vring_idx, > > */ > > int rte_vhost_vring_call(int vid, uint16_t vring_idx); > > > > +/** > > + * set inflight flag for a desc. > > + * > > + * @param vid > > + * vhost device ID > > + * @param vring_idx > > + * vring index > > + * @param idx > > + * inflight entry index > > + * @return > > + * 0 on success, -1 on failure > > + */ > > +int __rte_experimental > > +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, > > + uint16_t idx); > [...] > > +int __rte_experimental > > +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, > > + uint16_t last_used_idx, uint16_t idx); > [...] > > +int __rte_experimental > > +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, > > + uint16_t idx); > > What will be different in the function prototype when we design above three > APIs for packed ring? The whole process is quite different as there are no avail ring and userd ring. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/2] vhost: support inflight share memory protocol feature 2019-07-11 2:09 ` Yu, Jin @ 2019-07-11 2:18 ` Tiwei Bie 2019-07-11 2:39 ` Yu, Jin 0 siblings, 1 reply; 13+ messages in thread From: Tiwei Bie @ 2019-07-11 2:18 UTC (permalink / raw) To: Yu, Jin Cc: dev, Liu, Changpeng, maxime.coquelin, Wang, Zhihong, LinLi, XunNi, YuZhang On Thu, Jul 11, 2019 at 10:09:26AM +0800, Yu, Jin wrote: [...] > > > +int __rte_experimental > > > +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, > > > + uint16_t idx); > > [...] > > > +int __rte_experimental > > > +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, > > > + uint16_t last_used_idx, uint16_t idx); > > [...] > > > +int __rte_experimental > > > +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, > > > + uint16_t idx); > > > > What will be different in the function prototype when we design above three > > APIs for packed ring? > The whole process is quite different as there are no avail ring and userd ring. I mean what will be different in the return value and parameters? Could you show us the expected function prototypes (i.e. just the function declarations) for packed ring? ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/2] vhost: support inflight share memory protocol feature 2019-07-11 2:18 ` Tiwei Bie @ 2019-07-11 2:39 ` Yu, Jin 2019-07-11 4:09 ` Tiwei Bie 0 siblings, 1 reply; 13+ messages in thread From: Yu, Jin @ 2019-07-11 2:39 UTC (permalink / raw) To: Bie, Tiwei Cc: dev, Liu, Changpeng, maxime.coquelin, Wang, Zhihong, LinLi, XunNi, YuZhang > On Thu, Jul 11, 2019 at 10:09:26AM +0800, Yu, Jin wrote: > [...] > > > > +int __rte_experimental > > > > +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, > > > > + uint16_t idx); > > > [...] > > > > +int __rte_experimental > > > > +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, > > > > + uint16_t last_used_idx, uint16_t idx); > > > [...] > > > > +int __rte_experimental > > > > +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, > > > > + uint16_t idx); > > > > > > What will be different in the function prototype when we design > > > above three APIs for packed ring? > > The whole process is quite different as there are no avail ring and userd > ring. > > I mean what will be different in the return value and parameters? > Could you show us the expected function prototypes (i.e. just the function > declarations) for packed ring? Sorry I am not sure, I am working on this. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/2] vhost: support inflight share memory protocol feature 2019-07-11 2:39 ` Yu, Jin @ 2019-07-11 4:09 ` Tiwei Bie 0 siblings, 0 replies; 13+ messages in thread From: Tiwei Bie @ 2019-07-11 4:09 UTC (permalink / raw) To: Yu, Jin Cc: dev, Liu, Changpeng, maxime.coquelin, Wang, Zhihong, LinLi, XunNi, YuZhang On Thu, Jul 11, 2019 at 10:39:46AM +0800, Yu, Jin wrote: > > On Thu, Jul 11, 2019 at 10:09:26AM +0800, Yu, Jin wrote: > > [...] > > > > > +int __rte_experimental > > > > > +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, > > > > > + uint16_t idx); > > > > [...] > > > > > +int __rte_experimental > > > > > +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, > > > > > + uint16_t last_used_idx, uint16_t idx); > > > > [...] > > > > > +int __rte_experimental > > > > > +rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, > > > > > + uint16_t idx); > > > > > > > > What will be different in the function prototype when we design > > > > above three APIs for packed ring? > > > The whole process is quite different as there are no avail ring and userd > > ring. > > > > I mean what will be different in the return value and parameters? > > Could you show us the expected function prototypes (i.e. just the function > > declarations) for packed ring? > Sorry I am not sure, I am working on this. It doesn't matter :) But please consider it as part of the work in this series, as it would be helpful for us to design the APIs if we know the expected APIs for packed ring. Thanks, Tiwei ^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2019-07-11 4:10 UTC | newest] Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2019-06-26 16:38 [dpdk-dev] [PATCH] [v1]vhost: support inflight share memory protocol feature JinYu 2019-07-05 8:47 ` Maxime Coquelin 2019-07-08 7:23 ` Yu, Jin 2019-07-08 17:53 ` [dpdk-dev] [PATCH] [v2, 1/2]vhost: " JinYu 2019-07-08 18:00 ` JinYu 2019-07-08 18:00 ` [dpdk-dev] [PATCH v2] " JinYu 2019-07-08 18:16 ` [dpdk-dev] [PATCH v2] [1/2]vhost: " JinYu [not found] <20190708183959.50293> 2019-07-10 10:43 ` [dpdk-dev] [PATCH v2 1/2] vhost: " JinYu 2019-07-10 7:18 ` Tiwei Bie 2019-07-11 2:09 ` Yu, Jin 2019-07-11 2:18 ` Tiwei Bie 2019-07-11 2:39 ` Yu, Jin 2019-07-11 4:09 ` Tiwei Bie
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).