From: patrick.fu@intel.com
To: dev@dpdk.org, maxime.coquelin@redhat.com, chenbo.xia@intel.com, zhihong.wang@intel.com
Cc: patrick.fu@intel.com, yinan.wang@intel.com, cheng1.jiang@intel.com, cunming.liang@intel.com
Date: Mon, 6 Jul 2020 19:53:41 +0800
Message-Id: <20200706115342.288731-2-patrick.fu@intel.com>
X-Mailer: git-send-email 2.18.4
In-Reply-To: <20200706115342.288731-1-patrick.fu@intel.com>
References: <1591869725-13331-1-git-send-email-patrick.fu@intel.com>
 <20200706115342.288731-1-patrick.fu@intel.com>
Subject: [dpdk-dev] [PATCH v5 1/2] vhost: introduce async enqueue registration API

From: Patrick Fu <patrick.fu@intel.com>

This patch introduces registration/un-registration APIs for the vhost async
data enqueue operation. Along with the registration API implementations, it
also defines the data structures and async callback functions required by
the async enqueue data path.
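As a usage illustration (not part of this patch), an application with its own
DMA engine is expected to plug into the new API roughly as follows. The my_*
callback names, the in-order setting and the 256-byte threshold value are
made-up placeholders; a real backend would hand the src/dst iov_iter pairs to
its DMA hardware instead of completing them inline:

    #include <rte_vhost_async.h>

    /* hypothetical application callbacks */
    static int
    my_transfer_data(int vid, uint16_t queue_id,
            struct rte_vhost_async_desc *descs,
            struct rte_vhost_async_status *opaque_data,
            uint16_t count)
    {
        /* submit "count" src/dst iov_iter pairs to the DMA engine here */
        return count;
    }

    static int
    my_check_completed_copies(int vid, uint16_t queue_id,
            struct rte_vhost_async_status *opaque_data,
            uint16_t max_packets)
    {
        /* poll the DMA engine and report how many packets have finished */
        return 0;
    }

    static struct rte_vhost_async_channel_ops my_ops = {
        .transfer_data = my_transfer_data,
        .check_completed_copies = my_check_completed_copies,
    };

    /* typically called from the application's new_device/vring handlers */
    static int
    my_setup_async_channel(int vid, uint16_t queue_id)
    {
        struct rte_vhost_async_features f;

        f.intval = 0;
        f.async_inorder = 1;     /* only in-order mode is accepted for now */
        f.async_threshold = 256; /* packet length threshold, made-up value */

        return rte_vhost_async_channel_register(vid, queue_id,
                                                 f.intval, &my_ops);
    }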
Signed-off-by: Patrick Fu <patrick.fu@intel.com>
---
 lib/librte_vhost/Makefile              |   2 +-
 lib/librte_vhost/meson.build           |   2 +-
 lib/librte_vhost/rte_vhost.h           |   1 +
 lib/librte_vhost/rte_vhost_async.h     | 136 +++++++++++++++++++++++++
 lib/librte_vhost/rte_vhost_version.map |   4 +
 lib/librte_vhost/socket.c              |  27 +++++
 lib/librte_vhost/vhost.c               | 127 ++++++++++++++++++++++-
 lib/librte_vhost/vhost.h               |  30 +++++-
 lib/librte_vhost/vhost_user.c          |  23 ++++-
 9 files changed, 345 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_vhost/rte_vhost_async.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index b7ff7dc4b..4f2f3e47d 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -42,7 +42,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h \
-						rte_vdpa_dev.h
+						rte_vdpa_dev.h rte_vhost_async.h
 
 # only compile vhost crypto when cryptodev is enabled
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build
index 882a0eaf4..cc9aa65c6 100644
--- a/lib/librte_vhost/meson.build
+++ b/lib/librte_vhost/meson.build
@@ -22,5 +22,5 @@ sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vdpa.c', 'vhost.c',
 		'vhost_user.c', 'virtio_net.c', 'vhost_crypto.c')
 headers = files('rte_vhost.h', 'rte_vdpa.h', 'rte_vdpa_dev.h',
-		'rte_vhost_crypto.h')
+		'rte_vhost_crypto.h', 'rte_vhost_async.h')
 deps += ['ethdev', 'cryptodev', 'hash', 'pci']
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 8a5c332c8..f93f9595a 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -35,6 +35,7 @@ extern "C" {
 #define RTE_VHOST_USER_EXTBUF_SUPPORT	(1ULL << 5)
 /* support only linear buffers (no chained mbufs) */
 #define RTE_VHOST_USER_LINEARBUF_SUPPORT	(1ULL << 6)
+#define RTE_VHOST_USER_ASYNC_COPY	(1ULL << 7)
 
 /* Features. */
 #ifndef VIRTIO_NET_F_GUEST_ANNOUNCE
diff --git a/lib/librte_vhost/rte_vhost_async.h b/lib/librte_vhost/rte_vhost_async.h
new file mode 100644
index 000000000..d5a59279a
--- /dev/null
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_VHOST_ASYNC_H_
+#define _RTE_VHOST_ASYNC_H_
+
+#include "rte_vhost.h"
+
+/**
+ * iovec iterator
+ */
+struct rte_vhost_iov_iter {
+	/** offset to the first byte of interesting data */
+	size_t offset;
+	/** total bytes of data in this iterator */
+	size_t count;
+	/** pointer to the iovec array */
+	struct iovec *iov;
+	/** number of iovec in this iterator */
+	unsigned long nr_segs;
+};
+
+/**
+ * dma transfer descriptor pair
+ */
+struct rte_vhost_async_desc {
+	/** source memory iov_iter */
+	struct rte_vhost_iov_iter *src;
+	/** destination memory iov_iter */
+	struct rte_vhost_iov_iter *dst;
+};
+
+/**
+ * dma transfer status
+ */
+struct rte_vhost_async_status {
+	/** An array of application specific data for source memory */
+	uintptr_t *src_opaque_data;
+	/** An array of application specific data for destination memory */
+	uintptr_t *dst_opaque_data;
+};
+
+/**
+ * dma operation callbacks to be implemented by applications
+ */
+struct rte_vhost_async_channel_ops {
+	/**
+	 * instruct async engines to perform copies for a batch of packets
+	 *
+	 * @param vid
+	 *  id of vhost device to perform data copies
+	 * @param queue_id
+	 *  queue id to perform data copies
+	 * @param descs
+	 *  an array of DMA transfer memory descriptors
+	 * @param opaque_data
+	 *  opaque data pair sent to DMA engine
+	 * @param count
+	 *  number of elements in the "descs" array
+	 * @return
+	 *  -1 on failure, number of descs processed on success
+	 */
+	int (*transfer_data)(int vid, uint16_t queue_id,
+		struct rte_vhost_async_desc *descs,
+		struct rte_vhost_async_status *opaque_data,
+		uint16_t count);
+	/**
+	 * check copy-completed packets from the async engine
+	 * @param vid
+	 *  id of vhost device to check copy completion
+	 * @param queue_id
+	 *  queue id to check copy completion
+	 * @param opaque_data
+	 *  buffer to receive the opaque data pair from DMA engine
+	 * @param max_packets
+	 *  max number of packets that could be completed
+	 * @return
+	 *  -1 on failure, number of iov segments completed on success
+	 */
+	int (*check_completed_copies)(int vid, uint16_t queue_id,
+		struct rte_vhost_async_status *opaque_data,
+		uint16_t max_packets);
+};
+
+/**
+ * dma channel feature bit definition
+ */
+struct rte_vhost_async_features {
+	union {
+		uint32_t intval;
+		struct {
+			uint32_t async_inorder:1;
+			uint32_t resvd_0:15;
+			uint32_t async_threshold:12;
+			uint32_t resvd_1:4;
+		};
+	};
+};
+
+/**
+ * register an async channel for vhost
+ *
+ * @param vid
+ *  vhost device id async channel to be attached to
+ * @param queue_id
+ *  vhost queue id async channel to be attached to
+ * @param features
+ *  DMA channel feature bit
+ *  b0       : DMA supports inorder data transfer
+ *  b1  - b15: reserved
+ *  b16 - b27: Packet length threshold for DMA transfer
+ *  b28 - b31: reserved
+ * @param ops
+ *  DMA operation callbacks
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+	uint32_t features, struct rte_vhost_async_channel_ops *ops);
+
+/**
+ * unregister a dma channel for vhost
+ *
+ * @param vid
+ *  vhost device id DMA channel to be detached
+ * @param queue_id
+ *  vhost queue id DMA channel to be detached
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id);
+
+#endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 86784405a..13ec53b63 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -71,4 +71,8 @@ EXPERIMENTAL {
 	rte_vdpa_get_queue_num;
 	rte_vdpa_get_features;
 	rte_vdpa_get_protocol_features;
+	rte_vhost_async_channel_register;
+	rte_vhost_async_channel_unregister;
+	rte_vhost_submit_enqueue_burst;
+	rte_vhost_poll_enqueue_completed;
 };
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 49267cebf..c4626d2c4 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -42,6 +42,7 @@ struct vhost_user_socket {
 	bool use_builtin_virtio_net;
 	bool extbuf;
 	bool linearbuf;
+	bool async_copy;
 
 	/*
 	 * The "supported_features" indicates the feature bits the
@@ -205,6 +206,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 	size_t size;
 	struct vhost_user_connection *conn;
 	int ret;
+	struct virtio_net *dev;
 
 	if (vsocket == NULL)
 		return;
@@ -236,6 +238,13 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 	if (vsocket->linearbuf)
 		vhost_enable_linearbuf(vid);
 
+	if (vsocket->async_copy) {
+		dev = get_device(vid);
+
+		if (dev)
+			dev->async_copy = 1;
+	}
+
 	VHOST_LOG_CONFIG(INFO, "new device, handle is %d\n", vid);
 
 	if (vsocket->notify_ops->new_connection) {
@@ -881,6 +890,17 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
 		goto out_mutex;
 	}
 
+	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
+
+	if (vsocket->async_copy &&
+		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
+		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
+		VHOST_LOG_CONFIG(ERR, "error: enabling async copy and IOMMU "
+			"or post-copy feature simultaneously is not "
+			"supported\n");
+		goto out_mutex;
+	}
+
 	/*
 	 * Set the supported features correctly for the builtin vhost-user
 	 * net driver.
@@ -931,6 +951,13 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
 			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
 	}
 
+	if (vsocket->async_copy) {
+		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
+		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
+		VHOST_LOG_CONFIG(INFO,
+			"Logging feature is disabled in async copy mode\n");
+	}
+
 	/*
 	 * We'll not be able to receive a buffer from guest in linear mode
 	 * without external buffer if it will not fit in a single mbuf, which is
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0d822d6a3..a11385f39 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -332,8 +332,13 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
 	if (vq_is_packed(dev))
 		rte_free(vq->shadow_used_packed);
-	else
+	else {
 		rte_free(vq->shadow_used_split);
+		if (vq->async_pkts_pending)
+			rte_free(vq->async_pkts_pending);
+		if (vq->async_pending_info)
+			rte_free(vq->async_pending_info);
+	}
 	rte_free(vq->batch_copy_elems);
 	rte_mempool_free(vq->iotlb_pool);
 	rte_free(vq);
@@ -1522,3 +1527,123 @@ RTE_INIT(vhost_log_init)
 	if (vhost_data_log_level >= 0)
 		rte_log_set_level(vhost_data_log_level, RTE_LOG_WARNING);
 }
+
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+					uint32_t features,
+					struct rte_vhost_async_channel_ops *ops)
+{
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev = get_device(vid);
+	struct rte_vhost_async_features f;
+
+	if (dev == NULL || ops == NULL)
+		return -1;
+
+	f.intval = features;
+
+	vq = dev->virtqueue[queue_id];
+
+	if (unlikely(vq == NULL || !dev->async_copy))
+		return -1;
+
+	/* packed queue is not supported */
+	if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+		VHOST_LOG_CONFIG(ERR,
+			"async copy is not supported on packed queue or non-inorder mode "
+			"(vid %d, qid: %d)\n", vid, queue_id);
+		return -1;
+	}
+
+	if (unlikely(ops->check_completed_copies == NULL ||
+		ops->transfer_data == NULL))
+		return -1;
+
+	rte_spinlock_lock(&vq->access_lock);
+
+	if (unlikely(vq->async_registered)) {
+		VHOST_LOG_CONFIG(ERR,
+			"async register failed: channel already registered "
+			"(vid %d, qid: %d)\n", vid, queue_id);
+		goto reg_out;
+	}
+
+	vq->async_pkts_pending = rte_malloc(NULL,
+			vq->size * sizeof(uintptr_t),
+			RTE_CACHE_LINE_SIZE);
+	vq->async_pending_info = rte_malloc(NULL,
+			vq->size * sizeof(uint64_t),
+			RTE_CACHE_LINE_SIZE);
+	if (!vq->async_pkts_pending || !vq->async_pending_info) {
+		if (vq->async_pkts_pending)
+			rte_free(vq->async_pkts_pending);
+
+		if (vq->async_pending_info)
+			rte_free(vq->async_pending_info);
+
+		VHOST_LOG_CONFIG(ERR,
+			"async register failed: cannot allocate memory for vq data "
+			"(vid %d, qid: %d)\n", vid, queue_id);
+		goto reg_out;
+	}
+
+	vq->async_ops.check_completed_copies = ops->check_completed_copies;
+	vq->async_ops.transfer_data = ops->transfer_data;
+
+	vq->async_inorder = f.async_inorder;
+	vq->async_threshold = f.async_threshold;
+
+	vq->async_registered = true;
+
+reg_out:
+	rte_spinlock_unlock(&vq->access_lock);
+
+	return 0;
+}
+
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
+{
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev = get_device(vid);
+	int ret = -1;
+
+	if (dev == NULL)
+		return ret;
+
+	vq = dev->virtqueue[queue_id];
+
+	if (vq == NULL)
+		return ret;
+
+	ret = 0;
+	rte_spinlock_lock(&vq->access_lock);
+
+	if (!vq->async_registered)
+		goto out;
+
+	if (vq->async_pkts_inflight_n) {
+		VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
" + "async inflight packets must be completed before unregistration.\n"); + ret = -1; + goto out; + } + + if (vq->async_pkts_pending) { + rte_free(vq->async_pkts_pending); + vq->async_pkts_pending = NULL; + } + + if (vq->async_pending_info) { + rte_free(vq->async_pending_info); + vq->async_pending_info = NULL; + } + + vq->async_ops.transfer_data = NULL; + vq->async_ops.check_completed_copies = NULL; + vq->async_registered = false; + +out: + rte_spinlock_unlock(&vq->access_lock); + + return ret; +} + diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 034463699..f3731982b 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -24,6 +24,8 @@ #include "rte_vdpa.h" #include "rte_vdpa_dev.h" +#include "rte_vhost_async.h" + /* Used to indicate that the device is running on a data core */ #define VIRTIO_DEV_RUNNING 1 /* Used to indicate that the device is ready to operate */ @@ -40,6 +42,11 @@ #define VHOST_LOG_CACHE_NR 32 +#define MAX_PKT_BURST 32 + +#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST * 2) +#define VHOST_MAX_ASYNC_VEC (BUF_VECTOR_MAX * 2) + #define PACKED_DESC_ENQUEUE_USED_FLAG(w) \ ((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \ VRING_DESC_F_WRITE) @@ -202,6 +209,25 @@ struct vhost_virtqueue { TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list; int iotlb_cache_nr; TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list; + + /* operation callbacks for async dma */ + struct rte_vhost_async_channel_ops async_ops; + + struct rte_vhost_iov_iter it_pool[VHOST_MAX_ASYNC_IT]; + struct iovec vec_pool[VHOST_MAX_ASYNC_VEC]; + + /* async data transfer status */ + uintptr_t **async_pkts_pending; + #define ASYNC_PENDING_INFO_N_MSK 0xFFFF + #define ASYNC_PENDING_INFO_N_SFT 16 + uint64_t *async_pending_info; + uint16_t async_pkts_idx; + uint16_t async_pkts_inflight_n; + + /* vq async features */ + bool async_inorder; + bool async_registered; + uint16_t async_threshold; } __rte_cache_aligned; #define VHOST_MAX_VRING 0x100 @@ -338,6 +364,7 @@ struct virtio_net { int16_t broadcast_rarp; uint32_t nr_vring; int dequeue_zero_copy; + int async_copy; int extbuf; int linearbuf; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; @@ -683,7 +710,8 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq) /* Don't kick guest if we don't reach index specified by guest. */ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { uint16_t old = vq->signalled_used; - uint16_t new = vq->last_used_idx; + uint16_t new = vq->async_pkts_inflight_n ? + vq->used->idx:vq->last_used_idx; bool signalled_used_valid = vq->signalled_used_valid; vq->signalled_used = new; diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 6039a8fdb..aa8605523 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -476,12 +476,14 @@ vhost_user_set_vring_num(struct virtio_net **pdev, } else { if (vq->shadow_used_split) rte_free(vq->shadow_used_split); + vq->shadow_used_split = rte_malloc(NULL, vq->size * sizeof(struct vring_used_elem), RTE_CACHE_LINE_SIZE); + if (!vq->shadow_used_split) { VHOST_LOG_CONFIG(ERR, - "failed to allocate memory for shadow used ring.\n"); + "failed to allocate memory for vq internal data.\n"); return RTE_VHOST_MSG_RESULT_ERR; } } @@ -1166,7 +1168,8 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg, goto err_mmap; } - populate = (dev->dequeue_zero_copy) ? MAP_POPULATE : 0; + populate = (dev->dequeue_zero_copy || dev->async_copy) ? 
+				MAP_POPULATE : 0;
 		mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 				 MAP_SHARED | populate, fd, 0);
 
@@ -1181,7 +1184,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
 				      mmap_offset;
 
-		if (dev->dequeue_zero_copy)
+		if (dev->dequeue_zero_copy || dev->async_copy)
 			if (add_guest_pages(dev, reg, alignment) < 0) {
 				VHOST_LOG_CONFIG(ERR,
 					"adding guest pages to region %u failed.\n",
@@ -1979,6 +1982,12 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
 	} else {
 		rte_free(vq->shadow_used_split);
 		vq->shadow_used_split = NULL;
+		if (vq->async_pkts_pending)
+			rte_free(vq->async_pkts_pending);
+		if (vq->async_pending_info)
+			rte_free(vq->async_pending_info);
+		vq->async_pkts_pending = NULL;
+		vq->async_pending_info = NULL;
 	}
 
 	rte_free(vq->batch_copy_elems);
@@ -2012,6 +2021,14 @@ vhost_user_set_vring_enable(struct virtio_net **pdev,
 		"set queue enable: %d to qp idx: %d\n",
 		enable, index);
 
+	if (!enable && dev->virtqueue[index]->async_registered) {
+		if (dev->virtqueue[index]->async_pkts_inflight_n) {
+			VHOST_LOG_CONFIG(ERR, "failed to disable vring. "
+				"async inflight packets must be completed first\n");
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+	}
+
 	/* On disable, rings have to be stopped being processed. */
 	if (!enable && dev->dequeue_zero_copy)
 		drain_zmbuf_list(dev->virtqueue[index]);
-- 
2.18.4
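For completeness, a minimal sketch of how the new RTE_VHOST_USER_ASYNC_COPY
socket flag introduced above is meant to be used when registering a vhost-user
port (illustrative only, not part of the patch; the socket path handling is a
placeholder, and the flag cannot be combined with IOMMU or post-copy support
per the check in rte_vhost_driver_register()):

    #include <rte_vhost.h>

    static int
    create_async_vhost_port(const char *path)
    {
        uint64_t flags = RTE_VHOST_USER_ASYNC_COPY;

        /* rejected if combined with RTE_VHOST_USER_IOMMU_SUPPORT or
         * RTE_VHOST_USER_POSTCOPY_SUPPORT */
        if (rte_vhost_driver_register(path, flags) < 0)
            return -1;

        return rte_vhost_driver_start(path);
    }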