DPDK patches and discussions
 help / color / Atom feed
From: patrick.fu@intel.com
To: dev@dpdk.org, maxime.coquelin@redhat.com, chenbo.xia@intel.com,
	zhihong.wang@intel.com
Cc: patrick.fu@intel.com, yinan.wang@intel.com,
	cheng1.jiang@intel.com, cunming.liang@intel.com
Subject: [dpdk-dev] [PATCH v2 1/2] vhost: introduce async enqueue registration API
Date: Mon, 29 Jun 2020 22:44:08 +0800
Message-ID: <1593441849-27306-2-git-send-email-patrick.fu@intel.com> (raw)
In-Reply-To: <1593441849-27306-1-git-send-email-patrick.fu@intel.com>

From: Patrick Fu <patrick.fu@intel.com>

This patch introduces registration/un-registration APIs
for vhost async data enqueue operation. Together with
the registration APIs implementations, data structures
and async callback functions required for async enqueue
data path are also defined.

Signed-off-by: Patrick Fu <patrick.fu@intel.com>
---
 lib/librte_vhost/Makefile              |   2 +-
 lib/librte_vhost/meson.build           |   2 +-
 lib/librte_vhost/rte_vhost.h           |   1 +
 lib/librte_vhost/rte_vhost_async.h     | 136 +++++++++++++++++++++++++++++++++
 lib/librte_vhost/rte_vhost_version.map |   4 +
 lib/librte_vhost/socket.c              |  20 +++++
 lib/librte_vhost/vhost.c               | 127 +++++++++++++++++++++++++++++-
 lib/librte_vhost/vhost.h               |  30 +++++++-
 lib/librte_vhost/vhost_user.c          |  27 ++++++-
 9 files changed, 342 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_vhost/rte_vhost_async.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index b7ff7dc..4f2f3e4 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -42,7 +42,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h \
-						rte_vdpa_dev.h
+						rte_vdpa_dev.h rte_vhost_async.h
 
 # only compile vhost crypto when cryptodev is enabled
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build
index 882a0ea..cc9aa65 100644
--- a/lib/librte_vhost/meson.build
+++ b/lib/librte_vhost/meson.build
@@ -22,5 +22,5 @@ sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vdpa.c',
 		'vhost.c', 'vhost_user.c',
 		'virtio_net.c', 'vhost_crypto.c')
 headers = files('rte_vhost.h', 'rte_vdpa.h', 'rte_vdpa_dev.h',
-		'rte_vhost_crypto.h')
+		'rte_vhost_crypto.h', 'rte_vhost_async.h')
 deps += ['ethdev', 'cryptodev', 'hash', 'pci']
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 2fbc364..28616f4 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -35,6 +35,7 @@
 #define RTE_VHOST_USER_EXTBUF_SUPPORT	(1ULL << 5)
 /* support only linear buffers (no chained mbufs) */
 #define RTE_VHOST_USER_LINEARBUF_SUPPORT	(1ULL << 6)
+#define RTE_VHOST_USER_ASYNC_COPY	(1ULL << 7)
 
 /** Protocol features. */
 #ifndef VHOST_USER_PROTOCOL_F_MQ
diff --git a/lib/librte_vhost/rte_vhost_async.h b/lib/librte_vhost/rte_vhost_async.h
new file mode 100644
index 0000000..d5a5927
--- /dev/null
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_VHOST_ASYNC_H_
+#define _RTE_VHOST_ASYNC_H_
+
+#include "rte_vhost.h"
+
+/**
+ * iovec iterator
+ */
+struct rte_vhost_iov_iter {
+	/** offset to the first byte of interesting data */
+	size_t offset;
+	/** total bytes of data in this iterator */
+	size_t count;
+	/** pointer to the iovec array */
+	struct iovec *iov;
+	/** number of iovec in this iterator */
+	unsigned long nr_segs;
+};
+
+/**
+ * dma transfer descriptor pair
+ */
+struct rte_vhost_async_desc {
+	/** source memory iov_iter */
+	struct rte_vhost_iov_iter *src;
+	/** destination memory iov_iter */
+	struct rte_vhost_iov_iter *dst;
+};
+
+/**
+ * dma transfer status
+ */
+struct rte_vhost_async_status {
+	/** An array of application specific data for source memory */
+	uintptr_t *src_opaque_data;
+	/** An array of application specific data for destination memory */
+	uintptr_t *dst_opaque_data;
+};
+
+/**
+ * dma operation callbacks to be implemented by applications
+ */
+struct rte_vhost_async_channel_ops {
+	/**
+	 * instruct async engines to perform copies for a batch of packets
+	 *
+	 * @param vid
+	 *  id of vhost device to perform data copies
+	 * @param queue_id
+	 *  queue id to perform data copies
+	 * @param descs
+	 *  an array of DMA transfer memory descriptors
+	 * @param opaque_data
+	 *  opaque data pair sending to DMA engine
+	 * @param count
+	 *  number of elements in the "descs" array
+	 * @return
+	 *  -1 on failure, number of descs processed on success
+	 */
+	int (*transfer_data)(int vid, uint16_t queue_id,
+		struct rte_vhost_async_desc *descs,
+		struct rte_vhost_async_status *opaque_data,
+		uint16_t count);
+	/**
+	 * check copy-completed packets from the async engine
+	 * @param vid
+	 *  id of vhost device to check copy completion
+	 * @param queue_id
+	 *  queue id to check copy completion
+	 * @param opaque_data
+	 *  buffer to receive the opaque data pair from DMA engine
+	 * @param max_packets
+	 *  max number of packets could be completed
+	 * @return
+	 *  -1 on failure, number of iov segments completed on success
+	 */
+	int (*check_completed_copies)(int vid, uint16_t queue_id,
+		struct rte_vhost_async_status *opaque_data,
+		uint16_t max_packets);
+};
+
+/**
+ *  dma channel feature bit definition
+ */
+struct rte_vhost_async_features {
+	union {
+		uint32_t intval;
+		struct {
+			uint32_t async_inorder:1;
+			uint32_t resvd_0:15;
+			uint32_t async_threshold:12;
+			uint32_t resvd_1:4;
+		};
+	};
+};
+
+/**
+ * register an async channel for vhost
+ *
+ * @param vid
+ *  vhost device id async channel to be attached to
+ * @param queue_id
+ *  vhost queue id async channel to be attached to
+ * @param features
+ *  DMA channel feature bit
+ *    b0       : DMA supports inorder data transfer
+ *    b1  - b15: reserved
+ *    b16 - b27: Packet length threshold for DMA transfer
+ *    b28 - b31: reserved
+ * @param ops
+ *  DMA operation callbacks
+ * @return
+ *  0 on success, -1 on failures
+ */
+__rte_experimental
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+	uint32_t features, struct rte_vhost_async_channel_ops *ops);
+
+/**
+ * unregister a dma channel for vhost
+ *
+ * @param vid
+ *  vhost device id DMA channel to be detached
+ * @param queue_id
+ *  vhost queue id DMA channel to be detached
+ * @return
+ *  0 on success, -1 on failures
+ */
+__rte_experimental
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id);
+
+#endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 299576a..2d1e796 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -72,4 +72,8 @@ EXPERIMENTAL {
 	rte_vdpa_get_queue_num;
 	rte_vdpa_get_features;
 	rte_vdpa_get_protocol_features;
+	rte_vhost_async_channel_register;
+	rte_vhost_async_channel_unregister;
+	rte_vhost_submit_enqueue_burst;
+	rte_vhost_poll_enqueue_completed;
 };
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 49267ce..698b44e 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -42,6 +42,7 @@ struct vhost_user_socket {
 	bool use_builtin_virtio_net;
 	bool extbuf;
 	bool linearbuf;
+	bool async_copy;
 
 	/*
 	 * The "supported_features" indicates the feature bits the
@@ -205,6 +206,7 @@ struct vhost_user {
 	size_t size;
 	struct vhost_user_connection *conn;
 	int ret;
+	struct virtio_net *dev;
 
 	if (vsocket == NULL)
 		return;
@@ -236,6 +238,13 @@ struct vhost_user {
 	if (vsocket->linearbuf)
 		vhost_enable_linearbuf(vid);
 
+	if (vsocket->async_copy) {
+		dev = get_device(vid);
+
+		if (dev)
+			dev->async_copy = 1;
+	}
+
 	VHOST_LOG_CONFIG(INFO, "new device, handle is %d\n", vid);
 
 	if (vsocket->notify_ops->new_connection) {
@@ -881,6 +890,17 @@ struct rte_vdpa_device *
 		goto out_mutex;
 	}
 
+	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
+
+	if (vsocket->async_copy &&
+		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
+		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
+		VHOST_LOG_CONFIG(ERR, "error: enabling async copy and IOMMU "
+			"or post-copy feature simultaneously is not "
+			"supported\n");
+		goto out_mutex;
+	}
+
 	/*
 	 * Set the supported features correctly for the builtin vhost-user
 	 * net driver.
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0d822d6..58ee3ef 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -332,8 +332,13 @@
 {
 	if (vq_is_packed(dev))
 		rte_free(vq->shadow_used_packed);
-	else
+	else {
 		rte_free(vq->shadow_used_split);
+		if (vq->async_pkts_pending)
+			rte_free(vq->async_pkts_pending);
+		if (vq->async_pending_info)
+			rte_free(vq->async_pending_info);
+	}
 	rte_free(vq->batch_copy_elems);
 	rte_mempool_free(vq->iotlb_pool);
 	rte_free(vq);
@@ -1522,3 +1527,123 @@ int rte_vhost_extern_callback_register(int vid,
 	if (vhost_data_log_level >= 0)
 		rte_log_set_level(vhost_data_log_level, RTE_LOG_WARNING);
 }
+
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+					uint32_t features,
+					struct rte_vhost_async_channel_ops *ops)
+{
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev = get_device(vid);
+	struct rte_vhost_async_features f;
+
+	if (dev == NULL || ops == NULL)
+		return -1;
+
+	f.intval = features;
+
+	vq = dev->virtqueue[queue_id];
+
+	if (unlikely(vq == NULL || !dev->async_copy))
+		return -1;
+
+	/** packed queue is not supported */
+	if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+		VHOST_LOG_CONFIG(ERR,
+			"async copy is not supported on packed queue or non-inorder mode "
+			"(vid %d, qid: %d)\n", vid, queue_id);
+		return -1;
+	}
+
+	if (unlikely(ops->check_completed_copies == NULL ||
+		ops->transfer_data == NULL))
+		return -1;
+
+	rte_spinlock_lock(&vq->access_lock);
+
+	if (unlikely(vq->async_registered)) {
+		VHOST_LOG_CONFIG(ERR,
+			"async register failed: channel already registered "
+			"(vid %d, qid: %d)\n", vid, queue_id);
+		goto reg_out;
+	}
+
+	vq->async_pkts_pending = rte_malloc(NULL,
+			vq->size * sizeof(uintptr_t),
+			RTE_CACHE_LINE_SIZE);
+	vq->async_pending_info = rte_malloc(NULL,
+			vq->size * sizeof(uint64_t),
+			RTE_CACHE_LINE_SIZE);
+	if (!vq->async_pkts_pending || !vq->async_pending_info) {
+		if (vq->async_pkts_pending)
+			rte_free(vq->async_pkts_pending);
+
+		if (vq->async_pending_info)
+			rte_free(vq->async_pending_info);
+
+		VHOST_LOG_CONFIG(ERR,
+				"async register failed: cannot allocate memory for vq data "
+				"(vid %d, qid: %d)\n", vid, queue_id);
+		goto reg_out;
+	}
+
+	vq->async_ops.check_completed_copies = ops->check_completed_copies;
+	vq->async_ops.transfer_data = ops->transfer_data;
+
+	vq->async_inorder = f.async_inorder;
+	vq->async_threshold = f.async_threshold;
+
+	vq->async_registered = true;
+
+reg_out:
+	rte_spinlock_unlock(&vq->access_lock);
+
+	return 0;
+}
+
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
+{
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev = get_device(vid);
+	int ret = -1;
+
+	if (dev == NULL)
+		return ret;
+
+	vq = dev->virtqueue[queue_id];
+
+	if (vq == NULL)
+		return ret;
+
+	ret = 0;
+	rte_spinlock_lock(&vq->access_lock);
+
+	if (!vq->async_registered)
+		goto out;
+
+	if (vq->async_pkts_inflight_n) {
+		VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
+			"async inflight packets must be completed before unregistration.\n");
+		ret = -1;
+		goto out;
+	}
+
+	if (vq->async_pkts_pending) {
+		rte_free(vq->async_pkts_pending);
+		vq->async_pkts_pending = 0;
+	}
+
+	if (vq->async_pending_info) {
+		rte_free(vq->async_pending_info);
+		vq->async_pending_info = 0;
+	}
+
+	vq->async_ops.transfer_data = NULL;
+	vq->async_ops.check_completed_copies = NULL;
+	vq->async_registered = false;
+
+out:
+	rte_spinlock_unlock(&vq->access_lock);
+
+	return ret;
+}
+
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 941a426..8baf322 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -24,6 +24,8 @@
 #include "rte_vdpa.h"
 #include "rte_vdpa_dev.h"
 
+#include "rte_vhost_async.h"
+
 /* Used to indicate that the device is running on a data core */
 #define VIRTIO_DEV_RUNNING 1
 /* Used to indicate that the device is ready to operate */
@@ -40,6 +42,11 @@
 
 #define VHOST_LOG_CACHE_NR 32
 
+#define MAX_PKT_BURST 32
+
+#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST * 2)
+#define VHOST_MAX_ASYNC_VEC (BUF_VECTOR_MAX * 2)
+
 #define PACKED_DESC_ENQUEUE_USED_FLAG(w)	\
 	((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
 		VRING_DESC_F_WRITE)
@@ -201,6 +208,25 @@ struct vhost_virtqueue {
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
 	int				iotlb_cache_nr;
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
+
+	/* operation callbacks for async dma */
+	struct rte_vhost_async_channel_ops	async_ops;
+
+	struct rte_vhost_iov_iter it_pool[VHOST_MAX_ASYNC_IT];
+	struct iovec vec_pool[VHOST_MAX_ASYNC_VEC];
+
+	/* async data transfer status */
+	uintptr_t	**async_pkts_pending;
+	#define		ASYNC_PENDING_INFO_N_MSK 0xFFFF
+	#define		ASYNC_PENDING_INFO_N_SFT 16
+	uint64_t	*async_pending_info;
+	uint16_t	async_pkts_idx;
+	uint16_t	async_pkts_inflight_n;
+
+	/* vq async features */
+	bool		async_inorder;
+	bool		async_registered;
+	uint16_t	async_threshold;
 } __rte_cache_aligned;
 
 /* Old kernels have no such macros defined */
@@ -354,6 +380,7 @@ struct virtio_net {
 	int16_t			broadcast_rarp;
 	uint32_t		nr_vring;
 	int			dequeue_zero_copy;
+	int			async_copy;
 	int			extbuf;
 	int			linearbuf;
 	struct vhost_virtqueue	*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
@@ -699,7 +726,8 @@ uint64_t translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	/* Don't kick guest if we don't reach index specified by guest. */
 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
 		uint16_t old = vq->signalled_used;
-		uint16_t new = vq->last_used_idx;
+		uint16_t new = vq->async_pkts_inflight_n ?
+					vq->used->idx:vq->last_used_idx;
 		bool signalled_used_valid = vq->signalled_used_valid;
 
 		vq->signalled_used = new;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 3405cd8..716effa 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -462,12 +462,18 @@
 	} else {
 		if (vq->shadow_used_split)
 			rte_free(vq->shadow_used_split);
+		if (vq->async_pkts_pending)
+			rte_free(vq->async_pkts_pending);
+		if (vq->async_pending_info)
+			rte_free(vq->async_pending_info);
+
 		vq->shadow_used_split = rte_malloc(NULL,
 				vq->size * sizeof(struct vring_used_elem),
 				RTE_CACHE_LINE_SIZE);
+
 		if (!vq->shadow_used_split) {
 			VHOST_LOG_CONFIG(ERR,
-					"failed to allocate memory for shadow used ring.\n");
+					"failed to allocate memory for vq internal data.\n");
 			return RTE_VHOST_MSG_RESULT_ERR;
 		}
 	}
@@ -1145,7 +1151,8 @@
 			goto err_mmap;
 		}
 
-		populate = (dev->dequeue_zero_copy) ? MAP_POPULATE : 0;
+		populate = (dev->dequeue_zero_copy || dev->async_copy) ?
+			MAP_POPULATE : 0;
 		mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 				 MAP_SHARED | populate, fd, 0);
 
@@ -1160,7 +1167,7 @@
 		reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
 				      mmap_offset;
 
-		if (dev->dequeue_zero_copy)
+		if (dev->dequeue_zero_copy || dev->async_copy)
 			if (add_guest_pages(dev, reg, alignment) < 0) {
 				VHOST_LOG_CONFIG(ERR,
 					"adding guest pages to region %u failed.\n",
@@ -1943,6 +1950,12 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused,
 	} else {
 		rte_free(vq->shadow_used_split);
 		vq->shadow_used_split = NULL;
+		if (vq->async_pkts_pending)
+			rte_free(vq->async_pkts_pending);
+		if (vq->async_pending_info)
+			rte_free(vq->async_pending_info);
+		vq->async_pkts_pending = NULL;
+		vq->async_pending_info = NULL;
 	}
 
 	rte_free(vq->batch_copy_elems);
@@ -1977,6 +1990,14 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused,
 		"set queue enable: %d to qp idx: %d\n",
 		enable, index);
 
+	if (!enable && dev->virtqueue[index]->async_registered) {
+		if (dev->virtqueue[index]->async_pkts_inflight_n) {
+			VHOST_LOG_CONFIG(ERR, "failed to disable vring. "
+			"async inflight packets must be completed first\n");
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+	}
+
 	vdpa_dev = dev->vdpa_dev;
 	if (vdpa_dev && vdpa_dev->ops->set_vring_state)
 		vdpa_dev->ops->set_vring_state(dev->vid, index, enable);
-- 
1.8.3.1


  reply index

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-29 14:44 [dpdk-dev] [PATCH v2 0/2] introduce asynchronous data path for vhost patrick.fu
2020-06-29 14:44 ` patrick.fu [this message]
2020-06-29 14:44 ` [dpdk-dev] [PATCH v2 2/2] vhost: introduce async enqueue for split ring patrick.fu
2020-07-01  8:50   ` Liu, Yong
2020-07-02 12:21     ` Fu, Patrick

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1593441849-27306-2-git-send-email-patrick.fu@intel.com \
    --to=patrick.fu@intel.com \
    --cc=chenbo.xia@intel.com \
    --cc=cheng1.jiang@intel.com \
    --cc=cunming.liang@intel.com \
    --cc=dev@dpdk.org \
    --cc=maxime.coquelin@redhat.com \
    --cc=yinan.wang@intel.com \
    --cc=zhihong.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

Archives are clonable:
	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev


Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox