DPDK patches and discussions
* [dpdk-dev] [RFC] example/vhost: add support for vhost async data path
@ 2020-06-22  2:59 Cheng Jiang
  2020-06-23  1:54 ` Xia, Chenbo
  2020-07-08  7:26 ` [dpdk-dev] [RFC v2] " Cheng Jiang
  0 siblings, 2 replies; 4+ messages in thread
From: Cheng Jiang @ 2020-06-22  2:59 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia, zhihong.wang
  Cc: dev, patrick.fu, cunming.liang, Cheng Jiang

This patch makes vhost-vswitch able to use the vhost asynchronous
API for enqueue operations, and demonstrates how the application
can leverage an IOAT DMA channel through the vhost async API.
Since this is an early preview patch, performance has not been
fully optimized, and it is not suggested to use this patch as a
benchmarking tool.
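
In essence, enqueue moves from a synchronous CPU copy to a
submit/poll pattern. A minimal sketch, distilled from the diff
below (the API names come from the dependent async vhost RFC and
may still change):

	/* submit mbufs for DMA-assisted enqueue instead of a CPU copy */
	enqueue_count = rte_vhost_submit_enqueue_burst(vid, VIRTIO_RXQ,
					pkts, rx_count);

	/* later, reap completed copies and free the source mbufs */
	complete_count = rte_vhost_poll_enqueue_completed(vid, VIRTIO_RXQ,
					p_cpl, MAX_PKT_BURST);
	free_pkts(p_cpl, complete_count);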

We introduce two parameters to enable DMA acceleration for the Tx
operations of queues:

--async_vhost_driver  When this option is given, the async
vhost-user net driver, which demonstrates how to use the async
vhost APIs, is used. It is disabled by default.

--dmas  This parameter specifies the DMA device assigned to a
queue.
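
For illustration only, an invocation enabling both options might
look like the following (vhost-switch being the binary built from
examples/vhost, and the PCI addresses being placeholders). The
format follows the open_dma() parser added below, which expects a
bracketed, semicolon-separated list of txqN@PCI-BDF pairs; the
quotes keep the shell from interpreting the brackets and the
semicolon:

	./vhost-switch -l 1-3 -n 4 -- -p 0x1 \
		--socket-file /tmp/sock0 --async_vhost_driver \
		--dmas "[txq0@00:04.0;txq1@00:04.1]"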

This patch depends on the following patch set:
http://patches.dpdk.org/cover/71265/

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
 examples/vhost/main.c | 246 +++++++++++++++++++++++++++++++++++++++++-
 examples/vhost/main.h |   1 +
 2 files changed, 243 insertions(+), 4 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index ab649bf14..46dd282e0 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -24,11 +24,15 @@
 #include <rte_ip.h>
 #include <rte_tcp.h>
 #include <rte_pause.h>
+#include <rte_vhost_async.h>
+#include <rte_rawdev.h>
+#include <rte_ioat_rawdev.h>
+#include <rte_pci.h>
 
 #include "main.h"
 
 #ifndef MAX_QUEUES
-#define MAX_QUEUES 128
+#define MAX_QUEUES 512
 #endif
 
 /* the maximum number of external ports supported */
@@ -58,6 +62,10 @@
 /* Maximum long option length for option parsing. */
 #define MAX_LONG_OPT_SZ 64
 
+#define IOAT_RING_SIZE 4096
+
+#define MAX_ENQUEUED_SIZE 2048
+
 /* mask of enabled ports */
 static uint32_t enabled_port_mask = 0;
 
@@ -96,6 +104,20 @@ static int dequeue_zero_copy;
 
 static int builtin_net_driver;
 
+static int async_vhost_driver;
+
+struct dma_info {
+	struct rte_pci_addr addr;
+	uint16_t dev_id;
+	bool is_valid;
+};
+
+struct dma_info_input {
+	struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
+	uint16_t nr;
+};
+
+static struct dma_info_input dma_bind[20];
 /* Specify timeout (in useconds) between retries on RX. */
 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
 /* Specify the number of retries on RX. */
@@ -141,6 +163,61 @@ static struct rte_eth_conf vmdq_conf_default = {
 	},
 };
 
+static int
+ioat_transfer_data_cb(int vid, uint16_t queue_id, struct dma_trans_desc *descs,
+		struct dma_trans_status *opaque_data, uint16_t count)
+{
+	int ret;
+	uint16_t i_desc;
+
+	struct iov_it *src = NULL;
+	struct iov_it *dst = NULL;
+	unsigned long i_seg;
+
+	int dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
+	if (likely(!opaque_data)) {
+		for (i_desc = 0; i_desc < count; i_desc++) {
+			src = descs[i_desc].src;
+			dst = descs[i_desc].dst;
+			i_seg = 0;
+			while (i_seg < src->nr_segs) {
+				ret = rte_ioat_enqueue_copy(dev_id,
+					(uintptr_t)(src->iov[i_seg].iov_base)
+						+ src->offset,
+					(uintptr_t)(dst->iov[i_seg].iov_base)
+						+ dst->offset,
+					src->iov[i_seg].iov_len,
+					0,
+					0,
+					0);
+				if (ret != 1)
+					break;
+				i_seg++;
+			}
+		}
+	} else {
+		/* Opaque data is not supported */
+		return -1;
+	}
+	/* ring the doolbell */
+	rte_ioat_do_copies(dev_id);
+	return i_desc;
+}
+
+static int
+ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
+		struct dma_trans_status *opaque_data,
+		uint16_t max_packets __rte_unused)
+{
+	if (!opaque_data) {
+		uintptr_t dump[255];
+		return rte_ioat_completed_copies(dma_bind[vid].dmas[queue_id * 2
+			+ VIRTIO_RXQ].dev_id, 255, dump, dump);
+	} else {
+		/* Opaque data is not supported */
+		return -1;
+	}
+}
 
 static unsigned lcore_ids[RTE_MAX_LCORE];
 static uint16_t ports[RTE_MAX_ETHPORTS];
@@ -186,6 +263,94 @@ struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
  * Builds up the correct configuration for VMDQ VLAN pool map
  * according to the pool & queue limits.
  */
+
+static inline int
+open_dma(const char *value, void *dma_bind_info)
+{
+	struct dma_info_input *dma_info = dma_bind_info;
+	char *input = strndup(value, strlen(value) + 1);
+	char *addrs = input;
+	char *ptrs[2];
+	char *start, *end, *substr;
+	int64_t qid, vring_id;
+	struct rte_ioat_rawdev_config config;
+	struct rte_rawdev_info info = { .dev_private = &config };
+	char name[32];
+	int dev_id;
+	int ret = 0;
+
+	while (isblank(*addrs))
+		addrs++;
+	if (*addrs == '\0') {
+		ret = -1;
+		goto out;
+	}
+
+	/* process DMA devices within bracket. */
+	addrs++;
+	substr = strtok(addrs, ";]");
+	if (!substr) {
+		ret = -1;
+		goto out;
+	}
+
+	do {
+		rte_strsplit(substr, strlen(substr), ptrs, 2, '@');
+
+		start = strstr(ptrs[0], "txq");
+		if (start == NULL) {
+			ret = -1;
+			goto out;
+		}
+
+		start += 3;
+		qid = strtol(start, &end, 0);
+		if (end == start) {
+			ret = -1;
+			goto out;
+		}
+
+		vring_id = qid * 2 + VIRTIO_RXQ;
+		if (rte_pci_addr_parse(ptrs[1],
+				       &dma_info->dmas[vring_id].addr) < 0) {
+			ret = -1;
+			goto out;
+		}
+
+		rte_pci_device_name(&dma_info->dmas[vring_id].addr,
+				    name, sizeof(name));
+		dev_id = rte_rawdev_get_dev_id(name);
+		if (dev_id == (uint16_t)(-ENODEV) ||
+		    dev_id == (uint16_t)(-EINVAL)) {
+			ret = -1;
+			goto out;
+		}
+
+		if (rte_rawdev_info_get(dev_id, &info) < 0 ||
+		    strstr(info.driver_name, "ioat") == NULL) {
+			ret = -1;
+			goto out;
+		}
+
+		dma_info->dmas[vring_id].dev_id = dev_id;
+		dma_info->dmas[vring_id].is_valid = true;
+		config.ring_size = IOAT_RING_SIZE;
+		if (rte_rawdev_configure(dev_id, &info) < 0) {
+			ret = -1;
+			goto out;
+		}
+		rte_rawdev_start(dev_id);
+
+		dma_info->nr++;
+
+		substr = strtok(NULL, ";]");
+	} while (substr);
+
+out:
+	free(input);
+	return ret;
+}
+
 static inline int
 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
 {
@@ -488,6 +653,8 @@ us_vhost_parse_args(int argc, char **argv)
 		{"client", no_argument, &client_mode, 1},
 		{"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
 		{"builtin-net-driver", no_argument, &builtin_net_driver, 1},
+		{"async_vhost_driver", no_argument, &async_vhost_driver, 1},
+		{"dmas", required_argument, NULL, 0},
 		{NULL, 0, 0, 0},
 	};
 
@@ -623,13 +790,25 @@ us_vhost_parse_args(int argc, char **argv)
 						"socket-file", MAX_LONG_OPT_SZ)) {
 				if (us_vhost_parse_socket_path(optarg) == -1) {
 					RTE_LOG(INFO, VHOST_CONFIG,
-					"Invalid argument for socket name (Max %d characters)\n",
-					PATH_MAX);
+						"Invalid argument for socket name (Max %d characters)\n",
+						PATH_MAX);
 					us_vhost_usage(prgname);
 					return -1;
 				}
 			}
 
+			if (!strncmp(long_option[option_index].name,
+						"dmas", MAX_LONG_OPT_SZ)) {
+				if (open_dma(optarg, &(dma_bind[0])) == -1) {
+					if (*optarg == -1) {
+						RTE_LOG(INFO, VHOST_CONFIG,
+							"Wrong DMA args\n");
+						us_vhost_usage(prgname);
+						return -1;
+					}
+				}
+			}
+
 			break;
 
 			/* Invalid option - print options. */
@@ -785,9 +964,26 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
+	struct rte_mbuf *m_cpl[1];
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
+	} else if (async_vhost_driver) {
+		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+						&m, 1);
+
+		if (likely(ret)) {
+			dst_vdev->nr_async_pkts++;
+			rte_mbuf_refcnt_update(m, 1);
+		}
+
+		while (likely(dst_vdev->nr_async_pkts)) {
+			dst_vdev->nr_async_pkts =
+				rte_vhost_poll_enqueue_completed(dst_vdev->vid,
+					VIRTIO_RXQ, m_cpl, 1);
+			dst_vdev->nr_async_pkts--;
+			rte_pktmbuf_free(*m_cpl);
+		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
@@ -1036,6 +1232,19 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
+static __rte_always_inline void
+complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+						qid, p_cpl, MAX_PKT_BURST);
+	vdev->nr_async_pkts -= complete_count;
+	if (complete_count)
+		free_pkts(p_cpl, complete_count);
+}
+
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1044,6 +1253,10 @@ drain_eth_rx(struct vhost_dev *vdev)
 
 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
+
+	while (likely(vdev->nr_async_pkts))
+		complete_async_pkts(vdev, VIRTIO_RXQ);
+
 	if (!rx_count)
 		return;
 
@@ -1068,16 +1281,22 @@ drain_eth_rx(struct vhost_dev *vdev)
 	if (builtin_net_driver) {
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
+	} else if (async_vhost_driver) {
+		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
+					VIRTIO_RXQ, pkts, rx_count);
+		vdev->nr_async_pkts += enqueue_count;
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
+
 	if (enable_stats) {
 		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
 		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
 	}
 
-	free_pkts(pkts, rx_count);
+	if (!async_vhost_driver)
+		free_pkts(pkts, rx_count);
 }
 
 static __rte_always_inline void
@@ -1224,6 +1443,9 @@ destroy_device(int vid)
 		"(%d) device has been removed from data core\n",
 		vdev->vid);
 
+	if (async_vhost_driver)
+		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+
 	rte_free(vdev);
 }
 
@@ -1238,6 +1460,12 @@ new_device(int vid)
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
 
+	struct rte_vhost_async_channel_ops channel_ops = {
+		.transfer_data = ioat_transfer_data_cb,
+		.check_completed_copies = ioat_check_completed_copies_cb
+	};
+	struct dma_channel_features f;
+
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
 	if (vdev == NULL) {
 		RTE_LOG(INFO, VHOST_DATA,
@@ -1278,6 +1506,13 @@ new_device(int vid)
 		"(%d) device has been added to data core %d\n",
 		vid, vdev->coreid);
 
+	if (async_vhost_driver) {
+		f.inorder = 1;
+		f.threshold = 256;
+		return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+			f.intval, &channel_ops);
+	}
+
 	return 0;
 }
 
@@ -1517,6 +1752,9 @@ main(int argc, char *argv[])
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+		if (async_vhost_driver)
+			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+
 		ret = rte_vhost_driver_register(file, flags);
 		if (ret != 0) {
 			unregister_drivers(i);
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 7cba0edbf..4317b6ae8 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
+	uint16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.26.2



* Re: [dpdk-dev] [RFC] example/vhost: add support for vhost async data path
  2020-06-22  2:59 [dpdk-dev] [RFC] example/vhost: add support for vhost async data path Cheng Jiang
@ 2020-06-23  1:54 ` Xia, Chenbo
  2020-07-08  3:19   ` Jiang, Cheng1
  2020-07-08  7:26 ` [dpdk-dev] [RFC v2] " Cheng Jiang
  1 sibling, 1 reply; 4+ messages in thread
From: Xia, Chenbo @ 2020-06-23  1:54 UTC (permalink / raw)
  To: Jiang, Cheng1, maxime.coquelin, Wang, Zhihong
  Cc: dev, Fu, Patrick, Liang, Cunming

Hi Cheng,

> -----Original Message-----
> From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> Sent: Monday, June 22, 2020 10:59 AM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> Wang, Zhihong <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; Fu, Patrick <patrick.fu@intel.com>; Liang, Cunming
> <cunming.liang@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>
> Subject: [RFC] example/vhost: add support for vhost async data path
> 
> [...]
> +static struct dma_info_input dma_bind[20];

Should '20' be MAX_VHOST_DEVICE as this is indexed by vid?
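
e.g. something like this (a sketch; MAX_VHOST_DEVICE would still
need to be defined):

	static struct dma_info_input dma_bind[MAX_VHOST_DEVICE];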

> [...]
> +	/* ring the doolbell */

s/doolbell/doorbell

> [...]
> +		while (likely(dst_vdev->nr_async_pkts)) {
> +			dst_vdev->nr_async_pkts =
> +				rte_vhost_poll_enqueue_completed(dst_vdev->vid,
> +					VIRTIO_RXQ, m_cpl, 1);

I think nr_async_pkts should be changed only when rte_vhost_poll_enqueue_completed succeeds?
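
For example, just a sketch of the idea:

	while (likely(dst_vdev->nr_async_pkts)) {
		if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
				VIRTIO_RXQ, m_cpl, 1)) {
			dst_vdev->nr_async_pkts--;
			rte_pktmbuf_free(*m_cpl);
		}
	}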

Thanks!
Chenbo


* Re: [dpdk-dev] [RFC] example/vhost: add support for vhost async data path
  2020-06-23  1:54 ` Xia, Chenbo
@ 2020-07-08  3:19   ` Jiang, Cheng1
  0 siblings, 0 replies; 4+ messages in thread
From: Jiang, Cheng1 @ 2020-07-08  3:19 UTC (permalink / raw)
  To: Xia, Chenbo, maxime.coquelin, Wang, Zhihong
  Cc: dev, Fu, Patrick, Liang, Cunming

Hi Chenbo,

> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Tuesday, June 23, 2020 9:54 AM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; maxime.coquelin@redhat.com;
> Wang, Zhihong <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; Fu, Patrick <patrick.fu@intel.com>; Liang, Cunming
> <cunming.liang@intel.com>
> Subject: RE: [RFC] example/vhost: add support for vhost async data path
> 
> Hi Cheng,
> 
> > -----Original Message-----
> > From: Jiang, Cheng1 <cheng1.jiang@intel.com>
> > Sent: Monday, June 22, 2020 10:59 AM
> > To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> > Wang, Zhihong <zhihong.wang@intel.com>
> > Cc: dev@dpdk.org; Fu, Patrick <patrick.fu@intel.com>; Liang, Cunming
> > <cunming.liang@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>
> > Subject: [RFC] example/vhost: add support for vhost async data path
> >
> > [...]
> > +static struct dma_info_input dma_bind[20];
> 
> Should '20' be MAX_VHOST_DEVICE as this is indexed by vid?
> 
Sure, you are right, MAX_VHOST_DEVICE will be used in the next version.

> > [...]
> > +	/* ring the doolbell */
> 
> s/doolbell/doorbell
> 
I'll fix it in the next version.

> > [...]
> > +		while (likely(dst_vdev->nr_async_pkts)) {
> > +			dst_vdev->nr_async_pkts =
> > +				rte_vhost_poll_enqueue_completed(dst_vdev->vid,
> > +					VIRTIO_RXQ, m_cpl, 1);
> 
> I think nr_async_pkts should be changed only when
> rte_vhost_poll_enqueue_completed succeeds?
> 
> Thanks!
> Chenbo
> 
Yes, that makes sense; I'll update it in the next version.

Thanks,
Cheng


* [dpdk-dev] [RFC v2] example/vhost: add support for vhost async data path
  2020-06-22  2:59 [dpdk-dev] [RFC] example/vhost: add support for vhost async data path Cheng Jiang
  2020-06-23  1:54 ` Xia, Chenbo
@ 2020-07-08  7:26 ` Cheng Jiang
  1 sibling, 0 replies; 4+ messages in thread
From: Cheng Jiang @ 2020-07-08  7:26 UTC (permalink / raw)
  To: maxime.coquelin, chenbo.xia, zhihong.wang
  Cc: dev, patrick.fu, cunming.liang, Cheng Jiang

This patch makes vhost-vswitch able to use the vhost asynchronous
API for enqueue operations, and demonstrates how the application
can leverage an IOAT DMA channel through the vhost async API.
Since this is an early preview patch, performance has not been
fully optimized, and it is not suggested to use this patch as a
benchmarking tool.

We introduce two parameters to enable DMA acceleration for the Tx
operations of queues:

--async_vhost_driver  When this option is given, the async
vhost-user net driver, which demonstrates how to use the async
vhost APIs, is used. It is disabled by default.

--dmas  This parameter specifies the DMA device assigned to a
queue.

This patch depends on the following patch set:
http://patches.dpdk.org/cover/73359/

Signed-off-by: Cheng Jiang <Cheng1.jiang@intel.com>
---
v2:
* updated some variable names based on the latest async vhost patch
* fixed a bug in the virtio_xmit function
* fixed a hardcoded array size
* fixed a typo
---
 examples/vhost/main.c | 248 +++++++++++++++++++++++++++++++++++++++++-
 examples/vhost/main.h |   1 +
 2 files changed, 245 insertions(+), 4 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 312829e8b..72135a3df 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -24,11 +24,15 @@
 #include <rte_ip.h>
 #include <rte_tcp.h>
 #include <rte_pause.h>
+#include <rte_vhost_async.h>
+#include <rte_rawdev.h>
+#include <rte_ioat_rawdev.h>
+#include <rte_pci.h>

 #include "main.h"

 #ifndef MAX_QUEUES
-#define MAX_QUEUES 128
+#define MAX_QUEUES 512
 #endif

 /* the maximum number of external ports supported */
@@ -58,6 +62,12 @@
 /* Maximum long option length for option parsing. */
 #define MAX_LONG_OPT_SZ 64

+#define IOAT_RING_SIZE 4096
+
+#define MAX_ENQUEUED_SIZE 2048
+
+#define MAX_VHOST_DEVICE 1024
+
 /* mask of enabled ports */
 static uint32_t enabled_port_mask = 0;

@@ -96,6 +106,20 @@ static int dequeue_zero_copy;

 static int builtin_net_driver;

+static int async_vhost_driver;
+
+struct dma_info {
+	struct rte_pci_addr addr;
+	uint16_t dev_id;
+	bool is_valid;
+};
+
+struct dma_info_input {
+	struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
+	uint16_t nr;
+};
+
+static struct dma_info_input dma_bind[MAX_VHOST_DEVICE];
 /* Specify timeout (in useconds) between retries on RX. */
 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
 /* Specify the number of retries on RX. */
@@ -141,6 +165,61 @@ static struct rte_eth_conf vmdq_conf_default = {
 	},
 };

+static int
+ioat_transfer_data_cb(int vid, uint16_t queue_id, struct rte_vhost_async_desc *descs,
+		struct rte_vhost_async_status *opaque_data, uint16_t count)
+{
+	int ret;
+	uint16_t i_desc;
+
+	struct rte_vhost_iov_iter *src = NULL;
+	struct rte_vhost_iov_iter *dst = NULL;
+	unsigned long i_seg;
+
+	int dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
+	if (likely(!opaque_data)) {
+		for (i_desc = 0; i_desc < count; i_desc++) {
+			src = descs[i_desc].src;
+			dst = descs[i_desc].dst;
+			i_seg = 0;
+			while (i_seg < src->nr_segs) {
+				ret = rte_ioat_enqueue_copy(dev_id,
+					(uintptr_t)(src->iov[i_seg].iov_base)
+						+ src->offset,
+					(uintptr_t)(dst->iov[i_seg].iov_base)
+						+ dst->offset,
+					src->iov[i_seg].iov_len,
+					0,
+					0,
+					0);
+				if (ret != 1)
+					break;
+				i_seg++;
+			}
+		}
+	} else {
+		/* Opaque data is not supported */
+		return -1;
+	}
+	/* ring the doorbell */
+	rte_ioat_do_copies(dev_id);
+	return i_desc;
+}
+
+static int
+ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
+		struct rte_vhost_async_status *opaque_data,
+		uint16_t max_packets __rte_unused)
+{
+	if (!opaque_data) {
+		uintptr_t dump[255];
+		return rte_ioat_completed_copies(dma_bind[vid].dmas[queue_id * 2
+			+ VIRTIO_RXQ].dev_id, 255, dump, dump);
+	} else {
+		/* Opaque data is not supported */
+		return -1;
+	}
+}

 static unsigned lcore_ids[RTE_MAX_LCORE];
 static uint16_t ports[RTE_MAX_ETHPORTS];
@@ -186,6 +265,94 @@ struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
  * Builds up the correct configuration for VMDQ VLAN pool map
  * according to the pool & queue limits.
  */
+
+static inline int
+open_dma(const char *value, void *dma_bind_info)
+{
+	struct dma_info_input *dma_info = dma_bind_info;
+	char *input = strndup(value, strlen(value) + 1);
+	char *addrs = input;
+	char *ptrs[2];
+	char *start, *end, *substr;
+	int64_t qid, vring_id;
+	struct rte_ioat_rawdev_config config;
+	struct rte_rawdev_info info = { .dev_private = &config };
+	char name[32];
+	int dev_id;
+	int ret = 0;
+
+	while (isblank(*addrs))
+		addrs++;
+	if (*addrs == '\0') {
+		ret = -1;
+		goto out;
+	}
+
+	/* process DMA devices within bracket. */
+	addrs++;
+	substr = strtok(addrs, ";]");
+	if (!substr) {
+		ret = -1;
+		goto out;
+	}
+
+	do {
+		rte_strsplit(substr, strlen(substr), ptrs, 2, '@');
+
+		start = strstr(ptrs[0], "txq");
+		if (start == NULL) {
+			ret = -1;
+			goto out;
+		}
+
+		start += 3;
+		qid = strtol(start, &end, 0);
+		if (end == start) {
+			ret = -1;
+			goto out;
+		}
+
+		vring_id = qid * 2 + VIRTIO_RXQ;
+		if (rte_pci_addr_parse(ptrs[1],
+				       &dma_info->dmas[vring_id].addr) < 0) {
+			ret = -1;
+			goto out;
+		}
+
+		rte_pci_device_name(&dma_info->dmas[vring_id].addr,
+				    name, sizeof(name));
+		dev_id = rte_rawdev_get_dev_id(name);
+		if (dev_id == (uint16_t)(-ENODEV) ||
+		    dev_id == (uint16_t)(-EINVAL)) {
+			ret = -1;
+			goto out;
+		}
+
+		if (rte_rawdev_info_get(dev_id, &info) < 0 ||
+		    strstr(info.driver_name, "ioat") == NULL) {
+			ret = -1;
+			goto out;
+		}
+
+		dma_info->dmas[vring_id].dev_id = dev_id;
+		dma_info->dmas[vring_id].is_valid = true;
+		config.ring_size = IOAT_RING_SIZE;
+		if (rte_rawdev_configure(dev_id, &info) < 0) {
+			ret = -1;
+			goto out;
+		}
+		rte_rawdev_start(dev_id);
+
+		dma_info->nr++;
+
+		substr = strtok(NULL, ";]");
+	} while (substr);
+
+out:
+	free(input);
+	return ret;
+}
+
 static inline int
 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
 {
@@ -488,6 +655,8 @@ us_vhost_parse_args(int argc, char **argv)
 		{"client", no_argument, &client_mode, 1},
 		{"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
 		{"builtin-net-driver", no_argument, &builtin_net_driver, 1},
+		{"async_vhost_driver", no_argument, &async_vhost_driver, 1},
+		{"dmas", required_argument, NULL, 0},
 		{NULL, 0, 0, 0},
 	};

@@ -623,13 +792,25 @@ us_vhost_parse_args(int argc, char **argv)
 						"socket-file", MAX_LONG_OPT_SZ)) {
 				if (us_vhost_parse_socket_path(optarg) == -1) {
 					RTE_LOG(INFO, VHOST_CONFIG,
-					"Invalid argument for socket name (Max %d characters)\n",
-					PATH_MAX);
+						"Invalid argument for socket name (Max %d characters)\n",
+						PATH_MAX);
 					us_vhost_usage(prgname);
 					return -1;
 				}
 			}

+			if (!strncmp(long_option[option_index].name,
+						"dmas", MAX_LONG_OPT_SZ)) {
+				if (open_dma(optarg, &(dma_bind[0])) == -1) {
+					if (*optarg == -1) {
+						RTE_LOG(INFO, VHOST_CONFIG,
+							"Wrong DMA args\n");
+						us_vhost_usage(prgname);
+						return -1;
+					}
+				}
+			}
+
 			break;

 			/* Invalid option - print options. */
@@ -785,9 +966,26 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
+	struct rte_mbuf *m_cpl[1];

 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
+	} else if (async_vhost_driver) {
+		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+						&m, 1);
+
+		if (likely(ret)) {
+			dst_vdev->nr_async_pkts++;
+			rte_mbuf_refcnt_update(m, 1);
+		}
+
+		while (likely(dst_vdev->nr_async_pkts)) {
+			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
+					VIRTIO_RXQ, m_cpl, 1)) {
+				dst_vdev->nr_async_pkts--;
+				rte_pktmbuf_free(*m_cpl);
+			}
+		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
@@ -1036,6 +1234,19 @@ drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }

+static __rte_always_inline void
+complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+						qid, p_cpl, MAX_PKT_BURST);
+	vdev->nr_async_pkts -= complete_count;
+	if (complete_count)
+		free_pkts(p_cpl, complete_count);
+}
+
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1044,6 +1255,10 @@ drain_eth_rx(struct vhost_dev *vdev)

 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
 				    pkts, MAX_PKT_BURST);
+
+	while (likely(vdev->nr_async_pkts))
+		complete_async_pkts(vdev, VIRTIO_RXQ);
+
 	if (!rx_count)
 		return;

@@ -1068,16 +1283,22 @@ drain_eth_rx(struct vhost_dev *vdev)
 	if (builtin_net_driver) {
 		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
 						pkts, rx_count);
+	} else if (async_vhost_driver) {
+		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
+					VIRTIO_RXQ, pkts, rx_count);
+		vdev->nr_async_pkts += enqueue_count;
 	} else {
 		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 						pkts, rx_count);
 	}
+
 	if (enable_stats) {
 		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
 		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
 	}

-	free_pkts(pkts, rx_count);
+	if (!async_vhost_driver)
+		free_pkts(pkts, rx_count);
 }

 static __rte_always_inline void
@@ -1224,6 +1445,9 @@ destroy_device(int vid)
 		"(%d) device has been removed from data core\n",
 		vdev->vid);

+	if (async_vhost_driver)
+		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+
 	rte_free(vdev);
 }

@@ -1238,6 +1462,12 @@ new_device(int vid)
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;

+	struct rte_vhost_async_channel_ops channel_ops = {
+		.transfer_data = ioat_transfer_data_cb,
+		.check_completed_copies = ioat_check_completed_copies_cb
+	};
+	struct rte_vhost_async_features f;
+
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
 	if (vdev == NULL) {
 		RTE_LOG(INFO, VHOST_DATA,
@@ -1278,6 +1508,13 @@ new_device(int vid)
 		"(%d) device has been added to data core %d\n",
 		vid, vdev->coreid);

+	if (async_vhost_driver) {
+		f.async_inorder = 1;
+		f.async_threshold = 256;
+		return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+			f.intval, &channel_ops);
+	}
+
 	return 0;
 }

@@ -1519,6 +1756,9 @@ main(int argc, char *argv[])
 	/* Register vhost user driver to handle vhost messages. */
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
+		if (async_vhost_driver)
+			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+
 		ret = rte_vhost_driver_register(file, flags);
 		if (ret != 0) {
 			unregister_drivers(i);
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 7cba0edbf..4317b6ae8 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@ struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
+	uint16_t nr_async_pkts;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
--
2.27.0


