From: Wenwu Ma <wenwux.ma@intel.com>
To: dev@dpdk.org
Cc: maxime.coquelin@redhat.com, chenbo.xia@intel.com,
cheng1.jiang@intel.com, jiayu.hu@intel.com,
Wenwu Ma <wenwux.ma@intel.com>
Subject: [dpdk-dev] [PATCH v4 4/4] examples/vhost: support vhost async dequeue data path
Date: Wed, 30 Jun 2021 19:27:57 +0000 [thread overview]
Message-ID: <20210630192757.91911-5-wenwux.ma@intel.com> (raw)
In-Reply-To: <20210630192757.91911-1-wenwux.ma@intel.com>
This patch adds the vhost async dequeue data path to the vhost sample
application. The vswitch can leverage IOAT to accelerate the vhost async
dequeue data path.
Signed-off-by: Wenwu Ma <wenwux.ma@intel.com>
---
doc/guides/sample_app_ug/vhost.rst | 9 +-
examples/vhost/ioat.c | 61 ++++++++++---
examples/vhost/ioat.h | 25 ++++++
examples/vhost/main.c | 140 ++++++++++++++++++++---------
4 files changed, 177 insertions(+), 58 deletions(-)
diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst
index 9afde9c7f5..63dcf181e1 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -169,9 +169,12 @@ demonstrates how to use the async vhost APIs. It's used in combination with dmas
**--dmas**
This parameter is used to specify the assigned DMA device of a vhost device.
Async vhost-user net driver will be used if --dmas is set. For example
---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for vhost
-device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
-enqueue operation.
+--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
+DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operation
+and use DMA channel 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
+operation. The index of the device corresponds to the socket file in order;
+that is, vhost device 0 is created through the first socket file, vhost
+device 1 is created through the second socket file, and so on.
Common Issues
-------------
diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index bf4e033bdb..a305100b47 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -21,6 +21,8 @@ struct packet_tracker {
struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
+int vid2socketid[MAX_VHOST_DEVICE];
+
int
open_ioat(const char *value)
{
@@ -29,7 +31,7 @@ open_ioat(const char *value)
char *addrs = input;
char *ptrs[2];
char *start, *end, *substr;
- int64_t vid, vring_id;
+ int64_t socketid, vring_id;
struct rte_ioat_rawdev_config config;
struct rte_rawdev_info info = { .dev_private = &config };
char name[32];
@@ -60,6 +62,8 @@ open_ioat(const char *value)
goto out;
}
while (i < args_nr) {
+ char *txd, *rxd;
+ bool is_txd;
char *arg_temp = dma_arg[i];
uint8_t sub_nr;
sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
@@ -68,27 +72,38 @@ open_ioat(const char *value)
goto out;
}
- start = strstr(ptrs[0], "txd");
- if (start == NULL) {
+ int async_flag;
+ txd = strstr(ptrs[0], "txd");
+ rxd = strstr(ptrs[0], "rxd");
+ if (txd == NULL && rxd == NULL) {
ret = -1;
goto out;
+ } else if (txd) {
+ is_txd = true;
+ start = txd;
+ async_flag = ASYNC_RX_VHOST;
+ } else {
+ is_txd = false;
+ start = rxd;
+ async_flag = ASYNC_TX_VHOST;
}
start += 3;
- vid = strtol(start, &end, 0);
+ socketid = strtol(start, &end, 0);
if (end == start) {
ret = -1;
goto out;
}
- vring_id = 0 + VIRTIO_RXQ;
+ vring_id = is_txd ? VIRTIO_RXQ : VIRTIO_TXQ;
+
if (rte_pci_addr_parse(ptrs[1],
- &(dma_info + vid)->dmas[vring_id].addr) < 0) {
+ &(dma_info + socketid)->dmas[vring_id].addr) < 0) {
ret = -1;
goto out;
}
- rte_pci_device_name(&(dma_info + vid)->dmas[vring_id].addr,
+ rte_pci_device_name(&(dma_info + socketid)->dmas[vring_id].addr,
name, sizeof(name));
dev_id = rte_rawdev_get_dev_id(name);
if (dev_id == (uint16_t)(-ENODEV) ||
@@ -103,8 +118,9 @@ open_ioat(const char *value)
goto out;
}
- (dma_info + vid)->dmas[vring_id].dev_id = dev_id;
- (dma_info + vid)->dmas[vring_id].is_valid = true;
+ (dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
+ (dma_info + socketid)->dmas[vring_id].is_valid = true;
+ (dma_info + socketid)->async_flag |= async_flag;
config.ring_size = IOAT_RING_SIZE;
config.hdls_disable = true;
if (rte_rawdev_configure(dev_id, &info, sizeof(config)) < 0) {
@@ -126,13 +142,16 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data, uint16_t count)
{
uint32_t i_desc;
- uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
struct rte_vhost_iov_iter *src = NULL;
struct rte_vhost_iov_iter *dst = NULL;
unsigned long i_seg;
unsigned short mask = MAX_ENQUEUED_SIZE - 1;
- unsigned short write = cb_tracker[dev_id].next_write;
+ if (queue_id >= MAX_RING_COUNT)
+ return -1;
+
+ uint16_t dev_id = dma_bind[vid2socketid[vid]].dmas[queue_id].dev_id;
+ unsigned short write = cb_tracker[dev_id].next_write;
if (!opaque_data) {
for (i_desc = 0; i_desc < count; i_desc++) {
src = descs[i_desc].src;
@@ -170,16 +189,16 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets)
{
- if (!opaque_data) {
+ if (!opaque_data && (queue_id < MAX_RING_COUNT)) {
uintptr_t dump[255];
int n_seg;
unsigned short read, write;
unsigned short nb_packet = 0;
unsigned short mask = MAX_ENQUEUED_SIZE - 1;
unsigned short i;
+ uint16_t dev_id;
- uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2
- + VIRTIO_RXQ].dev_id;
+ dev_id = dma_bind[vid2socketid[vid]].dmas[queue_id].dev_id;
n_seg = rte_ioat_completed_ops(dev_id, 255, NULL, NULL, dump, dump);
if (n_seg < 0) {
RTE_LOG(ERR,
@@ -215,4 +234,18 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
return -1;
}
+uint32_t get_async_flag_by_vid(int vid)
+{
+ return dma_bind[vid2socketid[vid]].async_flag;
+}
+
+uint32_t get_async_flag_by_socketid(int socketid)
+{
+ return dma_bind[socketid].async_flag;
+}
+
+void init_vid2socketid_array(int vid, int socketid)
+{
+ vid2socketid[vid] = socketid;
+}
#endif /* RTE_RAW_IOAT */
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..51111d65af 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -12,6 +12,9 @@
#define MAX_VHOST_DEVICE 1024
#define IOAT_RING_SIZE 4096
#define MAX_ENQUEUED_SIZE 4096
+#define MAX_RING_COUNT 2
+#define ASYNC_RX_VHOST 1
+#define ASYNC_TX_VHOST 2
struct dma_info {
struct rte_pci_addr addr;
@@ -20,6 +23,7 @@ struct dma_info {
};
struct dma_for_vhost {
+ int async_flag;
struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
uint16_t nr;
};
@@ -36,6 +40,10 @@ uint32_t
ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
+
+uint32_t get_async_flag_by_vid(int vid);
+uint32_t get_async_flag_by_socketid(int socketid);
+void init_vid2socketid_array(int vid, int socketid);
#else
static int open_ioat(const char *value __rte_unused)
{
@@ -59,5 +67,22 @@ ioat_check_completed_copies_cb(int vid __rte_unused,
{
return -1;
}
+
+static uint32_t
+get_async_flag_by_vid(int vid __rte_unused)
+{
+ return 0;
+}
+
+static uint32_t
+get_async_flag_by_socketid(int socketid __rte_unused)
+{
+ return 0;
+}
+
+static void
+init_vid2socketid_array(int vid __rte_unused, int socketid __rte_unused)
+{
+}
#endif
#endif /* _IOAT_H_ */
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index aebdc3a566..81d7e4cbd3 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -93,8 +93,6 @@ static int client_mode;
static int builtin_net_driver;
-static int async_vhost_driver;
-
static char *dma_type;
/* Specify timeout (in useconds) between retries on RX. */
@@ -679,7 +677,6 @@ us_vhost_parse_args(int argc, char **argv)
us_vhost_usage(prgname);
return -1;
}
- async_vhost_driver = 1;
break;
case OPT_CLIENT_NUM:
@@ -897,7 +894,7 @@ drain_vhost(struct vhost_dev *vdev)
__ATOMIC_SEQ_CST);
}
- if (!async_vhost_driver)
+ if ((get_async_flag_by_vid(vdev->vid) & ASYNC_RX_VHOST) == 0)
free_pkts(m, nr_xmit);
}
@@ -1237,10 +1234,19 @@ drain_eth_rx(struct vhost_dev *vdev)
__ATOMIC_SEQ_CST);
}
- if (!async_vhost_driver)
+ if ((get_async_flag_by_vid(vdev->vid) & ASYNC_RX_VHOST) == 0)
free_pkts(pkts, rx_count);
}
+uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts, uint16_t count)
+{
+ int nr_inflight;
+ return rte_vhost_async_try_dequeue_burst(dev->vid, queue_id,
+ mbuf_pool, pkts, count, &nr_inflight);
+}
+
uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
struct rte_mempool *mbuf_pool,
struct rte_mbuf **pkts, uint16_t count)
@@ -1392,12 +1398,90 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
- if (async_vhost_driver)
+ if (get_async_flag_by_vid(vid) & ASYNC_RX_VHOST)
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+ if (get_async_flag_by_vid(vid) & ASYNC_TX_VHOST)
+ rte_vhost_async_channel_unregister(vid, VIRTIO_TXQ);
rte_free(vdev);
}
+static int
+get_socketid_by_vid(int vid)
+{
+ int i;
+ char ifname[PATH_MAX];
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+
+ for (i = 0; i < nb_sockets; i++) {
+ char *file = socket_files + i * PATH_MAX;
+ if (strcmp(file, ifname) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
+static int
+init_vhost_queue_ops(int vid)
+{
+ int socketid = get_socketid_by_vid(vid);
+ if (socketid == -1)
+ return -1;
+
+ init_vid2socketid_array(vid, socketid);
+ if (builtin_net_driver) {
+ vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
+ vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
+ } else {
+ if (get_async_flag_by_vid(vid) & ASYNC_RX_VHOST) {
+ vdev_queue_ops[vid].enqueue_pkt_burst =
+ async_enqueue_pkts;
+ } else {
+ vdev_queue_ops[vid].enqueue_pkt_burst =
+ sync_enqueue_pkts;
+ }
+
+ if (get_async_flag_by_vid(vid) & ASYNC_TX_VHOST) {
+ vdev_queue_ops[vid].dequeue_pkt_burst =
+ async_dequeue_pkts;
+ } else {
+ vdev_queue_ops[vid].dequeue_pkt_burst =
+ sync_dequeue_pkts;
+ }
+ }
+
+ return 0;
+}
+
+static int
+vhost_async_channel_register(int vid)
+{
+ int ret = 0;
+ struct rte_vhost_async_features f;
+ struct rte_vhost_async_channel_ops channel_ops;
+
+ if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) {
+ channel_ops.transfer_data = ioat_transfer_data_cb;
+ channel_ops.check_completed_copies =
+ ioat_check_completed_copies_cb;
+
+ f.async_inorder = 1;
+ f.async_threshold = 256;
+
+ if (get_async_flag_by_vid(vid) & ASYNC_RX_VHOST) {
+ ret |= rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+ f.intval, &channel_ops);
+ }
+ if (get_async_flag_by_vid(vid) & ASYNC_TX_VHOST) {
+ ret |= rte_vhost_async_channel_register(vid, VIRTIO_TXQ,
+ f.intval, &channel_ops);
+ }
+ }
+
+ return ret;
+}
+
/*
* A new device is added to a data core. First the device is added to the main linked list
* and then allocated to a specific data core.
@@ -1431,20 +1515,8 @@ new_device(int vid)
}
}
- if (builtin_net_driver) {
- vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
- vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
- } else {
- if (async_vhost_driver) {
- vdev_queue_ops[vid].enqueue_pkt_burst =
- async_enqueue_pkts;
- } else {
- vdev_queue_ops[vid].enqueue_pkt_burst =
- sync_enqueue_pkts;
- }
-
- vdev_queue_ops[vid].dequeue_pkt_burst = sync_dequeue_pkts;
- }
+ if (init_vhost_queue_ops(vid) != 0)
+ return -1;
if (builtin_net_driver)
vs_vhost_net_setup(vdev);
@@ -1473,28 +1545,13 @@ new_device(int vid)
rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
+ int ret = vhost_async_channel_register(vid);
+
RTE_LOG(INFO, VHOST_DATA,
"(%d) device has been added to data core %d\n",
vid, vdev->coreid);
- if (async_vhost_driver) {
- struct rte_vhost_async_features f;
- struct rte_vhost_async_channel_ops channel_ops;
-
- if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) {
- channel_ops.transfer_data = ioat_transfer_data_cb;
- channel_ops.check_completed_copies =
- ioat_check_completed_copies_cb;
-
- f.async_inorder = 1;
- f.async_threshold = 256;
-
- return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
- f.intval, &channel_ops);
- }
- }
-
- return 0;
+ return ret;
}
/*
@@ -1735,10 +1792,11 @@ main(int argc, char *argv[])
for (i = 0; i < nb_sockets; i++) {
char *file = socket_files + i * PATH_MAX;
- if (async_vhost_driver)
- flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+ uint64_t flag = flags;
+ if (get_async_flag_by_socketid(i) != 0)
+ flag |= RTE_VHOST_USER_ASYNC_COPY;
- ret = rte_vhost_driver_register(file, flags);
+ ret = rte_vhost_driver_register(file, flag);
if (ret != 0) {
unregister_drivers(i);
rte_exit(EXIT_FAILURE,
--
2.25.1
next prev parent reply other threads:[~2021-06-30 7:35 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-02 8:31 [dpdk-dev] [PATCH 0/1] lib/vhost: support async dequeue for split ring Yuan Wang
2021-06-02 8:31 ` [dpdk-dev] [PATCH 1/1] " Yuan Wang
2021-06-07 16:17 ` Maxime Coquelin
2021-06-09 1:21 ` Hu, Jiayu
2021-06-18 20:03 ` [dpdk-dev] [PATCH v2 0/4] vhost: " Wenwu Ma
2021-06-18 14:10 ` Maxime Coquelin
2021-06-18 20:03 ` [dpdk-dev] [PATCH v2 1/4] examples/vhost: refactor vhost enqueue and dequeue datapaths Wenwu Ma
2021-06-18 20:03 ` [dpdk-dev] [PATCH v2 2/4] examples/vhost: use a new API to query remaining ring space Wenwu Ma
2021-06-18 20:03 ` [dpdk-dev] [PATCH v2 3/4] vhost: support async dequeue for split ring Wenwu Ma
2021-06-18 20:03 ` [dpdk-dev] [PATCH v2 4/4] examples/vhost: support vhost async dequeue data path Wenwu Ma
2021-06-23 15:00 ` [dpdk-dev] [PATCH v3 0/4] vhost: support async dequeue for split ring Wenwu Ma
2021-06-23 15:00 ` [dpdk-dev] [PATCH v3 1/4] examples/vhost: refactor vhost enqueue and dequeue datapaths Wenwu Ma
2021-06-23 15:00 ` [dpdk-dev] [PATCH v3 2/4] examples/vhost: use a new API to query remaining ring space Wenwu Ma
2021-06-23 15:00 ` [dpdk-dev] [PATCH v3 3/4] vhost: support async dequeue for split ring Wenwu Ma
2021-06-23 15:00 ` [dpdk-dev] [PATCH v3 4/4] examples/vhost: support vhost async dequeue data path Wenwu Ma
2021-06-30 19:27 ` [dpdk-dev] [PATCH v4 0/4] support async dequeue for split ring Wenwu Ma
2021-06-30 19:27 ` [dpdk-dev] [PATCH v4 1/4] examples/vhost: refactor vhost enqueue and dequeue datapaths Wenwu Ma
2021-06-30 19:27 ` [dpdk-dev] [PATCH v4 2/4] examples/vhost: use a new API to query remaining ring space Wenwu Ma
2021-06-30 19:27 ` [dpdk-dev] [PATCH v4 3/4] vhost: support async dequeue for split ring Wenwu Ma
2021-06-30 19:27 ` Wenwu Ma [this message]
2021-07-05 18:11 ` [dpdk-dev] [PATCH v5 0/4] " Wenwu Ma
2021-07-05 18:11 ` [dpdk-dev] [PATCH v5 1/4] examples/vhost: refactor vhost enqueue and dequeue datapaths Wenwu Ma
2021-07-13 13:34 ` Maxime Coquelin
2021-07-05 18:11 ` [dpdk-dev] [PATCH v5 2/4] examples/vhost: use a new API to query remaining ring space Wenwu Ma
2021-07-13 13:36 ` Maxime Coquelin
2021-07-05 18:11 ` [dpdk-dev] [PATCH v5 3/4] vhost: support async dequeue for split ring Wenwu Ma
2021-07-13 14:30 ` Maxime Coquelin
2021-07-14 6:50 ` Hu, Jiayu
2021-07-15 13:18 ` Maxime Coquelin
2021-07-16 1:10 ` Hu, Jiayu
2021-07-16 7:45 ` Maxime Coquelin
2021-07-16 7:55 ` Hu, Jiayu
2021-07-16 9:02 ` Maxime Coquelin
2021-07-16 8:14 ` David Marchand
2021-07-16 13:45 ` Hu, Jiayu
2021-07-16 13:52 ` David Marchand
2021-07-16 14:00 ` Hu, Jiayu
2021-07-05 18:11 ` [dpdk-dev] [PATCH v5 4/4] examples/vhost: support vhost async dequeue data path Wenwu Ma
2021-07-13 17:01 ` Maxime Coquelin
2021-07-16 19:18 ` [dpdk-dev] [PATCH v6 0/4] support async dequeue for split ring Wenwu Ma
2021-07-16 19:18 ` [dpdk-dev] [PATCH v6 1/4] examples/vhost: refactor vhost enqueue and dequeue datapaths Wenwu Ma
2021-07-16 19:18 ` [dpdk-dev] [PATCH v6 2/4] examples/vhost: use a new API to query remaining ring space Wenwu Ma
2021-07-16 19:18 ` [dpdk-dev] [PATCH v6 3/4] vhost: support async dequeue for split ring Wenwu Ma
2021-07-16 19:18 ` [dpdk-dev] [PATCH v6 4/4] examples/vhost: support vhost async dequeue data path Wenwu Ma
2021-07-21 14:20 ` [dpdk-dev] [PATCH v7 0/4] support async dequeue for split ring Wenwu Ma
2021-07-21 2:31 ` Wang, Yinan
2021-07-21 14:20 ` [dpdk-dev] [PATCH v7 1/4] examples/vhost: refactor vhost enqueue and dequeue datapaths Wenwu Ma
2021-07-21 14:20 ` [dpdk-dev] [PATCH v7 2/4] examples/vhost: use a new API to query remaining ring space Wenwu Ma
2021-07-21 14:20 ` [dpdk-dev] [PATCH v7 3/4] vhost: support async dequeue for split ring Wenwu Ma
2021-07-21 14:20 ` [dpdk-dev] [PATCH v7 4/4] examples/vhost: support vhost async dequeue data path Wenwu Ma
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210630192757.91911-5-wenwux.ma@intel.com \
--to=wenwux.ma@intel.com \
--cc=chenbo.xia@intel.com \
--cc=cheng1.jiang@intel.com \
--cc=dev@dpdk.org \
--cc=jiayu.hu@intel.com \
--cc=maxime.coquelin@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).