From: Wenwu Ma
To: dev@dpdk.org
Cc: maxime.coquelin@redhat.com, chenbo.xia@intel.com, cheng1.jiang@intel.com,
 jiayu.hu@intel.com, Sunil.Pai.G@intel.com, yvonnex.yang@intel.com, Wenwu Ma
Date: Tue, 28 Sep 2021 18:56:41 +0000
Message-Id: <20210928185641.86601-5-wenwux.ma@intel.com>
X-Mailer: git-send-email 2.25.1
In-Reply-To: <20210928185641.86601-1-wenwux.ma@intel.com>
References: <20210906204837.112466-1-wenwux.ma@intel.com>
 <20210928185641.86601-1-wenwux.ma@intel.com>
Subject: [dpdk-dev] [PATCH v3 4/4] examples/vhost: support vhost async dequeue data path

This patch adds the vhost async dequeue data path to the vhost sample.
The vswitch can leverage IOAT to accelerate the vhost async dequeue data
path.

Signed-off-by: Wenwu Ma
Reviewed-by: Maxime Coquelin
Tested-by: Yvonne Yang
---
 doc/guides/sample_app_ug/vhost.rst |   9 +-
 examples/vhost/ioat.c              |  61 +++++++--
 examples/vhost/ioat.h              |  25 ++++
 examples/vhost/main.c              | 201 +++++++++++++++++++----------
 examples/vhost/main.h              |   6 +-
 5 files changed, 219 insertions(+), 83 deletions(-)

diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst
index 9afde9c7f5..63dcf181e1 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -169,9 +169,12 @@ demonstrates how to use the async vhost APIs. It's used in combination with dmas
 **--dmas**
 This parameter is used to specify the assigned DMA device of a vhost device.
 Async vhost-user net driver will be used if --dmas is set. For example
---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for vhost
-device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
-enqueue operation.
+--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
+DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operation
+and use DMA channel 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
+operation. The index of the device corresponds to the socket file in order,
+that means vhost device 0 is created through the first socket file, vhost
+device 1 is created through the second socket file, and so on.
 
Common Issues ------------- diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c index 6adc30b622..3a256b0f4c 100644 --- a/examples/vhost/ioat.c +++ b/examples/vhost/ioat.c @@ -21,6 +21,8 @@ struct packet_tracker { struct packet_tracker cb_tracker[MAX_VHOST_DEVICE]; +int vid2socketid[MAX_VHOST_DEVICE]; + int open_ioat(const char *value) { @@ -29,7 +31,7 @@ open_ioat(const char *value) char *addrs = input; char *ptrs[2]; char *start, *end, *substr; - int64_t vid, vring_id; + int64_t socketid, vring_id; struct rte_ioat_rawdev_config config; struct rte_rawdev_info info = { .dev_private = &config }; char name[32]; @@ -60,6 +62,7 @@ open_ioat(const char *value) goto out; } while (i < args_nr) { + bool is_txd; char *arg_temp = dma_arg[i]; uint8_t sub_nr; sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@'); @@ -68,27 +71,39 @@ open_ioat(const char *value) goto out; } - start = strstr(ptrs[0], "txd"); - if (start == NULL) { + int async_flag; + char *txd, *rxd; + txd = strstr(ptrs[0], "txd"); + rxd = strstr(ptrs[0], "rxd"); + if (txd) { + is_txd = true; + start = txd; + async_flag = ASYNC_ENQUEUE_VHOST; + } else if (rxd) { + is_txd = false; + start = rxd; + async_flag = ASYNC_DEQUEUE_VHOST; + } else { ret = -1; goto out; } start += 3; - vid = strtol(start, &end, 0); + socketid = strtol(start, &end, 0); if (end == start) { ret = -1; goto out; } - vring_id = 0 + VIRTIO_RXQ; + vring_id = is_txd ? VIRTIO_RXQ : VIRTIO_TXQ; + if (rte_pci_addr_parse(ptrs[1], - &(dma_info + vid)->dmas[vring_id].addr) < 0) { + &(dma_info + socketid)->dmas[vring_id].addr) < 0) { ret = -1; goto out; } - rte_pci_device_name(&(dma_info + vid)->dmas[vring_id].addr, + rte_pci_device_name(&(dma_info + socketid)->dmas[vring_id].addr, name, sizeof(name)); dev_id = rte_rawdev_get_dev_id(name); if (dev_id == (uint16_t)(-ENODEV) || @@ -103,8 +118,9 @@ open_ioat(const char *value) goto out; } - (dma_info + vid)->dmas[vring_id].dev_id = dev_id; - (dma_info + vid)->dmas[vring_id].is_valid = true; + (dma_info + socketid)->dmas[vring_id].dev_id = dev_id; + (dma_info + socketid)->dmas[vring_id].is_valid = true; + (dma_info + socketid)->async_flag |= async_flag; config.ring_size = IOAT_RING_SIZE; config.hdls_disable = true; if (rte_rawdev_configure(dev_id, &info, sizeof(config)) < 0) { @@ -126,13 +142,16 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id, struct rte_vhost_async_status *opaque_data, uint16_t count) { uint32_t i_desc; - uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id; struct rte_vhost_iov_iter *src = NULL; struct rte_vhost_iov_iter *dst = NULL; unsigned long i_seg; unsigned short mask = MAX_ENQUEUED_SIZE - 1; - unsigned short write = cb_tracker[dev_id].next_write; + if (queue_id >= MAX_RING_COUNT) + return -1; + + uint16_t dev_id = dma_bind[vid2socketid[vid]].dmas[queue_id].dev_id; + unsigned short write = cb_tracker[dev_id].next_write; if (!opaque_data) { for (i_desc = 0; i_desc < count; i_desc++) { src = descs[i_desc].src; @@ -170,16 +189,16 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id, struct rte_vhost_async_status *opaque_data, uint16_t max_packets) { - if (!opaque_data) { + if (!opaque_data && queue_id < MAX_RING_COUNT) { uintptr_t dump[255]; int n_seg; unsigned short read, write; unsigned short nb_packet = 0; unsigned short mask = MAX_ENQUEUED_SIZE - 1; unsigned short i; + uint16_t dev_id; - uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 - + VIRTIO_RXQ].dev_id; + dev_id = dma_bind[vid2socketid[vid]].dmas[queue_id].dev_id; n_seg = rte_ioat_completed_ops(dev_id, 
255, NULL, NULL, dump, dump); if (n_seg < 0) { RTE_LOG(ERR, @@ -215,4 +234,18 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id, return -1; } +uint32_t get_async_flag_by_vid(int vid) +{ + return dma_bind[vid2socketid[vid]].async_flag; +} + +uint32_t get_async_flag_by_socketid(int socketid) +{ + return dma_bind[socketid].async_flag; +} + +void init_vid2socketid_array(int vid, int socketid) +{ + vid2socketid[vid] = socketid; +} #endif /* RTE_RAW_IOAT */ diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h index 62e163c585..105cee556d 100644 --- a/examples/vhost/ioat.h +++ b/examples/vhost/ioat.h @@ -12,6 +12,9 @@ #define MAX_VHOST_DEVICE 1024 #define IOAT_RING_SIZE 4096 #define MAX_ENQUEUED_SIZE 4096 +#define MAX_RING_COUNT 2 +#define ASYNC_ENQUEUE_VHOST 1 +#define ASYNC_DEQUEUE_VHOST 2 struct dma_info { struct rte_pci_addr addr; @@ -20,6 +23,7 @@ struct dma_info { }; struct dma_for_vhost { + uint32_t async_flag; struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2]; uint16_t nr; }; @@ -36,6 +40,10 @@ int32_t ioat_check_completed_copies_cb(int vid, uint16_t queue_id, struct rte_vhost_async_status *opaque_data, uint16_t max_packets); + +uint32_t get_async_flag_by_vid(int vid); +uint32_t get_async_flag_by_socketid(int socketid); +void init_vid2socketid_array(int vid, int socketid); #else static int open_ioat(const char *value __rte_unused) { @@ -59,5 +67,22 @@ ioat_check_completed_copies_cb(int vid __rte_unused, { return -1; } + +static uint32_t +get_async_flag_by_vid(int vid __rte_unused) +{ + return 0; +} + +static uint32_t +get_async_flag_by_socketid(int socketid __rte_unused) +{ + return 0; +} + +static void +init_vid2socketid_array(int vid __rte_unused, int socketid __rte_unused) +{ +} #endif #endif /* _IOAT_H_ */ diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 254f7097bc..572ffc12ae 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -93,8 +93,6 @@ static int client_mode; static int builtin_net_driver; -static int async_vhost_driver; - static char *dma_type; /* Specify timeout (in useconds) between retries on RX. 
*/ @@ -673,7 +671,6 @@ us_vhost_parse_args(int argc, char **argv) us_vhost_usage(prgname); return -1; } - async_vhost_driver = 1; break; case OPT_CLIENT_NUM: @@ -846,7 +843,8 @@ complete_async_pkts(struct vhost_dev *vdev) VIRTIO_RXQ, p_cpl, MAX_PKT_BURST); if (complete_count) { free_pkts(p_cpl, complete_count); - __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, __ATOMIC_SEQ_CST); + __atomic_sub_fetch(&vdev->pkts_enq_inflight, + complete_count, __ATOMIC_SEQ_CST); } } @@ -891,7 +889,7 @@ drain_vhost(struct vhost_dev *vdev) __ATOMIC_SEQ_CST); } - if (!async_vhost_driver) + if ((get_async_flag_by_vid(vdev->vid) & ASYNC_ENQUEUE_VHOST) == 0) free_pkts(m, nr_xmit); } @@ -1171,8 +1169,8 @@ async_enqueue_pkts(struct vhost_dev *vdev, uint16_t queue_id, complete_async_pkts(vdev); enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid, queue_id, pkts, rx_count); - __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count, - __ATOMIC_SEQ_CST); + __atomic_add_fetch(&vdev->pkts_enq_inflight, + enqueue_count, __ATOMIC_SEQ_CST); enqueue_fail = rx_count - enqueue_count; if (enqueue_fail) @@ -1228,10 +1226,23 @@ drain_eth_rx(struct vhost_dev *vdev) __ATOMIC_SEQ_CST); } - if (!async_vhost_driver) + if ((get_async_flag_by_vid(vdev->vid) & ASYNC_ENQUEUE_VHOST) == 0) free_pkts(pkts, rx_count); } +uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts, uint16_t count) +{ + int nr_inflight; + uint16_t dequeue_count; + dequeue_count = rte_vhost_async_try_dequeue_burst(dev->vid, queue_id, + mbuf_pool, pkts, count, &nr_inflight); + if (likely(nr_inflight != -1)) + dev->pkts_deq_inflight = nr_inflight; + return dequeue_count; +} + uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) @@ -1327,6 +1338,32 @@ switch_worker(void *arg __rte_unused) return 0; } +static void +vhost_clear_queue_thread_unsafe(struct vhost_dev *vdev, uint16_t queue_id) +{ + uint16_t n_pkt = 0; + struct rte_mbuf *m_enq_cpl[vdev->pkts_enq_inflight]; + struct rte_mbuf *m_deq_cpl[vdev->pkts_deq_inflight]; + + if (queue_id % 2 == 0) { + while (vdev->pkts_enq_inflight) { + n_pkt = rte_vhost_clear_queue_thread_unsafe(vdev->vid, + queue_id, m_enq_cpl, vdev->pkts_enq_inflight); + free_pkts(m_enq_cpl, n_pkt); + __atomic_sub_fetch(&vdev->pkts_enq_inflight, + n_pkt, __ATOMIC_SEQ_CST); + } + } else { + while (vdev->pkts_deq_inflight) { + n_pkt = rte_vhost_clear_queue_thread_unsafe(vdev->vid, + queue_id, m_deq_cpl, vdev->pkts_deq_inflight); + free_pkts(m_deq_cpl, n_pkt); + __atomic_sub_fetch(&vdev->pkts_deq_inflight, + n_pkt, __ATOMIC_SEQ_CST); + } + } +} + /* * Remove a device from the specific data core linked list and from the * main linked list. 
Synchonization occurs through the use of the @@ -1383,21 +1420,91 @@ destroy_device(int vid) "(%d) device has been removed from data core\n", vdev->vid); - if (async_vhost_driver) { - uint16_t n_pkt = 0; - struct rte_mbuf *m_cpl[vdev->pkts_inflight]; + if (get_async_flag_by_vid(vid) & ASYNC_ENQUEUE_VHOST) { + vhost_clear_queue_thread_unsafe(vdev, VIRTIO_RXQ); + rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ); + } + if (get_async_flag_by_vid(vid) & ASYNC_DEQUEUE_VHOST) { + vhost_clear_queue_thread_unsafe(vdev, VIRTIO_TXQ); + rte_vhost_async_channel_unregister(vid, VIRTIO_TXQ); + } + + rte_free(vdev); +} + +static int +get_socketid_by_vid(int vid) +{ + int i; + char ifname[PATH_MAX]; + rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); + + for (i = 0; i < nb_sockets; i++) { + char *file = socket_files + i * PATH_MAX; + if (strcmp(file, ifname) == 0) + return i; + } + + return -1; +} + +static int +init_vhost_queue_ops(int vid) +{ + int socketid = get_socketid_by_vid(vid); + if (socketid == -1) + return -1; + + init_vid2socketid_array(vid, socketid); + if (builtin_net_driver) { + vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts; + vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts; + } else { + if (get_async_flag_by_vid(vid) & ASYNC_ENQUEUE_VHOST) { + vdev_queue_ops[vid].enqueue_pkt_burst = + async_enqueue_pkts; + } else { + vdev_queue_ops[vid].enqueue_pkt_burst = + sync_enqueue_pkts; + } - while (vdev->pkts_inflight) { - n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, VIRTIO_RXQ, - m_cpl, vdev->pkts_inflight); - free_pkts(m_cpl, n_pkt); - __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST); + if (get_async_flag_by_vid(vid) & ASYNC_DEQUEUE_VHOST) { + vdev_queue_ops[vid].dequeue_pkt_burst = + async_dequeue_pkts; + } else { + vdev_queue_ops[vid].dequeue_pkt_burst = + sync_dequeue_pkts; } + } - rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ); + return 0; +} + +static int +vhost_async_channel_register(int vid) +{ + int ret = 0; + struct rte_vhost_async_config config = {0}; + struct rte_vhost_async_channel_ops channel_ops; + + if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) { + channel_ops.transfer_data = ioat_transfer_data_cb; + channel_ops.check_completed_copies = + ioat_check_completed_copies_cb; + + config.features = RTE_VHOST_ASYNC_INORDER; + + if (get_async_flag_by_vid(vid) & ASYNC_ENQUEUE_VHOST) { + ret |= rte_vhost_async_channel_register(vid, VIRTIO_RXQ, + config, &channel_ops); + } + if (get_async_flag_by_vid(vid) & ASYNC_DEQUEUE_VHOST) { + ret |= rte_vhost_async_channel_register(vid, VIRTIO_TXQ, + config, &channel_ops); + } } - rte_free(vdev); + return ret; } /* @@ -1433,20 +1540,8 @@ new_device(int vid) } } - if (builtin_net_driver) { - vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts; - vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts; - } else { - if (async_vhost_driver) { - vdev_queue_ops[vid].enqueue_pkt_burst = - async_enqueue_pkts; - } else { - vdev_queue_ops[vid].enqueue_pkt_burst = - sync_enqueue_pkts; - } - - vdev_queue_ops[vid].dequeue_pkt_burst = sync_dequeue_pkts; - } + if (init_vhost_queue_ops(vid) != 0) + return -1; if (builtin_net_driver) vs_vhost_net_setup(vdev); @@ -1475,27 +1570,13 @@ new_device(int vid) rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0); rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0); + int ret = vhost_async_channel_register(vid); + RTE_LOG(INFO, VHOST_DATA, "(%d) device has been added to data core %d\n", vid, vdev->coreid); - if (async_vhost_driver) { - 
struct rte_vhost_async_config config = {0}; - struct rte_vhost_async_channel_ops channel_ops; - - if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) { - channel_ops.transfer_data = ioat_transfer_data_cb; - channel_ops.check_completed_copies = - ioat_check_completed_copies_cb; - - config.features = RTE_VHOST_ASYNC_INORDER; - - return rte_vhost_async_channel_register(vid, VIRTIO_RXQ, - config, &channel_ops); - } - } - - return 0; + return ret; } static int @@ -1513,19 +1594,8 @@ vring_state_changed(int vid, uint16_t queue_id, int enable) if (queue_id != VIRTIO_RXQ) return 0; - if (async_vhost_driver) { - if (!enable) { - uint16_t n_pkt = 0; - struct rte_mbuf *m_cpl[vdev->pkts_inflight]; - - while (vdev->pkts_inflight) { - n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, queue_id, - m_cpl, vdev->pkts_inflight); - free_pkts(m_cpl, n_pkt); - __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST); - } - } - } + if (!enable) + vhost_clear_queue_thread_unsafe(vdev, queue_id); return 0; } @@ -1769,10 +1839,11 @@ main(int argc, char *argv[]) for (i = 0; i < nb_sockets; i++) { char *file = socket_files + i * PATH_MAX; - if (async_vhost_driver) - flags = flags | RTE_VHOST_USER_ASYNC_COPY; + uint64_t flag = flags; + if (get_async_flag_by_socketid(i) != 0) + flag |= RTE_VHOST_USER_ASYNC_COPY; - ret = rte_vhost_driver_register(file, flags); + ret = rte_vhost_driver_register(file, flag); if (ret != 0) { unregister_drivers(i); rte_exit(EXIT_FAILURE, diff --git a/examples/vhost/main.h b/examples/vhost/main.h index 2c5a558f12..5af7e7d97f 100644 --- a/examples/vhost/main.h +++ b/examples/vhost/main.h @@ -51,7 +51,8 @@ struct vhost_dev { uint64_t features; size_t hdr_len; uint16_t nr_vrings; - uint16_t pkts_inflight; + uint16_t pkts_enq_inflight; + uint16_t pkts_deq_inflight; struct rte_vhost_memory *mem; struct device_statistics stats; TAILQ_ENTRY(vhost_dev) global_vdev_entry; @@ -112,4 +113,7 @@ uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint16_t count); uint16_t async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count); +uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts, uint16_t count); #endif /* _MAIN_H_ */ -- 2.25.1
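
Editor's note for readers trying the new path outside of dpdk-vhost: the
sketch below shows the dequeue/drain pattern this patch wires up, written
against the async dequeue API introduced earlier in this series. It is an
illustration only; MAX_PKT_BURST, VIRTIO_TXQ, deq_inflight and the mbuf
pool are placeholder names, and a real application would forward the
mbufs instead of freeing them.

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_vhost.h>
#include <rte_vhost_async.h>

#define MAX_PKT_BURST 32	/* application-defined burst size */
#define VIRTIO_TXQ 1		/* guest TX ring, i.e. the host dequeue side */

static uint16_t deq_inflight;	/* copies still owned by the DMA engine */

static uint16_t
poll_async_dequeue(int vid, struct rte_mempool *mbuf_pool)
{
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	int nr_inflight;
	uint16_t n;

	/* Completed DMA copies come back as ready-to-forward mbufs. */
	n = rte_vhost_async_try_dequeue_burst(vid, VIRTIO_TXQ, mbuf_pool,
			pkts, MAX_PKT_BURST, &nr_inflight);
	if (nr_inflight >= 0)
		deq_inflight = nr_inflight;

	/* Forward the packets here; this sketch just drops them. */
	rte_pktmbuf_free_bulk(pkts, n);

	return n;
}

/*
 * Before unregistering the async channel (device removal or vring
 * disable), drain the copies still in flight, as destroy_device() and
 * vring_state_changed() do in this patch.
 */
static void
drain_async_dequeue(int vid)
{
	while (deq_inflight) {
		struct rte_mbuf *cpl[MAX_PKT_BURST];
		uint16_t n = rte_vhost_clear_queue_thread_unsafe(vid,
				VIRTIO_TXQ, cpl, MAX_PKT_BURST);

		rte_pktmbuf_free_bulk(cpl, n);
		deq_inflight -= n;
	}
}

The sample itself now keeps separate enqueue and dequeue inflight
counters (pkts_enq_inflight/pkts_deq_inflight), since both directions
can have DMA copies outstanding at the same time.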