From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 24D06A04DD; Wed, 21 Oct 2020 09:04:33 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 2C736ACBB; Wed, 21 Oct 2020 09:04:06 +0200 (CEST) Received: from mga12.intel.com (mga12.intel.com [192.55.52.136]) by dpdk.org (Postfix) with ESMTP id 5927AAC74 for ; Wed, 21 Oct 2020 09:03:58 +0200 (CEST) IronPort-SDR: 9kjUIgkUlT6oChIi3/grBvzVo++73bL62z9ba5nKRcXPSXR+8JucuJ+VUF/ibXk3NhluMsVSZU O2McWDfVICuA== X-IronPort-AV: E=McAfee;i="6000,8403,9780"; a="146617992" X-IronPort-AV: E=Sophos;i="5.77,400,1596524400"; d="scan'208";a="146617992" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 21 Oct 2020 00:03:58 -0700 IronPort-SDR: qIW+LbH5sleeV7f8h5s/dawAMt1Rg+RihchJ5UVpEuIlsRvBftq/EpPLME0oI+qrncozpA6NZc +CXljImc218w== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.77,400,1596524400"; d="scan'208";a="523795314" Received: from dpdk_jiangcheng.sh.intel.com ([10.67.119.112]) by fmsmga005.fm.intel.com with ESMTP; 21 Oct 2020 00:03:54 -0700 From: Cheng Jiang To: maxime.coquelin@redhat.com, chenbo.xia@intel.com Cc: dev@dpdk.org, patrick.fu@intel.com, YvonneX.Yang@intel.com, Cheng Jiang Date: Wed, 21 Oct 2020 06:50:42 +0000 Message-Id: <20201021065044.31839-3-Cheng1.jiang@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20201021065044.31839-1-Cheng1.jiang@intel.com> References: <20201020112058.77168-1-Cheng1.jiang@intel.com> <20201021065044.31839-1-Cheng1.jiang@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH v8 2/4] example/vhost: add support for vhost async data path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , 
List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch implements the vhost DMA operation callbacks for the CBDMA PMD and adds the vhost async data-path to the vhost sample. By providing a callback implementation for CBDMA, the vswitch can leverage IOAT to accelerate the vhost async data-path. Signed-off-by: Cheng Jiang --- examples/vhost/ioat.c | 100 ++++++++++++++++++++++++++++++++++++++++++ examples/vhost/main.c | 57 +++++++++++++++++++++++- examples/vhost/main.h | 12 +++++ 3 files changed, 168 insertions(+), 1 deletion(-) diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c index c3158d3c3..fa503c3db 100644 --- a/examples/vhost/ioat.c +++ b/examples/vhost/ioat.c @@ -6,11 +6,13 @@ #include #include #include +#include #include "main.h" #define MAX_VHOST_DEVICE 1024 #define IOAT_RING_SIZE 4096 +#define MAX_ENQUEUED_SIZE 256 struct dma_info { struct rte_pci_addr addr; @@ -25,6 +27,15 @@ struct dma_for_vhost { struct dma_for_vhost dma_bind[MAX_VHOST_DEVICE]; +struct packet_tracker { + unsigned short size_track[MAX_ENQUEUED_SIZE]; + unsigned short next_read; + unsigned short next_write; + unsigned short last_remain; +}; + +struct packet_tracker cb_tracker[MAX_VHOST_DEVICE]; + int open_ioat(const char *value) { @@ -115,3 +126,92 @@ open_ioat(const char *value) free(input); return ret; } + +uint32_t +ioat_transfer_data_cb(int vid, uint16_t queue_id, + struct rte_vhost_async_desc *descs, + struct rte_vhost_async_status *opaque_data, uint16_t count) +{ + uint32_t i_desc; + int dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id; + struct rte_vhost_iov_iter *src = NULL; + struct rte_vhost_iov_iter *dst = NULL; + unsigned long i_seg; + unsigned short mask = MAX_ENQUEUED_SIZE - 1; + unsigned short write = cb_tracker[dev_id].next_write; + + if (!opaque_data) { + for (i_desc = 0; i_desc < count; i_desc++) { + src = descs[i_desc].src; + dst = descs[i_desc].dst; + i_seg = 0; + while (i_seg < src->nr_segs) { + /* + * TODO: 
Assuming that the ring space of the + * IOAT device is large enough, so there is no + * error here, and the actual error handling + * will be added later. + */ + rte_ioat_enqueue_copy(dev_id, + (uintptr_t)(src->iov[i_seg].iov_base) + + src->offset, + (uintptr_t)(dst->iov[i_seg].iov_base) + + dst->offset, + src->iov[i_seg].iov_len, + 0, + 0); + i_seg++; + } + write &= mask; + cb_tracker[dev_id].size_track[write] = i_seg; + write++; + } + } else { + /* Opaque data is not supported */ + return -1; + } + /* ring the doorbell */ + rte_ioat_perform_ops(dev_id); + cb_tracker[dev_id].next_write = write; + return i_desc; +} + +uint32_t +ioat_check_completed_copies_cb(int vid, uint16_t queue_id, + struct rte_vhost_async_status *opaque_data, + uint16_t max_packets) +{ + if (!opaque_data) { + uintptr_t dump[255]; + unsigned short n_seg; + unsigned short read, write; + unsigned short nb_packet = 0; + unsigned short mask = MAX_ENQUEUED_SIZE - 1; + unsigned short i; + int dev_id = dma_bind[vid].dmas[queue_id * 2 + + VIRTIO_RXQ].dev_id; + n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump); + n_seg += cb_tracker[dev_id].last_remain; + if (!n_seg) + return 0; + read = cb_tracker[dev_id].next_read; + write = cb_tracker[dev_id].next_write; + for (i = 0; i < max_packets; i++) { + read &= mask; + if (read == write) + break; + if (n_seg >= cb_tracker[dev_id].size_track[read]) { + n_seg -= cb_tracker[dev_id].size_track[read]; + read++; + nb_packet++; + } else { + break; + } + } + cb_tracker[dev_id].next_read = read; + cb_tracker[dev_id].last_remain = n_seg; + return nb_packet; + } + /* Opaque data is not supported */ + return -1; +} diff --git a/examples/vhost/main.c b/examples/vhost/main.c index d759cae2c..896f5f781 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -804,9 +804,22 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev, struct rte_mbuf *m) { uint16_t ret; + struct rte_mbuf *m_cpl[1]; if (builtin_net_driver) { ret = 
vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1); + } else if (async_vhost_driver) { + ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, + &m, 1); + + if (likely(ret)) + dst_vdev->nr_async_pkts++; + + while (likely(dst_vdev->nr_async_pkts)) { + if (rte_vhost_poll_enqueue_completed(dst_vdev->vid, + VIRTIO_RXQ, m_cpl, 1)) + dst_vdev->nr_async_pkts--; + } } else { ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1); } @@ -1055,6 +1068,19 @@ drain_mbuf_table(struct mbuf_table *tx_q) } } +static __rte_always_inline void +complete_async_pkts(struct vhost_dev *vdev, uint16_t qid) +{ + struct rte_mbuf *p_cpl[MAX_PKT_BURST]; + uint16_t complete_count; + + complete_count = rte_vhost_poll_enqueue_completed(vdev->vid, + qid, p_cpl, MAX_PKT_BURST); + vdev->nr_async_pkts -= complete_count; + if (complete_count) + free_pkts(p_cpl, complete_count); +} + static __rte_always_inline void drain_eth_rx(struct vhost_dev *vdev) { @@ -1063,6 +1089,10 @@ drain_eth_rx(struct vhost_dev *vdev) rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q, pkts, MAX_PKT_BURST); + + while (likely(vdev->nr_async_pkts)) + complete_async_pkts(vdev, VIRTIO_RXQ); + if (!rx_count) return; @@ -1087,16 +1117,22 @@ drain_eth_rx(struct vhost_dev *vdev) if (builtin_net_driver) { enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ, pkts, rx_count); + } else if (async_vhost_driver) { + enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid, + VIRTIO_RXQ, pkts, rx_count); + vdev->nr_async_pkts += enqueue_count; } else { enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ, pkts, rx_count); } + if (enable_stats) { rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count); rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count); } - free_pkts(pkts, rx_count); + if (!async_vhost_driver) + free_pkts(pkts, rx_count); } static __rte_always_inline void @@ -1243,6 +1279,9 @@ destroy_device(int vid) "(%d) device has been removed from data core\n", vdev->vid); + if (async_vhost_driver) + 
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ); + rte_free(vdev); } @@ -1257,6 +1296,12 @@ new_device(int vid) uint32_t device_num_min = num_devices; struct vhost_dev *vdev; + struct rte_vhost_async_channel_ops channel_ops = { + .transfer_data = ioat_transfer_data_cb, + .check_completed_copies = ioat_check_completed_copies_cb + }; + struct rte_vhost_async_features f; + vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE); if (vdev == NULL) { RTE_LOG(INFO, VHOST_DATA, @@ -1297,6 +1342,13 @@ new_device(int vid) "(%d) device has been added to data core %d\n", vid, vdev->coreid); + if (async_vhost_driver) { + f.async_inorder = 1; + f.async_threshold = 256; + return rte_vhost_async_channel_register(vid, VIRTIO_RXQ, + f.intval, &channel_ops); + } + return 0; } @@ -1535,6 +1587,9 @@ main(int argc, char *argv[]) /* Register vhost user driver to handle vhost messages. */ for (i = 0; i < nb_sockets; i++) { char *file = socket_files + i * PATH_MAX; + if (async_vhost_driver) + flags = flags | RTE_VHOST_USER_ASYNC_COPY; + ret = rte_vhost_driver_register(file, flags); if (ret != 0) { unregister_drivers(i); diff --git a/examples/vhost/main.h b/examples/vhost/main.h index fe83d255b..5a628473e 100644 --- a/examples/vhost/main.h +++ b/examples/vhost/main.h @@ -8,6 +8,7 @@ #include #include +#include /* Macros for printing using RTE_LOG */ #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 @@ -51,6 +52,7 @@ struct vhost_dev { uint64_t features; size_t hdr_len; uint16_t nr_vrings; + uint16_t nr_async_pkts; struct rte_vhost_memory *mem; struct device_statistics stats; TAILQ_ENTRY(vhost_dev) global_vdev_entry; @@ -103,4 +105,14 @@ static int open_ioat(const char *value __rte_unused) #endif +uint32_t +ioat_transfer_data_cb(int vid, uint16_t queue_id, + struct rte_vhost_async_desc *descs, + struct rte_vhost_async_status *opaque_data, uint16_t count); + +uint32_t +ioat_check_completed_copies_cb(int vid, uint16_t queue_id, + struct rte_vhost_async_status *opaque_data, 
+ uint16_t max_packets); + #endif /* _MAIN_H_ */ -- 2.27.0