From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id A94D6A04C0; Tue, 29 Sep 2020 08:54:23 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 68F5C1D8D7; Tue, 29 Sep 2020 08:53:58 +0200 (CEST) Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id 120091D8CB for ; Tue, 29 Sep 2020 08:53:51 +0200 (CEST) IronPort-SDR: x5lr1a6/jzHWpLAmDiYRxLBG6KjsuZy2C/SDRGOFPDt8n1UEOPK3/dedgOLIZk7lGAKi1WgS2M aElfMBuWTWtQ== X-IronPort-AV: E=McAfee;i="6000,8403,9758"; a="226274546" X-IronPort-AV: E=Sophos;i="5.77,317,1596524400"; d="scan'208";a="226274546" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga006.jf.intel.com ([10.7.209.51]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Sep 2020 23:53:51 -0700 IronPort-SDR: e3tYlhn3Et80v5ye/n/JuErwV1KUFhuletobw3UKaQvIeX6KEiBdA1NKTViLT7MTQtOd6g8bPr kc4rv8MQg2wA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.77,317,1596524400"; d="scan'208";a="312109018" Received: from dpdk_jiangcheng.sh.intel.com ([10.67.119.112]) by orsmga006.jf.intel.com with ESMTP; 28 Sep 2020 23:53:49 -0700 From: Cheng Jiang To: maxime.coquelin@redhat.com, chenbo.xia@intel.com, zhihong.wang@intel.com Cc: dev@dpdk.org, patrick.fu@intel.com, Cheng Jiang Date: Tue, 29 Sep 2020 06:42:37 +0000 Message-Id: <20200929064239.41931-3-Cheng1.jiang@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20200929064239.41931-1-Cheng1.jiang@intel.com> References: <20200910064351.35513-1-Cheng1.jiang@intel.com> <20200929064239.41931-1-Cheng1.jiang@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH v2 2/4] example/vhost: add support for vhost async data path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch is to implement vhost DMA operation callbacks for CBDMA PMD and add vhost async data-path in vhost sample. With providing callback implementation for CBDMA, vswitch can leverage IOAT to accelerate vhost async data-path. Signed-off-by: Cheng Jiang --- examples/vhost/ioat.c | 93 +++++++++++++++++++++++++++++++++++++++++++ examples/vhost/main.c | 61 +++++++++++++++++++++++++++- examples/vhost/main.h | 12 ++++++ 3 files changed, 165 insertions(+), 1 deletion(-) diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c index ab8130d06..063666aa2 100644 --- a/examples/vhost/ioat.c +++ b/examples/vhost/ioat.c @@ -6,11 +6,13 @@ #include #include #include +#include #include "main.h" #define MAX_VHOST_DEVICE 1024 #define IOAT_RING_SIZE 4096 +#define MAX_ENQUEUED_SIZE 256 struct dma_info { struct rte_pci_addr addr; @@ -25,6 +27,15 @@ struct dma_for_vhost { struct dma_for_vhost dma_bind[MAX_VHOST_DEVICE]; +struct packet_tracker { + unsigned short size_track[MAX_ENQUEUED_SIZE]; + unsigned short next_read; + unsigned short next_write; + unsigned short last_remain; +}; + +struct packet_tracker cb_tracker[MAX_VHOST_DEVICE]; + int open_ioat(const char *value) { @@ -114,3 +125,85 @@ open_ioat(const char *value) free(input); return ret; } + +uint32_t +ioat_transfer_data_cb(int vid, uint16_t queue_id, + struct rte_vhost_async_desc *descs, + struct rte_vhost_async_status *opaque_data, uint16_t count) +{ + int ret; + uint32_t i_desc; + int dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id; + struct rte_vhost_iov_iter *src = NULL; + struct rte_vhost_iov_iter *dst = NULL; + unsigned long i_seg; + unsigned short mask = MAX_ENQUEUED_SIZE - 1; + unsigned short write = cb_tracker[dev_id].next_write; + + if (likely(!opaque_data)) { + for (i_desc = 0; i_desc < count; i_desc++) { + src = descs[i_desc].src; + dst = descs[i_desc].dst; + i_seg = 0; + while (i_seg < src->nr_segs) { + ret = rte_ioat_enqueue_copy(dev_id, + (uintptr_t)(src->iov[i_seg].iov_base) + + src->offset, + (uintptr_t)(dst->iov[i_seg].iov_base) + + dst->offset, + src->iov[i_seg].iov_len, + 0, + 0, + 0); + if (ret != 1) + break; + i_seg++; + } + write &= mask; + cb_tracker[dev_id].size_track[write] = i_seg; + write++; + } + } else { + /* Opaque data is not supported */ + return -1; + } + /* ring the doorbell */ + rte_ioat_do_copies(dev_id); + cb_tracker[dev_id].next_write = write; + return i_desc; +} + +uint32_t +ioat_check_completed_copies_cb(int vid, uint16_t queue_id, + struct rte_vhost_async_status *opaque_data, + uint16_t max_packets) +{ + if (!opaque_data) { + uintptr_t dump[255]; + unsigned short n_seg; + unsigned short read; + unsigned short nb_packet = 0; + unsigned short mask = MAX_ENQUEUED_SIZE - 1; + int dev_id = dma_bind[vid].dmas[queue_id * 2 + + VIRTIO_RXQ].dev_id; + n_seg = rte_ioat_completed_copies(dev_id, 255, dump, dump); + n_seg = n_seg + cb_tracker[dev_id].last_remain; + read = cb_tracker[dev_id].next_read; + for (unsigned short i = 0; i < max_packets; i++) { + read &= mask; + if (n_seg >= cb_tracker[dev_id].size_track[read]) { + n_seg -= cb_tracker[dev_id].size_track[read]; + read++; + nb_packet++; + } else { + break; + } + } + cb_tracker[dev_id].next_read = read; + cb_tracker[dev_id].last_remain = n_seg; + return nb_packet; + } else { + /* Opaque data is not supported */ + return -1; + } +} diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 9b1165a78..893edbb78 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -817,9 +817,26 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev, struct rte_mbuf *m) { uint16_t ret; + struct rte_mbuf *m_cpl[1]; if (builtin_net_driver) { ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1); + } else if (async_vhost_driver) { + ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, + &m, 1); + + if (likely(ret)) { + dst_vdev->nr_async_pkts++; + rte_mbuf_refcnt_update(m, 1); + } + + while (likely(dst_vdev->nr_async_pkts)) { + if (rte_vhost_poll_enqueue_completed(dst_vdev->vid, + VIRTIO_RXQ, m_cpl, 1)) { + dst_vdev->nr_async_pkts--; + rte_pktmbuf_free(*m_cpl); + } + } } else { ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1); } @@ -1068,6 +1085,19 @@ drain_mbuf_table(struct mbuf_table *tx_q) } } +static __rte_always_inline void +complete_async_pkts(struct vhost_dev *vdev, uint16_t qid) +{ + struct rte_mbuf *p_cpl[MAX_PKT_BURST]; + uint16_t complete_count; + + complete_count = rte_vhost_poll_enqueue_completed(vdev->vid, + qid, p_cpl, MAX_PKT_BURST); + vdev->nr_async_pkts -= complete_count; + if (complete_count) + free_pkts(p_cpl, complete_count); +} + static __rte_always_inline void drain_eth_rx(struct vhost_dev *vdev) { @@ -1076,6 +1106,10 @@ drain_eth_rx(struct vhost_dev *vdev) rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q, pkts, MAX_PKT_BURST); + + while (likely(vdev->nr_async_pkts)) + complete_async_pkts(vdev, VIRTIO_RXQ); + if (!rx_count) return; @@ -1100,16 +1134,22 @@ drain_eth_rx(struct vhost_dev *vdev) if (builtin_net_driver) { enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ, pkts, rx_count); + } else if (async_vhost_driver) { + enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid, + VIRTIO_RXQ, pkts, rx_count); + vdev->nr_async_pkts += enqueue_count; } else { enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ, pkts, rx_count); } + if (enable_stats) { rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count); rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count); } - free_pkts(pkts, rx_count); + if (!async_vhost_driver) + free_pkts(pkts, rx_count); } static __rte_always_inline void @@ -1256,6 +1296,9 @@ destroy_device(int vid) "(%d) device has been removed from data core\n", vdev->vid); + if (async_vhost_driver) + rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ); + rte_free(vdev); } @@ -1270,6 +1313,12 @@ new_device(int vid) uint32_t device_num_min = num_devices; struct vhost_dev *vdev; + struct rte_vhost_async_channel_ops channel_ops = { + .transfer_data = ioat_transfer_data_cb, + .check_completed_copies = ioat_check_completed_copies_cb + }; + struct rte_vhost_async_features f; + vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE); if (vdev == NULL) { RTE_LOG(INFO, VHOST_DATA, @@ -1310,6 +1359,13 @@ new_device(int vid) "(%d) device has been added to data core %d\n", vid, vdev->coreid); + if (async_vhost_driver) { + f.async_inorder = 1; + f.async_threshold = 256; + return rte_vhost_async_channel_register(vid, VIRTIO_RXQ, + f.intval, &channel_ops); + } + return 0; } @@ -1551,6 +1607,9 @@ main(int argc, char *argv[]) /* Register vhost user driver to handle vhost messages. */ for (i = 0; i < nb_sockets; i++) { char *file = socket_files + i * PATH_MAX; + if (async_vhost_driver) + flags = flags | RTE_VHOST_USER_ASYNC_COPY; + ret = rte_vhost_driver_register(file, flags); if (ret != 0) { unregister_drivers(i); diff --git a/examples/vhost/main.h b/examples/vhost/main.h index eac18824b..e4cf13190 100644 --- a/examples/vhost/main.h +++ b/examples/vhost/main.h @@ -8,6 +8,7 @@ #include #include +#include /* Macros for printing using RTE_LOG */ #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 @@ -51,6 +52,7 @@ struct vhost_dev { uint64_t features; size_t hdr_len; uint16_t nr_vrings; + uint16_t nr_async_pkts; struct rte_vhost_memory *mem; struct device_statistics stats; TAILQ_ENTRY(vhost_dev) global_vdev_entry; @@ -92,3 +94,13 @@ uint16_t vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id, #endif /* _MAIN_H_ */ int open_ioat(const char *value); + +uint32_t +ioat_transfer_data_cb(int vid, uint16_t queue_id, + struct rte_vhost_async_desc *descs, + struct rte_vhost_async_status *opaque_data, uint16_t count); + +uint32_t +ioat_check_completed_copies_cb(int vid, uint16_t queue_id, + struct rte_vhost_async_status *opaque_data, + uint16_t max_packets); -- 2.27.0