From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 79992A0545; Mon, 29 Aug 2022 02:58:29 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 0995C40DDA; Mon, 29 Aug 2022 02:58:29 +0200 (CEST) Received: from mga12.intel.com (mga12.intel.com [192.55.52.136]) by mails.dpdk.org (Postfix) with ESMTP id 9767C4069F for ; Mon, 29 Aug 2022 02:58:27 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1661734707; x=1693270707; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=YSIffM5/RmLNy/wZAkZsPypiAR1/qT7jzjoPne8bAmY=; b=ICQhEHyvP1Mq7gpsAfZngPxMMiwc9nLzYdcl8vPXdnxbdX+uXgO5Z8oS UmqnghXTiFWsRfidDtWLlD/7ciFL9Hzu3INZefqAADRi2JjtcRW/8x6sm +WLEp/R6LKfCvXubNKH7ETqKpW0lFIlhP1bDfuvM4I3Q+zdcaghk/NVpB ZvGBIJdun38wy/LQUW75F79Vwwy0TFiI5yfPM8Wlk7hp62ZFMYmyTTEvv cyDxSqrGYdF3AE2dq0hqKq9v4Y4YC5+xsgwNwfNr/der5fz6u+FOgcMFg Ciwc9TtGYktWmwnCWcl9XOecl6DmFeHYU2mghzIhvmOWRdWTQ1v7hyhoP g==; X-IronPort-AV: E=McAfee;i="6500,9779,10453"; a="274532581" X-IronPort-AV: E=Sophos;i="5.93,271,1654585200"; d="scan'208";a="274532581" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Aug 2022 17:58:26 -0700 X-IronPort-AV: E=Sophos;i="5.93,271,1654585200"; d="scan'208";a="640729571" Received: from unknown (HELO localhost.localdomain) ([10.239.252.251]) by orsmga008-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Aug 2022 17:58:22 -0700 From: Wenwu Ma To: maxime.coquelin@redhat.com, chenbo.xia@intel.com, dev@dpdk.org Cc: sunil.pai.g@intel.com, jiayu.hu@intel.com, yinan.wang@intel.com, xingguang.he@intel.com, xuan.ding@intel.com, cheng1.jiang@intel.com, yuanx.wang@intel.com, Wenwu Ma Subject: [PATCH v4] vhost: support CPU copy for small packets 
Date: Mon, 29 Aug 2022 08:56:58 +0800 Message-Id: <20220829005658.84590-1-wenwux.ma@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20220812064517.272530-1-wenwux.ma@intel.com> References: <20220812064517.272530-1-wenwux.ma@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Offloading small packets to DMA degrades throughput by 10%~20% because DMA offloading is not free and DMA is not good at processing small packets. In addition, control plane packets are usually small, and assigning those packets to DMA will significantly increase latency, which may cause timeouts, for example for TCP handshake packets. Therefore, this patch uses the CPU to perform small copies in vhost. Signed-off-by: Wenwu Ma --- v4: * fix coding style issues v3: * compare threshold with entire packet length v2: * fix CI build error --- lib/vhost/vhost.h | 7 ++-- lib/vhost/virtio_net.c | 73 +++++++++++++++++++++++++++++++++--------- 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 40fac3b7c6..8a7d90f737 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -142,8 +142,10 @@ struct virtqueue_stats { * iovec */ struct vhost_iovec { - void *src_addr; - void *dst_addr; + void *src_iov_addr; + void *dst_iov_addr; + void *src_virt_addr; + void *dst_virt_addr; size_t len; }; @@ -155,6 +157,7 @@ struct vhost_iov_iter { struct vhost_iovec *iov; /** number of iovec in this iterator */ unsigned long nr_segs; + unsigned long nr_len; }; struct async_dma_vchan_info { diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index 35fa4670fd..cf796183a0 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -26,6 +26,8 @@ #define MAX_BATCH_LEN 256 +#define CPU_COPY_THRESHOLD_LEN 256 + static __rte_always_inline 
uint16_t async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, @@ -119,8 +121,8 @@ vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq, return -1; for (i = 0; i < nr_segs; i++) { - copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr, - (rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC); + copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_iov_addr, + (rte_iova_t)iov[i].dst_iov_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC); /** * Since all memory is pinned and DMA vChannel * ring has enough space, failure should be a @@ -149,6 +151,22 @@ vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq, return nr_segs; } +static __rte_always_inline int64_t +vhost_async_cpu_transfer_one(struct vhost_virtqueue *vq, uint16_t flag_idx, + struct vhost_iov_iter *pkt) +{ + uint16_t i; + struct vhost_iovec *iov = pkt->iov; + uint32_t nr_segs = pkt->nr_segs; + + for (i = 0; i < nr_segs; i++) + rte_memcpy(iov[i].dst_virt_addr, iov[i].src_virt_addr, iov[i].len); + + vq->async->pkts_cmpl_flag[flag_idx] = true; + + return 0; +} + static __rte_always_inline uint16_t vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq, int16_t dma_id, uint16_t vchan_id, uint16_t head_idx, @@ -161,8 +179,13 @@ vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_spinlock_lock(&dma_info->dma_lock); for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) { - ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx, - &pkts[pkt_idx]); + if (pkts[pkt_idx].nr_len > CPU_COPY_THRESHOLD_LEN) { + ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx, + &pkts[pkt_idx]); + } else { + ret = vhost_async_cpu_transfer_one(vq, head_idx, &pkts[pkt_idx]); + } + if (unlikely(ret < 0)) break; @@ -1002,13 +1025,14 @@ async_iter_initialize(struct virtio_net *dev, struct vhost_async 
*async) iter = async->iov_iter + async->iter_idx; iter->iov = async->iovec + async->iovec_idx; iter->nr_segs = 0; + iter->nr_len = 0; return 0; } static __rte_always_inline int async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async, - void *src, void *dst, size_t len) + void *src_iova, void *dst_iova, void *src_addr, void *dst_addr, size_t len) { struct vhost_iov_iter *iter; struct vhost_iovec *iovec; @@ -1027,8 +1051,10 @@ async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async, iter = async->iov_iter + async->iter_idx; iovec = async->iovec + async->iovec_idx; - iovec->src_addr = src; - iovec->dst_addr = dst; + iovec->src_iov_addr = src_iova; + iovec->dst_iov_addr = dst_iova; + iovec->src_virt_addr = src_addr; + iovec->dst_virt_addr = dst_addr; iovec->len = len; iter->nr_segs++; @@ -1051,6 +1077,7 @@ async_iter_cancel(struct vhost_async *async) iter = async->iov_iter + async->iter_idx; async->iovec_idx -= iter->nr_segs; iter->nr_segs = 0; + iter->nr_len = 0; iter->iov = NULL; } @@ -1064,13 +1091,18 @@ async_iter_reset(struct vhost_async *async) static __rte_always_inline int async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf *m, uint32_t mbuf_offset, - uint64_t buf_iova, uint32_t cpy_len, bool to_desc) + uint64_t buf_iova, uint64_t buf_addr, uint32_t cpy_len, bool to_desc) { struct vhost_async *async = vq->async; uint64_t mapped_len; uint32_t buf_offset = 0; - void *src, *dst; + void *src_iova, *dst_iova; + void *src_addr, *dst_addr; void *host_iova; + struct vhost_iov_iter *iter; + + iter = async->iov_iter + async->iter_idx; + iter->nr_len += cpy_len; while (cpy_len) { host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev, @@ -1083,14 +1115,21 @@ async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, } if (to_desc) { - src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); - dst = host_iova; + src_iova = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); + dst_iova = 
host_iova; + + src_addr = rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); + dst_addr = (void *)(uintptr_t)(buf_addr + buf_offset); } else { - src = host_iova; - dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); + src_iova = host_iova; + dst_iova = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset); + + src_addr = (void *)(uintptr_t)(buf_addr + buf_offset); + dst_addr = rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); } - if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len))) + if (unlikely(async_iter_add_iovec(dev, async, src_iova, dst_iova, + src_addr, dst_addr, (size_t)mapped_len))) return -1; cpy_len -= (uint32_t)mapped_len; @@ -1239,7 +1278,8 @@ mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, if (is_async) { if (async_fill_seg(dev, vq, m, mbuf_offset, - buf_iova + buf_offset, cpy_len, true) < 0) + buf_iova + buf_offset, buf_addr + buf_offset, + cpy_len, true) < 0) goto error; } else { sync_fill_seg(dev, vq, m, mbuf_offset, @@ -2737,7 +2777,8 @@ desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, if (is_async) { if (async_fill_seg(dev, vq, cur, mbuf_offset, - buf_iova + buf_offset, cpy_len, false) < 0) + buf_iova + buf_offset, buf_addr + buf_offset, + cpy_len, false) < 0) goto error; } else if (likely(hdr && cur == m)) { rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), -- 2.25.1