From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 6C71DA0524; Mon, 27 Jul 2020 08:36:09 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id AF13D1BFE8; Mon, 27 Jul 2020 08:36:07 +0200 (CEST) Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by dpdk.org (Postfix) with ESMTP id 64C721BFD9 for ; Mon, 27 Jul 2020 08:36:04 +0200 (CEST) IronPort-SDR: wKVu76Ek7DRyQw/ojo6oLhBB8q+ph0Yq10CTuk+RX+g647gU4Da2+MGaulI9+rX64WvkDTkjLG 1ZEcVfYHvRxQ== X-IronPort-AV: E=McAfee;i="6000,8403,9694"; a="169076250" X-IronPort-AV: E=Sophos;i="5.75,401,1589266800"; d="scan'208";a="169076250" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Jul 2020 23:36:04 -0700 IronPort-SDR: wMa+CS/AJyAmJ8vOXTWkIHE1kI06jZ+dZt41tuVxcjeWOmD1oSs9Phc2dVfxa2AD666pXrs13w oxsStXnl4VNw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.75,401,1589266800"; d="scan'208";a="463955329" Received: from npg-dpdk-patrickfu-casc2.sh.intel.com ([10.67.119.92]) by orsmga005.jf.intel.com with ESMTP; 26 Jul 2020 23:36:02 -0700 From: patrick.fu@intel.com To: dev@dpdk.org, maxime.coquelin@redhat.com, chenbo.xia@intel.com Cc: Patrick Fu Date: Mon, 27 Jul 2020 14:33:07 +0800 Message-Id: <20200727063307.3703071-1-patrick.fu@intel.com> X-Mailer: git-send-email 2.18.4 In-Reply-To: <20200720025242.3066787-1-patrick.fu@intel.com> References: <20200720025242.3066787-1-patrick.fu@intel.com> Subject: [dpdk-dev] [PATCH v3] vhost: fix async copy fail on multi-page buffers X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Patrick Fu Async copy fails when single 
ring buffer vector is split across multiple physical pages. This happens because the current hpa address translation function doesn't handle multi-page buffers. A new gpa to hpa address conversion function, which returns the hpa of the first host page hit by the buffer, is implemented in this patch. The async data path repeatedly calls this new function to construct a multi-segment async copy descriptor for ring buffers crossing physical page boundaries. Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring") Signed-off-by: Patrick Fu --- v2: - change commit message and title - v1 patch used CPU to copy multi-page buffers; v2 patch splits the copy into multiple async copy segments whenever possible v3: - added fixline lib/librte_vhost/vhost.h | 50 +++++++++++++++++++++++++++++++++++ lib/librte_vhost/virtio_net.c | 40 +++++++++++++++++----------- 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 0f7212f88..05c202a57 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -616,6 +616,56 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) return 0; } +static __rte_always_inline rte_iova_t +gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa, + uint64_t gpa_size, uint64_t *hpa_size) +{ + uint32_t i; + struct guest_page *page; + struct guest_page key; + + *hpa_size = gpa_size; + if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) { + key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1); + page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages, + sizeof(struct guest_page), guest_page_addrcmp); + if (page) { + if (gpa + gpa_size <= + page->guest_phys_addr + page->size) { + return gpa - page->guest_phys_addr + + page->host_phys_addr; + } else if (gpa < page->guest_phys_addr + + page->size) { + *hpa_size = page->guest_phys_addr + + page->size - gpa; + return gpa - page->guest_phys_addr + + page->host_phys_addr; + } + } + } else { + for (i = 0; i < 
dev->nr_guest_pages; i++) { + page = &dev->guest_pages[i]; + + if (gpa >= page->guest_phys_addr) { + if (gpa + gpa_size < + page->guest_phys_addr + page->size) { + return gpa - page->guest_phys_addr + + page->host_phys_addr; + } else if (gpa < page->guest_phys_addr + + page->size) { + *hpa_size = page->guest_phys_addr + + page->size - gpa; + return gpa - page->guest_phys_addr + + page->host_phys_addr; + } + } + } + } + + *hpa_size = 0; + return 0; +} + static __rte_always_inline uint64_t hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len) { diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 95a0bc19f..124a33a10 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -980,6 +980,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, struct batch_copy_elem *batch_copy = vq->batch_copy_elems; struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; int error = 0; + uint64_t mapped_len; uint32_t tlen = 0; int tvec_idx = 0; @@ -1072,24 +1073,31 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, cpy_len = RTE_MIN(buf_avail, mbuf_avail); - if (unlikely(cpy_len >= cpy_threshold)) { - hpa = (void *)(uintptr_t)gpa_to_hpa(dev, - buf_iova + buf_offset, cpy_len); + while (unlikely(cpy_len && cpy_len >= cpy_threshold)) { + hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev, + buf_iova + buf_offset, + cpy_len, &mapped_len); - if (unlikely(!hpa)) { - error = -1; - goto out; - } + if (unlikely(!hpa || mapped_len < cpy_threshold)) + break; async_fill_vec(src_iovec + tvec_idx, (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, - mbuf_offset), cpy_len); + mbuf_offset), (size_t)mapped_len); - async_fill_vec(dst_iovec + tvec_idx, hpa, cpy_len); + async_fill_vec(dst_iovec + tvec_idx, + hpa, (size_t)mapped_len); - tlen += cpy_len; + tlen += (uint32_t)mapped_len; + cpy_len -= (uint32_t)mapped_len; + mbuf_avail -= (uint32_t)mapped_len; + mbuf_offset += (uint32_t)mapped_len; + buf_avail -= 
(uint32_t)mapped_len; + buf_offset += (uint32_t)mapped_len; tvec_idx++; - } else { + } + + if (likely(cpy_len)) { if (unlikely(vq->batch_copy_nb_elems >= vq->size)) { rte_memcpy( (void *)((uintptr_t)(buf_addr + buf_offset)), @@ -1112,10 +1120,12 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, } } - mbuf_avail -= cpy_len; - mbuf_offset += cpy_len; - buf_avail -= cpy_len; - buf_offset += cpy_len; + if (cpy_len) { + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + buf_avail -= cpy_len; + buf_offset += cpy_len; + } } out: -- 2.18.4