From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: patrick.fu@intel.com, dev@dpdk.org, chenbo.xia@intel.com
Subject: Re: [dpdk-dev] [PATCH v4] vhost: fix async copy fail on multi-page buffers
Date: Tue, 28 Jul 2020 15:55:27 +0200 [thread overview]
Message-ID: <113e052d-8d6e-a0d4-2218-5a022e2497ee@redhat.com> (raw)
In-Reply-To: <20200728032851.3704083-1-patrick.fu@intel.com>
On 7/28/20 5:28 AM, patrick.fu@intel.com wrote:
> From: Patrick Fu <patrick.fu@intel.com>
>
> Async copy fails when a single ring buffer vector is split across
> multiple physical pages. This happens because the current hpa address
> translation function doesn't handle multi-page buffers. A new gpa to
> hpa address conversion function, which returns the hpa of the first
> host page hit, is implemented in this patch. The async data path
> recursively calls this new function to construct a multi-segment async
> copy descriptor for ring buffers crossing physical page boundaries.
>
> Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
>
> Signed-off-by: Patrick Fu <patrick.fu@intel.com>
> ---
> v2:
> - change commit message and title
> - v1 patch used CPU to copy multi-page buffers; v2 patch split the
> copy into multiple async copy segments whenever possible
>
> v3:
> - added fixline
>
> v4:
> - fix missed translation of a gpa whose length equals the host
> page size
>
> lib/librte_vhost/vhost.h | 50 +++++++++++++++++++++++++++++++++++
> lib/librte_vhost/virtio_net.c | 40 +++++++++++++++++-----------
> 2 files changed, 75 insertions(+), 15 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 0f7212f88..05c202a57 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -616,6 +616,56 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> return 0;
> }
>
> +static __rte_always_inline rte_iova_t
> +gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
> + uint64_t gpa_size, uint64_t *hpa_size)
> +{
> + uint32_t i;
> + struct guest_page *page;
> + struct guest_page key;
> +
> + *hpa_size = gpa_size;
> + if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
> + key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1);
> + page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
> + sizeof(struct guest_page), guest_page_addrcmp);
> + if (page) {
> + if (gpa + gpa_size <=
> + page->guest_phys_addr + page->size) {
> + return gpa - page->guest_phys_addr +
> + page->host_phys_addr;
> + } else if (gpa < page->guest_phys_addr +
> + page->size) {
> + *hpa_size = page->guest_phys_addr +
> + page->size - gpa;
> + return gpa - page->guest_phys_addr +
> + page->host_phys_addr;
> + }
> + }
> + } else {
> + for (i = 0; i < dev->nr_guest_pages; i++) {
> + page = &dev->guest_pages[i];
> +
> + if (gpa >= page->guest_phys_addr) {
> + if (gpa + gpa_size <=
> + page->guest_phys_addr + page->size) {
> + return gpa - page->guest_phys_addr +
> + page->host_phys_addr;
> + } else if (gpa < page->guest_phys_addr +
> + page->size) {
> + *hpa_size = page->guest_phys_addr +
> + page->size - gpa;
> + return gpa - page->guest_phys_addr +
> + page->host_phys_addr;
> + }
> + }
> + }
> + }
> +
> + *hpa_size = 0;
> + return 0;
> +}
> +
> static __rte_always_inline uint64_t
> hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len)
> {
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 95a0bc19f..124a33a10 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -980,6 +980,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
> struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
> int error = 0;
> + uint64_t mapped_len;
>
> uint32_t tlen = 0;
> int tvec_idx = 0;
> @@ -1072,24 +1073,31 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>
> cpy_len = RTE_MIN(buf_avail, mbuf_avail);
>
> - if (unlikely(cpy_len >= cpy_threshold)) {
> - hpa = (void *)(uintptr_t)gpa_to_hpa(dev,
> - buf_iova + buf_offset, cpy_len);
> + while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
> + hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
> + buf_iova + buf_offset,
> + cpy_len, &mapped_len);
>
> - if (unlikely(!hpa)) {
> - error = -1;
> - goto out;
> - }
> + if (unlikely(!hpa || mapped_len < cpy_threshold))
> + break;
>
> async_fill_vec(src_iovec + tvec_idx,
> (void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
> - mbuf_offset), cpy_len);
> + mbuf_offset), (size_t)mapped_len);
>
> - async_fill_vec(dst_iovec + tvec_idx, hpa, cpy_len);
> + async_fill_vec(dst_iovec + tvec_idx,
> + hpa, (size_t)mapped_len);
>
> - tlen += cpy_len;
> + tlen += (uint32_t)mapped_len;
> + cpy_len -= (uint32_t)mapped_len;
> + mbuf_avail -= (uint32_t)mapped_len;
> + mbuf_offset += (uint32_t)mapped_len;
> + buf_avail -= (uint32_t)mapped_len;
> + buf_offset += (uint32_t)mapped_len;
> tvec_idx++;
> - } else {
> + }
> +
> + if (likely(cpy_len)) {
> if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
> rte_memcpy(
> (void *)((uintptr_t)(buf_addr + buf_offset)),
> @@ -1112,10 +1120,12 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> }
> }
>
> - mbuf_avail -= cpy_len;
> - mbuf_offset += cpy_len;
> - buf_avail -= cpy_len;
> - buf_offset += cpy_len;
> + if (cpy_len) {
> + mbuf_avail -= cpy_len;
> + mbuf_offset += cpy_len;
> + buf_avail -= cpy_len;
> + buf_offset += cpy_len;
> + }
Is it really necessary to check that the copy length is not 0?
Thanks,
Maxime
> }
>
> out:
>
next prev parent reply other threads:[~2020-07-28 13:55 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-20 2:52 [dpdk-dev] [PATCH v1] vhost: support cross page buf in async data path patrick.fu
2020-07-20 16:39 ` Maxime Coquelin
2020-07-21 2:57 ` Fu, Patrick
2020-07-21 8:35 ` Maxime Coquelin
2020-07-21 9:01 ` Fu, Patrick
2020-07-24 13:49 ` [dpdk-dev] [PATCH v2] vhost: fix async copy fail on multi-page buffers patrick.fu
2020-07-27 6:33 ` [dpdk-dev] [PATCH v3] " patrick.fu
2020-07-27 13:14 ` Xia, Chenbo
2020-07-28 3:09 ` Fu, Patrick
2020-07-28 3:28 ` [dpdk-dev] [PATCH v4] " patrick.fu
2020-07-28 13:55 ` Maxime Coquelin [this message]
2020-07-29 1:40 ` Fu, Patrick
2020-07-29 2:05 ` Fu, Patrick
2020-07-29 2:04 ` [dpdk-dev] [PATCH v5] " Patrick Fu
2020-07-29 14:24 ` Maxime Coquelin
2020-07-29 14:55 ` Maxime Coquelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=113e052d-8d6e-a0d4-2218-5a022e2497ee@redhat.com \
--to=maxime.coquelin@redhat.com \
--cc=chenbo.xia@intel.com \
--cc=dev@dpdk.org \
--cc=patrick.fu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).