DPDK patches and discussions
 help / color / mirror / Atom feed
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: patrick.fu@intel.com, dev@dpdk.org, chenbo.xia@intel.com
Subject: Re: [dpdk-dev] [PATCH v4] vhost: fix async copy fail on multi-page buffers
Date: Tue, 28 Jul 2020 15:55:27 +0200
Message-ID: <113e052d-8d6e-a0d4-2218-5a022e2497ee@redhat.com> (raw)
In-Reply-To: <20200728032851.3704083-1-patrick.fu@intel.com>



On 7/28/20 5:28 AM, patrick.fu@intel.com wrote:
> From: Patrick Fu <patrick.fu@intel.com>
> 
> Async copy fails when a single ring buffer vector is split across
> multiple physical pages. This happens because the current hpa address
> translation function doesn't handle multi-page buffers. A new gpa to hpa
> address conversion function, which returns the hpa within the first
> matching host page, is implemented in this patch. The async data path
> repeatedly calls this new function to construct a multi-segment async
> copy descriptor for ring buffers crossing physical page boundaries.
> 
> Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
> 
> Signed-off-by: Patrick Fu <patrick.fu@intel.com>
> ---
> v2:
>  - change commit message and title
>  - v1 patch used CPU to copy multi-page buffers; v2 patch split the
> copy into multiple async copy segments whenever possible
> 
> v3:
>  - added fixline
> 
> v4:
>  - fix mistranslation of a gpa range whose length is equal to the host
>    page size
> 
>  lib/librte_vhost/vhost.h      | 50 +++++++++++++++++++++++++++++++++++
>  lib/librte_vhost/virtio_net.c | 40 +++++++++++++++++-----------
>  2 files changed, 75 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 0f7212f88..05c202a57 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -616,6 +616,56 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
>  	return 0;
>  }
>  
> +static __rte_always_inline rte_iova_t
> +gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
> +	uint64_t gpa_size, uint64_t *hpa_size)
> +{
> +	uint32_t i;
> +	struct guest_page *page;
> +	struct guest_page key;
> +
> +	*hpa_size = gpa_size;
> +	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
> +		key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1);
> +		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
> +			       sizeof(struct guest_page), guest_page_addrcmp);
> +		if (page) {
> +			if (gpa + gpa_size <=
> +					page->guest_phys_addr + page->size) {
> +				return gpa - page->guest_phys_addr +
> +					page->host_phys_addr;
> +			} else if (gpa < page->guest_phys_addr +
> +						page->size) {
> +				*hpa_size = page->guest_phys_addr +
> +					page->size - gpa;
> +				return gpa - page->guest_phys_addr +
> +					page->host_phys_addr;
> +			}
> +		}
> +	} else {
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +
> +			if (gpa >= page->guest_phys_addr) {
> +				if (gpa + gpa_size <=
> +					page->guest_phys_addr + page->size) {
> +					return gpa - page->guest_phys_addr +
> +						page->host_phys_addr;
> +				} else if (gpa < page->guest_phys_addr +
> +							page->size) {
> +					*hpa_size = page->guest_phys_addr +
> +						page->size - gpa;
> +					return gpa - page->guest_phys_addr +
> +						page->host_phys_addr;
> +				}
> +			}
> +		}
> +	}
> +
> +	*hpa_size = 0;
> +	return 0;
> +}
> +
>  static __rte_always_inline uint64_t
>  hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len)
>  {
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 95a0bc19f..124a33a10 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -980,6 +980,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
>  	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
>  	int error = 0;
> +	uint64_t mapped_len;
>  
>  	uint32_t tlen = 0;
>  	int tvec_idx = 0;
> @@ -1072,24 +1073,31 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  
>  		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
>  
> -		if (unlikely(cpy_len >= cpy_threshold)) {
> -			hpa = (void *)(uintptr_t)gpa_to_hpa(dev,
> -					buf_iova + buf_offset, cpy_len);
> +		while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
> +			hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
> +					buf_iova + buf_offset,
> +					cpy_len, &mapped_len);
>  
> -			if (unlikely(!hpa)) {
> -				error = -1;
> -				goto out;
> -			}
> +			if (unlikely(!hpa || mapped_len < cpy_threshold))
> +				break;
>  
>  			async_fill_vec(src_iovec + tvec_idx,
>  				(void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
> -						mbuf_offset), cpy_len);
> +				mbuf_offset), (size_t)mapped_len);
>  
> -			async_fill_vec(dst_iovec + tvec_idx, hpa, cpy_len);
> +			async_fill_vec(dst_iovec + tvec_idx,
> +					hpa, (size_t)mapped_len);
>  
> -			tlen += cpy_len;
> +			tlen += (uint32_t)mapped_len;
> +			cpy_len -= (uint32_t)mapped_len;
> +			mbuf_avail  -= (uint32_t)mapped_len;
> +			mbuf_offset += (uint32_t)mapped_len;
> +			buf_avail  -= (uint32_t)mapped_len;
> +			buf_offset += (uint32_t)mapped_len;
>  			tvec_idx++;
> -		} else {
> +		}
> +
> +		if (likely(cpy_len)) {
>  			if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
>  				rte_memcpy(
>  				(void *)((uintptr_t)(buf_addr + buf_offset)),
> @@ -1112,10 +1120,12 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  			}
>  		}
>  
> -		mbuf_avail  -= cpy_len;
> -		mbuf_offset += cpy_len;
> -		buf_avail  -= cpy_len;
> -		buf_offset += cpy_len;
> +		if (cpy_len) {
> +			mbuf_avail  -= cpy_len;
> +			mbuf_offset += cpy_len;
> +			buf_avail  -= cpy_len;
> +			buf_offset += cpy_len;
> +		}

Is it really necessary to check that the copy length is not 0?

Thanks,
Maxime

>  	}
>  
>  out:
> 


  reply	other threads:[~2020-07-28 13:55 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-20  2:52 [dpdk-dev] [PATCH v1] vhost: support cross page buf in async data path patrick.fu
2020-07-20 16:39 ` Maxime Coquelin
2020-07-21  2:57   ` Fu, Patrick
2020-07-21  8:35     ` Maxime Coquelin
2020-07-21  9:01       ` Fu, Patrick
2020-07-24 13:49 ` [dpdk-dev] [PATCH v2] vhost: fix async copy fail on multi-page buffers patrick.fu
2020-07-27  6:33 ` [dpdk-dev] [PATCH v3] " patrick.fu
2020-07-27 13:14   ` Xia, Chenbo
2020-07-28  3:09     ` Fu, Patrick
2020-07-28  3:28 ` [dpdk-dev] [PATCH v4] " patrick.fu
2020-07-28 13:55   ` Maxime Coquelin [this message]
2020-07-29  1:40     ` Fu, Patrick
2020-07-29  2:05       ` Fu, Patrick
2020-07-29  2:04 ` [dpdk-dev] [PATCH v5] " Patrick Fu
2020-07-29 14:24   ` Maxime Coquelin
2020-07-29 14:55   ` Maxime Coquelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=113e052d-8d6e-a0d4-2218-5a022e2497ee@redhat.com \
    --to=maxime.coquelin@redhat.com \
    --cc=chenbo.xia@intel.com \
    --cc=dev@dpdk.org \
    --cc=patrick.fu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git