From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <maxime.coquelin@redhat.com>
Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28])
 by dpdk.org (Postfix) with ESMTP id 217352951
 for <dev@dpdk.org>; Tue, 23 Aug 2016 11:58:46 +0200 (CEST)
Received: from int-mx10.intmail.prod.int.phx2.redhat.com
 (int-mx10.intmail.prod.int.phx2.redhat.com [10.5.11.23])
 (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
 (No client certificate requested)
 by mx1.redhat.com (Postfix) with ESMTPS id 7DF56C057FA9;
 Tue, 23 Aug 2016 09:58:45 +0000 (UTC)
Received: from [10.36.4.245] (vpn1-4-245.ams2.redhat.com [10.36.4.245])
 by int-mx10.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id
 u7N9whiu009471
 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO);
 Tue, 23 Aug 2016 05:58:44 -0400
To: Yuanhan Liu <yuanhan.liu@linux.intel.com>, dev@dpdk.org
References: <1471939839-29778-1-git-send-email-yuanhan.liu@linux.intel.com>
 <1471939839-29778-3-git-send-email-yuanhan.liu@linux.intel.com>
From: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-ID: <13f37c6e-b389-a758-81cd-861db7337e1f@redhat.com>
Date: Tue, 23 Aug 2016 11:58:42 +0200
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101
 Thunderbird/45.2.0
MIME-Version: 1.0
In-Reply-To: <1471939839-29778-3-git-send-email-yuanhan.liu@linux.intel.com>
Content-Type: text/plain; charset=windows-1252; format=flowed
Content-Transfer-Encoding: 7bit
X-Scanned-By: MIMEDefang 2.68 on 10.5.11.23
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16
 (mx1.redhat.com [10.5.110.32]); Tue, 23 Aug 2016 09:58:45 +0000 (UTC)
Subject: Re: [dpdk-dev] [PATCH 2/6] vhost: get guest/host physical address
	mappings
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Tue, 23 Aug 2016 09:58:46 -0000



On 08/23/2016 10:10 AM, Yuanhan Liu wrote:
> So that we can convert a guest physical address to host physical
> address, which will be used in later Tx zero copy implementation.
>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
>  lib/librte_vhost/vhost.h      | 30 +++++++++++++++
>  lib/librte_vhost/vhost_user.c | 86 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 116 insertions(+)
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index df2107b..2d52987 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -114,6 +114,12 @@ struct vhost_virtqueue {
>   #define VIRTIO_F_VERSION_1 32
>  #endif
>
> +struct guest_page {
> +	uint64_t guest_phys_addr;
> +	uint64_t host_phys_addr;
> +	uint64_t size;
> +};
> +
>  /**
>   * Device structure contains all configuration information relating
>   * to the device.
> @@ -137,6 +143,10 @@ struct virtio_net {
>  	uint64_t		log_addr;
>  	struct ether_addr	mac;
>
> +	uint32_t		nr_guest_pages;
> +	uint32_t		max_guest_pages;
> +	struct guest_page       *guest_pages;
> +
>  } __rte_cache_aligned;
>
>  /**
> @@ -217,6 +227,26 @@ gpa_to_vva(struct virtio_net *dev, uint64_t gpa)
>  	return 0;
>  }
>
> +/* Convert guest physical address to host physical address */
> +static inline phys_addr_t __attribute__((always_inline))
> +gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> +{
> +	uint32_t i;
> +	struct guest_page *page;
> +
> +	for (i = 0; i < dev->nr_guest_pages; i++) {
> +		page = &dev->guest_pages[i];
> +
> +		if (gpa >= page->guest_phys_addr &&
> +		    gpa + size < page->guest_phys_addr + page->size) {
Shouldn't this be '<=' here?

> +			return gpa - page->guest_phys_addr +
> +			       page->host_phys_addr;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  struct virtio_net_device_ops const *notify_ops;
>  struct virtio_net *get_device(int vid);
>
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index d2071fd..045d4f0 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -372,6 +372,81 @@ vhost_user_set_vring_base(struct virtio_net *dev,
>  	return 0;
>  }
>
> +static void
> +add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> +		   uint64_t host_phys_addr, uint64_t size)
> +{
> +	struct guest_page *page;
> +
> +	if (dev->nr_guest_pages == dev->max_guest_pages) {
> +		dev->max_guest_pages *= 2;
> +		dev->guest_pages = realloc(dev->guest_pages,
> +					dev->max_guest_pages * sizeof(*page));

Maybe the return value of realloc could be checked?

> +	}
> +
> +	page = &dev->guest_pages[dev->nr_guest_pages++];
> +	page->guest_phys_addr = guest_phys_addr;
> +	page->host_phys_addr  = host_phys_addr;
> +	page->size = size;
> +}
> +
> +static void
> +add_guest_pages(struct virtio_net *dev, struct virtio_memory_region *reg,
> +		uint64_t page_size)
> +{
> +	uint64_t reg_size = reg->size;
> +	uint64_t host_user_addr  = reg->host_user_addr;
> +	uint64_t guest_phys_addr = reg->guest_phys_addr;
> +	uint64_t host_phys_addr;
> +	uint64_t size;
> +	uint32_t pre_read;
> +
> +	pre_read = *((uint32_t *)(uintptr_t)host_user_addr);
> +	host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr);
> +	size = page_size - (guest_phys_addr & (page_size - 1));
> +	size = RTE_MIN(size, reg_size);
> +
> +	add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size);
> +	host_user_addr  += size;
> +	guest_phys_addr += size;
> +	reg_size -= size;
> +
> +	while (reg_size > 0) {
> +		pre_read += *((uint32_t *)(uintptr_t)host_user_addr);
> +		host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr);
> +		add_one_guest_page(dev, guest_phys_addr, host_phys_addr, page_size);
> +
> +		host_user_addr  += page_size;
> +		guest_phys_addr += page_size;
> +		reg_size -= page_size;
> +	}
> +
> +	/* FIXME */
> +	RTE_LOG(INFO, VHOST_CONFIG, ":: %u ::\n", pre_read);
For my information, what is the purpose of pre_read?

> +}
> +
> +/* TODO: enable it only in debug mode? */
> +static void
> +dump_guest_pages(struct virtio_net *dev)
> +{
> +	uint32_t i;
> +	struct guest_page *page;
> +
> +	for (i = 0; i < dev->nr_guest_pages; i++) {
> +		page = &dev->guest_pages[i];
> +
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"guest physical page region %u\n"
> +			"\t guest_phys_addr: %" PRIx64 "\n"
> +			"\t host_phys_addr : %" PRIx64 "\n"
> +			"\t size           : %" PRIx64 "\n",
> +			i,
> +			page->guest_phys_addr,
> +			page->host_phys_addr,
> +			page->size);
> +	}
> +}
> +
>  static int
>  vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
>  {
> @@ -396,6 +471,13 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
>  		dev->mem = NULL;
>  	}
>
> +	dev->nr_guest_pages = 0;
> +	if (!dev->guest_pages) {
> +		dev->max_guest_pages = 8;
> +		dev->guest_pages = malloc(dev->max_guest_pages *
> +						sizeof(struct guest_page));
> +	}
> +
>  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct virtio_memory) +
>  		sizeof(struct virtio_memory_region) * memory.nregions, 0);
>  	if (dev->mem == NULL) {
> @@ -447,6 +529,8 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
>  		reg->mmap_size = mmap_size;
>  		reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
>
> +		add_guest_pages(dev, reg, alignment);
> +
>  		RTE_LOG(INFO, VHOST_CONFIG,
>  			"guest memory region %u, size: 0x%" PRIx64 "\n"
>  			"\t guest physical addr: 0x%" PRIx64 "\n"
> @@ -466,6 +550,8 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
>  			mmap_offset);
>  	}
>
> +	dump_guest_pages(dev);
> +
>  	return 0;
>
>  err_mmap:
>