DPDK patches and discussions
 help / color / mirror / Atom feed
* Re: [dpdk-dev] [PATCH] vhost: cache guest/vhost physical address mapping
  2020-03-16 15:33 [dpdk-dev] [PATCH] vhost: cache guest/vhost physical address mapping Marvin Liu
@ 2020-03-16 13:48 ` Ye Xiaolong
  2020-03-17  1:01   ` Liu, Yong
  2020-04-01 14:50 ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 31+ messages in thread
From: Ye Xiaolong @ 2020-03-16 13:48 UTC (permalink / raw)
  To: Marvin Liu; +Cc: maxime.coquelin, zhihong.wang, dev

Hi, Marvin

On 03/16, Marvin Liu wrote:
>If Tx zero copy enabled, gpa to hpa mapping table is updated one by
>one. This will harm performance when guest memory backend using 2M
>hugepages. Now add cached mapping table which will sorted by using
>sequence. Address translation will first check cached mapping table,
>now performance is back.
>
>Signed-off-by: Marvin Liu <yong.liu@intel.com>
>
>diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>index 2087d1400..de2c09e7e 100644
>--- a/lib/librte_vhost/vhost.h
>+++ b/lib/librte_vhost/vhost.h
>@@ -368,7 +368,9 @@ struct virtio_net {
> 	struct vhost_device_ops const *notify_ops;
> 
> 	uint32_t		nr_guest_pages;
>+	uint32_t		nr_cached;

What about naming it nr_cached_guest_pages to make it more self-explanatory
as nr_cached is too generic?

> 	uint32_t		max_guest_pages;
>+	struct guest_page       *cached_guest_pages;
> 	struct guest_page       *guest_pages;
> 
> 	int			slave_req_fd;
>@@ -554,11 +556,23 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> 	uint32_t i;
> 	struct guest_page *page;
> 
>+	for (i = 0; i < dev->nr_cached; i++) {
>+		page = &dev->cached_guest_pages[i];
>+		if (gpa >= page->guest_phys_addr &&
>+			gpa + size < page->guest_phys_addr + page->size) {
>+			return gpa - page->guest_phys_addr +
>+				page->host_phys_addr;
>+		}
>+	}
>+
> 	for (i = 0; i < dev->nr_guest_pages; i++) {
> 		page = &dev->guest_pages[i];
> 
> 		if (gpa >= page->guest_phys_addr &&
> 		    gpa + size < page->guest_phys_addr + page->size) {
>+			rte_memcpy(&dev->cached_guest_pages[dev->nr_cached],
>+				   page, sizeof(struct guest_page));
>+			dev->nr_cached++;
> 			return gpa - page->guest_phys_addr +
> 			       page->host_phys_addr;
> 		}
>diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>index bd1be0104..573e99066 100644
>--- a/lib/librte_vhost/vhost_user.c
>+++ b/lib/librte_vhost/vhost_user.c
>@@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> 	}
> 
> 	free(dev->guest_pages);
>+	free(dev->cached_guest_pages);
> 	dev->guest_pages = NULL;
>+	dev->cached_guest_pages = NULL;
> 
> 	if (dev->log_addr) {
> 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
>@@ -905,7 +907,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> 		old_pages = dev->guest_pages;
> 		dev->guest_pages = realloc(dev->guest_pages,
> 					dev->max_guest_pages * sizeof(*page));
>-		if (!dev->guest_pages) {
>+		dev->cached_guest_pages = realloc(dev->cached_guest_pages,
>+					dev->max_guest_pages * sizeof(*page));
>+		dev->nr_cached = 0;
>+		if (!dev->guest_pages || !dev->cached_guest_pages) {

Better to compare pointer to NULL according to DPDK's coding style.

> 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
> 			free(old_pages);
> 			return -1;
>@@ -1075,6 +1080,18 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> 		}
> 	}
> 

Do we need to initialize dev->nr_cached to 0 explicitly here?

>+	if (!dev->cached_guest_pages) {
>+		dev->cached_guest_pages = malloc(dev->max_guest_pages *
>+						sizeof(struct guest_page));

I'm wondering why malloc/realloc/free are used for cached_guest_pages instead of the
DPDK memory allocator APIs. I can see the current code uses malloc/realloc/free for
guest_pages; is there any historical reason I don't know about?

Thanks,
Xiaolong

>+		if (dev->cached_guest_pages == NULL) {
>+			VHOST_LOG_CONFIG(ERR,
>+				"(%d) failed to allocate memory "
>+				"for dev->cached_guest_pages\n",
>+				dev->vid);
>+			return RTE_VHOST_MSG_RESULT_ERR;
>+		}
>+	}
>+
> 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
> 		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> 	if (dev->mem == NULL) {
>-- 
>2.17.1
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH] vhost: cache guest/vhost physical address mapping
@ 2020-03-16 15:33 Marvin Liu
  2020-03-16 13:48 ` Ye Xiaolong
                   ` (4 more replies)
  0 siblings, 5 replies; 31+ messages in thread
From: Marvin Liu @ 2020-03-16 15:33 UTC (permalink / raw)
  To: maxime.coquelin, zhihong.wang, xiaolong.ye; +Cc: dev, Marvin Liu

If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
by one. This will harm performance when the guest memory backend uses 2M
hugepages. Now add a cached mapping table which will be sorted by using
sequence. Address translation will first check the cached mapping table,
so performance is back.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 2087d1400..de2c09e7e 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -368,7 +368,9 @@ struct virtio_net {
 	struct vhost_device_ops const *notify_ops;
 
 	uint32_t		nr_guest_pages;
+	uint32_t		nr_cached;
 	uint32_t		max_guest_pages;
+	struct guest_page       *cached_guest_pages;
 	struct guest_page       *guest_pages;
 
 	int			slave_req_fd;
@@ -554,11 +556,23 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
 	uint32_t i;
 	struct guest_page *page;
 
+	for (i = 0; i < dev->nr_cached; i++) {
+		page = &dev->cached_guest_pages[i];
+		if (gpa >= page->guest_phys_addr &&
+			gpa + size < page->guest_phys_addr + page->size) {
+			return gpa - page->guest_phys_addr +
+				page->host_phys_addr;
+		}
+	}
+
 	for (i = 0; i < dev->nr_guest_pages; i++) {
 		page = &dev->guest_pages[i];
 
 		if (gpa >= page->guest_phys_addr &&
 		    gpa + size < page->guest_phys_addr + page->size) {
+			rte_memcpy(&dev->cached_guest_pages[dev->nr_cached],
+				   page, sizeof(struct guest_page));
+			dev->nr_cached++;
 			return gpa - page->guest_phys_addr +
 			       page->host_phys_addr;
 		}
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bd1be0104..573e99066 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
 	}
 
 	free(dev->guest_pages);
+	free(dev->cached_guest_pages);
 	dev->guest_pages = NULL;
+	dev->cached_guest_pages = NULL;
 
 	if (dev->log_addr) {
 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
@@ -905,7 +907,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 		old_pages = dev->guest_pages;
 		dev->guest_pages = realloc(dev->guest_pages,
 					dev->max_guest_pages * sizeof(*page));
-		if (!dev->guest_pages) {
+		dev->cached_guest_pages = realloc(dev->cached_guest_pages,
+					dev->max_guest_pages * sizeof(*page));
+		dev->nr_cached = 0;
+		if (!dev->guest_pages || !dev->cached_guest_pages) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
 			free(old_pages);
 			return -1;
@@ -1075,6 +1080,18 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		}
 	}
 
+	if (!dev->cached_guest_pages) {
+		dev->cached_guest_pages = malloc(dev->max_guest_pages *
+						sizeof(struct guest_page));
+		if (dev->cached_guest_pages == NULL) {
+			VHOST_LOG_CONFIG(ERR,
+				"(%d) failed to allocate memory "
+				"for dev->cached_guest_pages\n",
+				dev->vid);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+	}
+
 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
 		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
 	if (dev->mem == NULL) {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH] vhost: cache guest/vhost physical address mapping
  2020-03-16 13:48 ` Ye Xiaolong
@ 2020-03-17  1:01   ` Liu, Yong
  0 siblings, 0 replies; 31+ messages in thread
From: Liu, Yong @ 2020-03-17  1:01 UTC (permalink / raw)
  To: Ye, Xiaolong; +Cc: maxime.coquelin, Wang, Zhihong, dev

Thanks, xiaolong. 

> -----Original Message-----
> From: Ye, Xiaolong <xiaolong.ye@intel.com>
> Sent: Monday, March 16, 2020 9:48 PM
> To: Liu, Yong <yong.liu@intel.com>
> Cc: maxime.coquelin@redhat.com; Wang, Zhihong
> <zhihong.wang@intel.com>; dev@dpdk.org
> Subject: Re: [PATCH] vhost: cache guest/vhost physical address mapping
> 
> Hi, Marvin
> 
> On 03/16, Marvin Liu wrote:
> >If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> >one. This will harm performance when guest memory backend using 2M
> >hugepages. Now add cached mapping table which will sorted by using
> >sequence. Address translation will first check cached mapping table,
> >now performance is back.
> >
> >Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> >diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> >index 2087d1400..de2c09e7e 100644
> >--- a/lib/librte_vhost/vhost.h
> >+++ b/lib/librte_vhost/vhost.h
> >@@ -368,7 +368,9 @@ struct virtio_net {
> > 	struct vhost_device_ops const *notify_ops;
> >
> > 	uint32_t		nr_guest_pages;
> >+	uint32_t		nr_cached;
> 
> What about naming it nr_cached_guest_pages to make it more self-
> explanatory
> as nr_cached is too generic?

Agreed, naming is too generic. Will be changed in next version.

> 
> > 	uint32_t		max_guest_pages;
> >+	struct guest_page       *cached_guest_pages;
> > 	struct guest_page       *guest_pages;
> >
> > 	int			slave_req_fd;
> >@@ -554,11 +556,23 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa,
> uint64_t size)
> > 	uint32_t i;
> > 	struct guest_page *page;
> >
> >+	for (i = 0; i < dev->nr_cached; i++) {
> >+		page = &dev->cached_guest_pages[i];
> >+		if (gpa >= page->guest_phys_addr &&
> >+			gpa + size < page->guest_phys_addr + page->size) {
> >+			return gpa - page->guest_phys_addr +
> >+				page->host_phys_addr;
> >+		}
> >+	}
> >+
> > 	for (i = 0; i < dev->nr_guest_pages; i++) {
> > 		page = &dev->guest_pages[i];
> >
> > 		if (gpa >= page->guest_phys_addr &&
> > 		    gpa + size < page->guest_phys_addr + page->size) {
> >+			rte_memcpy(&dev->cached_guest_pages[dev-
> >nr_cached],
> >+				   page, sizeof(struct guest_page));
> >+			dev->nr_cached++;
> > 			return gpa - page->guest_phys_addr +
> > 			       page->host_phys_addr;
> > 		}
> >diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> >index bd1be0104..573e99066 100644
> >--- a/lib/librte_vhost/vhost_user.c
> >+++ b/lib/librte_vhost/vhost_user.c
> >@@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> > 	}
> >
> > 	free(dev->guest_pages);
> >+	free(dev->cached_guest_pages);
> > 	dev->guest_pages = NULL;
> >+	dev->cached_guest_pages = NULL;
> >
> > 	if (dev->log_addr) {
> > 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> >@@ -905,7 +907,10 @@ add_one_guest_page(struct virtio_net *dev,
> uint64_t guest_phys_addr,
> > 		old_pages = dev->guest_pages;
> > 		dev->guest_pages = realloc(dev->guest_pages,
> > 					dev->max_guest_pages *
> sizeof(*page));
> >-		if (!dev->guest_pages) {
> >+		dev->cached_guest_pages = realloc(dev-
> >cached_guest_pages,
> >+					dev->max_guest_pages *
> sizeof(*page));
> >+		dev->nr_cached = 0;
> >+		if (!dev->guest_pages || !dev->cached_guest_pages) {
> 
> Better to compare pointer to NULL according to DPDK's coding style.
> 
 OK, will change it.
> > 			VHOST_LOG_CONFIG(ERR, "cannot realloc
> guest_pages\n");
> > 			free(old_pages);
> > 			return -1;
> >@@ -1075,6 +1080,18 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *msg,
> > 		}
> > 	}
> >
> 
> Do we need initialize dev->nr_cached to 0 explicitly here?
> 

Structure vhost_virtqueue has been cleared in function init_vring_queue, so it is not needed to do initialization in other places.

> >+	if (!dev->cached_guest_pages) {
> >+		dev->cached_guest_pages = malloc(dev->max_guest_pages *
> >+						sizeof(struct guest_page));
> 
> I'm wondering why use malloc/realloc/free for cached_guest_pages instead
> of DPDK
> memory allocator APIs, I can see current code uses malloc/realloc/free for
> guest_pages,
> Is there any history reason I don't know?
> 

I don't think there's a specific reason to use glibc malloc/realloc functions.
The history may be lost since the code was added a few years ago. I will change these functions to the dpdk API in the next version.

> Thanks,
> Xiaolong
> 
> >+		if (dev->cached_guest_pages == NULL) {
> >+			VHOST_LOG_CONFIG(ERR,
> >+				"(%d) failed to allocate memory "
> >+				"for dev->cached_guest_pages\n",
> >+				dev->vid);
> >+			return RTE_VHOST_MSG_RESULT_ERR;
> >+		}
> >+	}
> >+
> > 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> rte_vhost_memory) +
> > 		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> 0);
> > 	if (dev->mem == NULL) {
> >--
> >2.17.1
> >

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
  2020-04-01 14:50   ` [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation Marvin Liu
@ 2020-04-01 10:07     ` Gavin Hu
  2020-04-01 13:01       ` Liu, Yong
  2020-04-02  2:57     ` Ye Xiaolong
  2020-04-27  8:45     ` Maxime Coquelin
  2 siblings, 1 reply; 31+ messages in thread
From: Gavin Hu @ 2020-04-01 10:07 UTC (permalink / raw)
  To: Marvin Liu, maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, nd

Hi Marvin,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> Sent: Wednesday, April 1, 2020 10:50 PM
> To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com;
> zhihong.wang@intel.com
> Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> one. This will harm performance when guest memory backend using 2M
> hugepages. Now add cached mapping table which will sorted by using
> sequence. Address translation will first check cached mapping table,
> then check unsorted mapping table if no match found.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 2087d1400..5cb0e83dd 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -368,7 +368,9 @@ struct virtio_net {
>  	struct vhost_device_ops const *notify_ops;
> 
>  	uint32_t		nr_guest_pages;
> +	uint32_t		nr_cached_guest_pages;
>  	uint32_t		max_guest_pages;
> +	struct guest_page       *cached_guest_pages;
>  	struct guest_page       *guest_pages;
> 
>  	int			slave_req_fd;
> @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa,
> uint64_t size)
>  {
>  	uint32_t i;
>  	struct guest_page *page;
> +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> +
> +	for (i = 0; i < cached_pages; i++) {
> +		page = &dev->cached_guest_pages[i];
> +		if (gpa >= page->guest_phys_addr &&
> +			gpa + size < page->guest_phys_addr + page->size) {
> +			return gpa - page->guest_phys_addr +
> +				page->host_phys_addr;
> +		}
> +	}
Sorry, I did not see any speedup with cached guest pages in comparison to the old code below.
Is it not a simple copy? 
Is it a better idea to use hash instead to speed up the translation? 
/Gavin
> 
>  	for (i = 0; i < dev->nr_guest_pages; i++) {
>  		page = &dev->guest_pages[i];
> 
>  		if (gpa >= page->guest_phys_addr &&
>  		    gpa + size < page->guest_phys_addr + page->size) {
> +			rte_memcpy(&dev-
> >cached_guest_pages[cached_pages],
> +				   page, sizeof(struct guest_page));
> +			dev->nr_cached_guest_pages++;
>  			return gpa - page->guest_phys_addr +
>  			       page->host_phys_addr;
>  		}
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 79fcb9d19..1bae1fddc 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
>  	}
> 
>  	rte_free(dev->guest_pages);
> +	rte_free(dev->cached_guest_pages);
>  	dev->guest_pages = NULL;
> +	dev->cached_guest_pages = NULL;
> 
>  	if (dev->log_addr) {
>  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev,
> uint64_t guest_phys_addr,
>  		   uint64_t host_phys_addr, uint64_t size)
>  {
>  	struct guest_page *page, *last_page;
> -	struct guest_page *old_pages;
> +	struct guest_page *old_pages, *old_cached_pages;
> 
>  	if (dev->nr_guest_pages == dev->max_guest_pages) {
>  		dev->max_guest_pages *= 2;
> @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev,
> uint64_t guest_phys_addr,
>  		dev->guest_pages = rte_realloc(dev->guest_pages,
>  					dev->max_guest_pages *
> sizeof(*page),
>  					RTE_CACHE_LINE_SIZE);
> -		if (dev->guest_pages == NULL) {
> +		old_cached_pages = dev->cached_guest_pages;
> +		dev->cached_guest_pages = rte_realloc(dev-
> >cached_guest_pages,
> +						dev->max_guest_pages *
> +						sizeof(*page),
> +						RTE_CACHE_LINE_SIZE);
> +		dev->nr_cached_guest_pages = 0;
> +		if (dev->guest_pages == NULL ||
> +				dev->cached_guest_pages == NULL) {
>  			VHOST_LOG_CONFIG(ERR, "cannot realloc
> guest_pages\n");
>  			rte_free(old_pages);
> +			rte_free(old_cached_pages);
> +			dev->guest_pages = NULL;
> +			dev->cached_guest_pages = NULL;
>  			return -1;
>  		}
>  	}
> @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *msg,
>  		}
>  	}
> 
> +	if (dev->cached_guest_pages == NULL) {
> +		dev->cached_guest_pages = rte_zmalloc(NULL,
> +						dev->max_guest_pages *
> +						sizeof(struct guest_page),
> +						RTE_CACHE_LINE_SIZE);
> +		if (dev->cached_guest_pages == NULL) {
> +			VHOST_LOG_CONFIG(ERR,
> +				"(%d) failed to allocate memory "
> +				"for dev->cached_guest_pages\n",
> +				dev->vid);
> +			return RTE_VHOST_MSG_RESULT_ERR;
> +		}
> +	}
> +
>  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> rte_vhost_memory) +
>  		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> 0);
>  	if (dev->mem == NULL) {
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator
  2020-04-01 14:50 ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
@ 2020-04-01 10:08   ` Gavin Hu
  2020-04-01 14:50   ` [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation Marvin Liu
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 31+ messages in thread
From: Gavin Hu @ 2020-04-01 10:08 UTC (permalink / raw)
  To: Marvin Liu, maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, nd

Reviewed-by: Gavin Hu <gavin.hu@arm.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
  2020-04-01 10:07     ` Gavin Hu
@ 2020-04-01 13:01       ` Liu, Yong
  2020-04-02  3:04         ` Gavin Hu
  2020-04-03  8:22         ` Ma, LihongX
  0 siblings, 2 replies; 31+ messages in thread
From: Liu, Yong @ 2020-04-01 13:01 UTC (permalink / raw)
  To: Gavin Hu, maxime.coquelin, Ye, Xiaolong, Wang, Zhihong; +Cc: dev, nd



> -----Original Message-----
> From: Gavin Hu <Gavin.Hu@arm.com>
> Sent: Wednesday, April 1, 2020 6:07 PM
> To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye,
> Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> Hi Marvin,
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > Sent: Wednesday, April 1, 2020 10:50 PM
> > To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com;
> > zhihong.wang@intel.com
> > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> >
> > If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> > one. This will harm performance when guest memory backend using 2M
> > hugepages. Now add cached mapping table which will sorted by using
> > sequence. Address translation will first check cached mapping table,
> > then check unsorted mapping table if no match found.
> >
> > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > index 2087d1400..5cb0e83dd 100644
> > --- a/lib/librte_vhost/vhost.h
> > +++ b/lib/librte_vhost/vhost.h
> > @@ -368,7 +368,9 @@ struct virtio_net {
> >  	struct vhost_device_ops const *notify_ops;
> >
> >  	uint32_t		nr_guest_pages;
> > +	uint32_t		nr_cached_guest_pages;
> >  	uint32_t		max_guest_pages;
> > +	struct guest_page       *cached_guest_pages;
> >  	struct guest_page       *guest_pages;
> >
> >  	int			slave_req_fd;
> > @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa,
> > uint64_t size)
> >  {
> >  	uint32_t i;
> >  	struct guest_page *page;
> > +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> > +
> > +	for (i = 0; i < cached_pages; i++) {
> > +		page = &dev->cached_guest_pages[i];
> > +		if (gpa >= page->guest_phys_addr &&
> > +			gpa + size < page->guest_phys_addr + page->size) {
> > +			return gpa - page->guest_phys_addr +
> > +				page->host_phys_addr;
> > +		}
> > +	}
> Sorry, I did not see any speedup with cached guest pages in comparison to
> the old code below.
> Is it not a simple copy?
> Is it a better idea to use hash instead to speed up the translation?
> /Gavin

Hi Gavin,
Here we just resort the overall mapping table according to the using sequence.
Most likely the virtio driver will reuse recently cycled buffers, thus the search will find a match near the beginning.
That is a simple fix for performance enhancement. If a hash were used for the index, it would cost much more in the normal case.

Regards,
Marvin 


> >
> >  	for (i = 0; i < dev->nr_guest_pages; i++) {
> >  		page = &dev->guest_pages[i];
> >
> >  		if (gpa >= page->guest_phys_addr &&
> >  		    gpa + size < page->guest_phys_addr + page->size) {
> > +			rte_memcpy(&dev-
> > >cached_guest_pages[cached_pages],
> > +				   page, sizeof(struct guest_page));
> > +			dev->nr_cached_guest_pages++;
> >  			return gpa - page->guest_phys_addr +
> >  			       page->host_phys_addr;
> >  		}
> > diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> > index 79fcb9d19..1bae1fddc 100644
> > --- a/lib/librte_vhost/vhost_user.c
> > +++ b/lib/librte_vhost/vhost_user.c
> > @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> >  	}
> >
> >  	rte_free(dev->guest_pages);
> > +	rte_free(dev->cached_guest_pages);
> >  	dev->guest_pages = NULL;
> > +	dev->cached_guest_pages = NULL;
> >
> >  	if (dev->log_addr) {
> >  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> > @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev,
> > uint64_t guest_phys_addr,
> >  		   uint64_t host_phys_addr, uint64_t size)
> >  {
> >  	struct guest_page *page, *last_page;
> > -	struct guest_page *old_pages;
> > +	struct guest_page *old_pages, *old_cached_pages;
> >
> >  	if (dev->nr_guest_pages == dev->max_guest_pages) {
> >  		dev->max_guest_pages *= 2;
> > @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev,
> > uint64_t guest_phys_addr,
> >  		dev->guest_pages = rte_realloc(dev->guest_pages,
> >  					dev->max_guest_pages *
> > sizeof(*page),
> >  					RTE_CACHE_LINE_SIZE);
> > -		if (dev->guest_pages == NULL) {
> > +		old_cached_pages = dev->cached_guest_pages;
> > +		dev->cached_guest_pages = rte_realloc(dev-
> > >cached_guest_pages,
> > +						dev->max_guest_pages *
> > +						sizeof(*page),
> > +						RTE_CACHE_LINE_SIZE);
> > +		dev->nr_cached_guest_pages = 0;
> > +		if (dev->guest_pages == NULL ||
> > +				dev->cached_guest_pages == NULL) {
> >  			VHOST_LOG_CONFIG(ERR, "cannot realloc
> > guest_pages\n");
> >  			rte_free(old_pages);
> > +			rte_free(old_cached_pages);
> > +			dev->guest_pages = NULL;
> > +			dev->cached_guest_pages = NULL;
> >  			return -1;
> >  		}
> >  	}
> > @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net
> > **pdev, struct VhostUserMsg *msg,
> >  		}
> >  	}
> >
> > +	if (dev->cached_guest_pages == NULL) {
> > +		dev->cached_guest_pages = rte_zmalloc(NULL,
> > +						dev->max_guest_pages *
> > +						sizeof(struct guest_page),
> > +						RTE_CACHE_LINE_SIZE);
> > +		if (dev->cached_guest_pages == NULL) {
> > +			VHOST_LOG_CONFIG(ERR,
> > +				"(%d) failed to allocate memory "
> > +				"for dev->cached_guest_pages\n",
> > +				dev->vid);
> > +			return RTE_VHOST_MSG_RESULT_ERR;
> > +		}
> > +	}
> > +
> >  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> > rte_vhost_memory) +
> >  		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> > 0);
> >  	if (dev->mem == NULL) {
> > --
> > 2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator
  2020-03-16 15:33 [dpdk-dev] [PATCH] vhost: cache guest/vhost physical address mapping Marvin Liu
  2020-03-16 13:48 ` Ye Xiaolong
@ 2020-04-01 14:50 ` Marvin Liu
  2020-04-01 10:08   ` Gavin Hu
                     ` (4 more replies)
  2020-04-28  9:13 ` [dpdk-dev] [PATCH v3 " Marvin Liu
                   ` (2 subsequent siblings)
  4 siblings, 5 replies; 31+ messages in thread
From: Marvin Liu @ 2020-04-01 14:50 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Marvin Liu

Replace dynamic memory allocator with dpdk memory allocator.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bd1be0104..79fcb9d19 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -191,7 +191,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
 		dev->mem = NULL;
 	}
 
-	free(dev->guest_pages);
+	rte_free(dev->guest_pages);
 	dev->guest_pages = NULL;
 
 	if (dev->log_addr) {
@@ -903,11 +903,12 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	if (dev->nr_guest_pages == dev->max_guest_pages) {
 		dev->max_guest_pages *= 2;
 		old_pages = dev->guest_pages;
-		dev->guest_pages = realloc(dev->guest_pages,
-					dev->max_guest_pages * sizeof(*page));
-		if (!dev->guest_pages) {
+		dev->guest_pages = rte_realloc(dev->guest_pages,
+					dev->max_guest_pages * sizeof(*page),
+					RTE_CACHE_LINE_SIZE);
+		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
-			free(old_pages);
+			rte_free(old_pages);
 			return -1;
 		}
 	}
@@ -1062,10 +1063,12 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
 
 	dev->nr_guest_pages = 0;
-	if (!dev->guest_pages) {
+	if (dev->guest_pages == NULL) {
 		dev->max_guest_pages = 8;
-		dev->guest_pages = malloc(dev->max_guest_pages *
-						sizeof(struct guest_page));
+		dev->guest_pages = rte_zmalloc(NULL,
+					dev->max_guest_pages *
+					sizeof(struct guest_page),
+					RTE_CACHE_LINE_SIZE);
 		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"(%d) failed to allocate memory "
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
  2020-04-01 14:50 ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
  2020-04-01 10:08   ` Gavin Hu
@ 2020-04-01 14:50   ` Marvin Liu
  2020-04-01 10:07     ` Gavin Hu
                       ` (2 more replies)
  2020-04-02  2:57   ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Ye Xiaolong
                     ` (2 subsequent siblings)
  4 siblings, 3 replies; 31+ messages in thread
From: Marvin Liu @ 2020-04-01 14:50 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Marvin Liu

If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
by one. This will harm performance when the guest memory backend uses 2M
hugepages. Now add a cached mapping table which will be sorted by using
sequence. Address translation will first check the cached mapping table,
then check the unsorted mapping table if no match is found.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 2087d1400..5cb0e83dd 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -368,7 +368,9 @@ struct virtio_net {
 	struct vhost_device_ops const *notify_ops;
 
 	uint32_t		nr_guest_pages;
+	uint32_t		nr_cached_guest_pages;
 	uint32_t		max_guest_pages;
+	struct guest_page       *cached_guest_pages;
 	struct guest_page       *guest_pages;
 
 	int			slave_req_fd;
@@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
 {
 	uint32_t i;
 	struct guest_page *page;
+	uint32_t cached_pages = dev->nr_cached_guest_pages;
+
+	for (i = 0; i < cached_pages; i++) {
+		page = &dev->cached_guest_pages[i];
+		if (gpa >= page->guest_phys_addr &&
+			gpa + size < page->guest_phys_addr + page->size) {
+			return gpa - page->guest_phys_addr +
+				page->host_phys_addr;
+		}
+	}
 
 	for (i = 0; i < dev->nr_guest_pages; i++) {
 		page = &dev->guest_pages[i];
 
 		if (gpa >= page->guest_phys_addr &&
 		    gpa + size < page->guest_phys_addr + page->size) {
+			rte_memcpy(&dev->cached_guest_pages[cached_pages],
+				   page, sizeof(struct guest_page));
+			dev->nr_cached_guest_pages++;
 			return gpa - page->guest_phys_addr +
 			       page->host_phys_addr;
 		}
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 79fcb9d19..1bae1fddc 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
 	}
 
 	rte_free(dev->guest_pages);
+	rte_free(dev->cached_guest_pages);
 	dev->guest_pages = NULL;
+	dev->cached_guest_pages = NULL;
 
 	if (dev->log_addr) {
 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
@@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 		   uint64_t host_phys_addr, uint64_t size)
 {
 	struct guest_page *page, *last_page;
-	struct guest_page *old_pages;
+	struct guest_page *old_pages, *old_cached_pages;
 
 	if (dev->nr_guest_pages == dev->max_guest_pages) {
 		dev->max_guest_pages *= 2;
@@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 		dev->guest_pages = rte_realloc(dev->guest_pages,
 					dev->max_guest_pages * sizeof(*page),
 					RTE_CACHE_LINE_SIZE);
-		if (dev->guest_pages == NULL) {
+		old_cached_pages = dev->cached_guest_pages;
+		dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
+						dev->max_guest_pages *
+						sizeof(*page),
+						RTE_CACHE_LINE_SIZE);
+		dev->nr_cached_guest_pages = 0;
+		if (dev->guest_pages == NULL ||
+				dev->cached_guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
 			rte_free(old_pages);
+			rte_free(old_cached_pages);
+			dev->guest_pages = NULL;
+			dev->cached_guest_pages = NULL;
 			return -1;
 		}
 	}
@@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		}
 	}
 
+	if (dev->cached_guest_pages == NULL) {
+		dev->cached_guest_pages = rte_zmalloc(NULL,
+						dev->max_guest_pages *
+						sizeof(struct guest_page),
+						RTE_CACHE_LINE_SIZE);
+		if (dev->cached_guest_pages == NULL) {
+			VHOST_LOG_CONFIG(ERR,
+				"(%d) failed to allocate memory "
+				"for dev->cached_guest_pages\n",
+				dev->vid);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+	}
+
 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
 		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
 	if (dev->mem == NULL) {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator
  2020-04-01 14:50 ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
  2020-04-01 10:08   ` Gavin Hu
  2020-04-01 14:50   ` [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation Marvin Liu
@ 2020-04-02  2:57   ` Ye Xiaolong
  2020-04-03  8:22   ` Ma, LihongX
  2020-04-15 11:15   ` Maxime Coquelin
  4 siblings, 0 replies; 31+ messages in thread
From: Ye Xiaolong @ 2020-04-02  2:57 UTC (permalink / raw)
  To: Marvin Liu; +Cc: maxime.coquelin, zhihong.wang, dev

On 04/01, Marvin Liu wrote:
>Replace dynamic memory allocator with dpdk memory allocator.
>
>Signed-off-by: Marvin Liu <yong.liu@intel.com>
>
>diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>index bd1be0104..79fcb9d19 100644
>--- a/lib/librte_vhost/vhost_user.c
>+++ b/lib/librte_vhost/vhost_user.c
>@@ -191,7 +191,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
> 		dev->mem = NULL;
> 	}
> 
>-	free(dev->guest_pages);
>+	rte_free(dev->guest_pages);
> 	dev->guest_pages = NULL;
> 
> 	if (dev->log_addr) {
>@@ -903,11 +903,12 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> 	if (dev->nr_guest_pages == dev->max_guest_pages) {
> 		dev->max_guest_pages *= 2;
> 		old_pages = dev->guest_pages;
>-		dev->guest_pages = realloc(dev->guest_pages,
>-					dev->max_guest_pages * sizeof(*page));
>-		if (!dev->guest_pages) {
>+		dev->guest_pages = rte_realloc(dev->guest_pages,
>+					dev->max_guest_pages * sizeof(*page),
>+					RTE_CACHE_LINE_SIZE);
>+		if (dev->guest_pages == NULL) {
> 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
>-			free(old_pages);
>+			rte_free(old_pages);
> 			return -1;
> 		}
> 	}
>@@ -1062,10 +1063,12 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> 			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
> 
> 	dev->nr_guest_pages = 0;
>-	if (!dev->guest_pages) {
>+	if (dev->guest_pages == NULL) {
> 		dev->max_guest_pages = 8;
>-		dev->guest_pages = malloc(dev->max_guest_pages *
>-						sizeof(struct guest_page));
>+		dev->guest_pages = rte_zmalloc(NULL,
>+					dev->max_guest_pages *
>+					sizeof(struct guest_page),
>+					RTE_CACHE_LINE_SIZE);
> 		if (dev->guest_pages == NULL) {
> 			VHOST_LOG_CONFIG(ERR,
> 				"(%d) failed to allocate memory "
>-- 
>2.17.1
>

Reviewed-by: Xiaolong Ye <xiaolong.ye@intel.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
  2020-04-01 14:50   ` [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation Marvin Liu
  2020-04-01 10:07     ` Gavin Hu
@ 2020-04-02  2:57     ` Ye Xiaolong
  2020-04-27  8:45     ` Maxime Coquelin
  2 siblings, 0 replies; 31+ messages in thread
From: Ye Xiaolong @ 2020-04-02  2:57 UTC (permalink / raw)
  To: Marvin Liu; +Cc: maxime.coquelin, zhihong.wang, dev

On 04/01, Marvin Liu wrote:
>If Tx zero copy is enabled, the gpa to hpa mapping table is updated one by
>one. This will harm performance when the guest memory backend is using 2M
>hugepages. Now add a cached mapping table which will be sorted by usage
>sequence. Address translation will first check the cached mapping table,
>then check the unsorted mapping table if no match is found.
>
>Signed-off-by: Marvin Liu <yong.liu@intel.com>
>
>diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>index 2087d1400..5cb0e83dd 100644
>--- a/lib/librte_vhost/vhost.h
>+++ b/lib/librte_vhost/vhost.h
>@@ -368,7 +368,9 @@ struct virtio_net {
> 	struct vhost_device_ops const *notify_ops;
> 
> 	uint32_t		nr_guest_pages;
>+	uint32_t		nr_cached_guest_pages;
> 	uint32_t		max_guest_pages;
>+	struct guest_page       *cached_guest_pages;
> 	struct guest_page       *guest_pages;
> 
> 	int			slave_req_fd;
>@@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> {
> 	uint32_t i;
> 	struct guest_page *page;
>+	uint32_t cached_pages = dev->nr_cached_guest_pages;
>+
>+	for (i = 0; i < cached_pages; i++) {
>+		page = &dev->cached_guest_pages[i];
>+		if (gpa >= page->guest_phys_addr &&
>+			gpa + size < page->guest_phys_addr + page->size) {
>+			return gpa - page->guest_phys_addr +
>+				page->host_phys_addr;
>+		}
>+	}
> 
> 	for (i = 0; i < dev->nr_guest_pages; i++) {
> 		page = &dev->guest_pages[i];
> 
> 		if (gpa >= page->guest_phys_addr &&
> 		    gpa + size < page->guest_phys_addr + page->size) {
>+			rte_memcpy(&dev->cached_guest_pages[cached_pages],
>+				   page, sizeof(struct guest_page));
>+			dev->nr_cached_guest_pages++;
> 			return gpa - page->guest_phys_addr +
> 			       page->host_phys_addr;
> 		}
>diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>index 79fcb9d19..1bae1fddc 100644
>--- a/lib/librte_vhost/vhost_user.c
>+++ b/lib/librte_vhost/vhost_user.c
>@@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> 	}
> 
> 	rte_free(dev->guest_pages);
>+	rte_free(dev->cached_guest_pages);
> 	dev->guest_pages = NULL;
>+	dev->cached_guest_pages = NULL;
> 
> 	if (dev->log_addr) {
> 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
>@@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> 		   uint64_t host_phys_addr, uint64_t size)
> {
> 	struct guest_page *page, *last_page;
>-	struct guest_page *old_pages;
>+	struct guest_page *old_pages, *old_cached_pages;
> 
> 	if (dev->nr_guest_pages == dev->max_guest_pages) {
> 		dev->max_guest_pages *= 2;
>@@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> 		dev->guest_pages = rte_realloc(dev->guest_pages,
> 					dev->max_guest_pages * sizeof(*page),
> 					RTE_CACHE_LINE_SIZE);
>-		if (dev->guest_pages == NULL) {
>+		old_cached_pages = dev->cached_guest_pages;
>+		dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
>+						dev->max_guest_pages *
>+						sizeof(*page),
>+						RTE_CACHE_LINE_SIZE);
>+		dev->nr_cached_guest_pages = 0;
>+		if (dev->guest_pages == NULL ||
>+				dev->cached_guest_pages == NULL) {
> 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
> 			rte_free(old_pages);
>+			rte_free(old_cached_pages);
>+			dev->guest_pages = NULL;
>+			dev->cached_guest_pages = NULL;
> 			return -1;
> 		}
> 	}
>@@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> 		}
> 	}
> 
>+	if (dev->cached_guest_pages == NULL) {
>+		dev->cached_guest_pages = rte_zmalloc(NULL,
>+						dev->max_guest_pages *
>+						sizeof(struct guest_page),
>+						RTE_CACHE_LINE_SIZE);
>+		if (dev->cached_guest_pages == NULL) {
>+			VHOST_LOG_CONFIG(ERR,
>+				"(%d) failed to allocate memory "
>+				"for dev->cached_guest_pages\n",
>+				dev->vid);
>+			return RTE_VHOST_MSG_RESULT_ERR;
>+		}
>+	}
>+
> 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
> 		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> 	if (dev->mem == NULL) {
>-- 
>2.17.1
>

Reviewed-by: Xiaolong Ye <xiaolong.ye@intel.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
  2020-04-01 13:01       ` Liu, Yong
@ 2020-04-02  3:04         ` Gavin Hu
  2020-04-02  4:45           ` Liu, Yong
  2020-04-03  8:22         ` Ma, LihongX
  1 sibling, 1 reply; 31+ messages in thread
From: Gavin Hu @ 2020-04-02  3:04 UTC (permalink / raw)
  To: Liu, Yong, maxime.coquelin, Ye, Xiaolong, Wang, Zhihong; +Cc: dev, nd, nd

Hi Marvin,

> -----Original Message-----
> From: Liu, Yong <yong.liu@intel.com>
> Sent: Wednesday, April 1, 2020 9:01 PM
> To: Gavin Hu <Gavin.Hu@arm.com>; maxime.coquelin@redhat.com; Ye,
> Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> 
> 
> > -----Original Message-----
> > From: Gavin Hu <Gavin.Hu@arm.com>
> > Sent: Wednesday, April 1, 2020 6:07 PM
> > To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye,
> > Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> > <zhihong.wang@intel.com>
> > Cc: dev@dpdk.org; nd <nd@arm.com>
> > Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> >
> > Hi Marvin,
> >
> > > -----Original Message-----
> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > > Sent: Wednesday, April 1, 2020 10:50 PM
> > > To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com;
> > > zhihong.wang@intel.com
> > > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > > Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> > >
> > > If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> > > one. This will harm performance when guest memory backend using 2M
> > > hugepages. Now add cached mapping table which will sorted by using
> > > sequence. Address translation will first check cached mapping table,
> > > then check unsorted mapping table if no match found.
> > >
> > > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> > >
> > > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > > index 2087d1400..5cb0e83dd 100644
> > > --- a/lib/librte_vhost/vhost.h
> > > +++ b/lib/librte_vhost/vhost.h
> > > @@ -368,7 +368,9 @@ struct virtio_net {
> > >  	struct vhost_device_ops const *notify_ops;
> > >
> > >  	uint32_t		nr_guest_pages;
> > > +	uint32_t		nr_cached_guest_pages;
> > >  	uint32_t		max_guest_pages;
> > > +	struct guest_page       *cached_guest_pages;
> > >  	struct guest_page       *guest_pages;
> > >
> > >  	int			slave_req_fd;
> > > @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t
> gpa,
> > > uint64_t size)
> > >  {
> > >  	uint32_t i;
> > >  	struct guest_page *page;
> > > +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> > > +

Add a comment here, something like "Firstly look up in the cached pages"? 

> > > +	for (i = 0; i < cached_pages; i++) {

Should the search order be reversed here, so that the most recently used entries are checked first? 

> > > +		page = &dev->cached_guest_pages[i];
> > > +		if (gpa >= page->guest_phys_addr &&
> > > +			gpa + size < page->guest_phys_addr + page->size) {
> > > +			return gpa - page->guest_phys_addr +
> > > +				page->host_phys_addr;
> > > +		}
> > > +	}
> > Sorry, I did not see any speedup with cached guest pages in comparison to
> > the old code below.
> > Is it not a simple copy?
> > Is it a better idea to use hash instead to speed up the translation?
> > /Gavin
> 
> Hi Gavin,
> Here we just resort the overall mapping table according to usage sequence.
> Most likely the virtio driver will reuse recently recycled buffers, so the search
> will find a match near the beginning.
> That is simple fix for performance enhancement. If use hash for index, it will
> take much more cost in normal case.
> 
> Regards,
> Marvin

There are issues here: the cached table grows over time. Will it become less efficient when it grows too big — possibly even bigger than the original table — and could an overflow happen? 
Would it be a good idea to limit the cache to a small number of entries, thereby making the search quicker? 
/Gavin
> 
> 
> > >
Add a comment here, something like "Fall back to normal lookup if failed to get from the cached table"? 

> > >  	for (i = 0; i < dev->nr_guest_pages; i++) {
> > >  		page = &dev->guest_pages[i];
> > >
> > >  		if (gpa >= page->guest_phys_addr &&
> > >  		    gpa + size < page->guest_phys_addr + page->size) {
> > > +			rte_memcpy(&dev-
> > > >cached_guest_pages[cached_pages],
> > > +				   page, sizeof(struct guest_page));
> > > +			dev->nr_cached_guest_pages++;

Will overflow happen over time when there are many translations? 

> > >  			return gpa - page->guest_phys_addr +
> > >  			       page->host_phys_addr;
> > >  		}
> > > diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> > > index 79fcb9d19..1bae1fddc 100644
> > > --- a/lib/librte_vhost/vhost_user.c
> > > +++ b/lib/librte_vhost/vhost_user.c
> > > @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> > >  	}
> > >
> > >  	rte_free(dev->guest_pages);
> > > +	rte_free(dev->cached_guest_pages);
> > >  	dev->guest_pages = NULL;
> > > +	dev->cached_guest_pages = NULL;
> > >
> > >  	if (dev->log_addr) {
> > >  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> > > @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev,
> > > uint64_t guest_phys_addr,
> > >  		   uint64_t host_phys_addr, uint64_t size)
> > >  {
> > >  	struct guest_page *page, *last_page;
> > > -	struct guest_page *old_pages;
> > > +	struct guest_page *old_pages, *old_cached_pages;
> > >
> > >  	if (dev->nr_guest_pages == dev->max_guest_pages) {
> > >  		dev->max_guest_pages *= 2;
> > > @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev,
> > > uint64_t guest_phys_addr,
> > >  		dev->guest_pages = rte_realloc(dev->guest_pages,
> > >  					dev->max_guest_pages *
> > > sizeof(*page),
> > >  					RTE_CACHE_LINE_SIZE);
> > > -		if (dev->guest_pages == NULL) {
> > > +		old_cached_pages = dev->cached_guest_pages;
> > > +		dev->cached_guest_pages = rte_realloc(dev-
> > > >cached_guest_pages,
> > > +						dev->max_guest_pages *
> > > +						sizeof(*page),
> > > +						RTE_CACHE_LINE_SIZE);
> > > +		dev->nr_cached_guest_pages = 0;
> > > +		if (dev->guest_pages == NULL ||
> > > +				dev->cached_guest_pages == NULL) {
> > >  			VHOST_LOG_CONFIG(ERR, "cannot realloc
> > > guest_pages\n");
> > >  			rte_free(old_pages);
> > > +			rte_free(old_cached_pages);
> > > +			dev->guest_pages = NULL;
> > > +			dev->cached_guest_pages = NULL;
> > >  			return -1;
> > >  		}
> > >  	}
> > > @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net
> > > **pdev, struct VhostUserMsg *msg,
> > >  		}
> > >  	}
> > >
> > > +	if (dev->cached_guest_pages == NULL) {
> > > +		dev->cached_guest_pages = rte_zmalloc(NULL,
> > > +						dev->max_guest_pages *
> > > +						sizeof(struct guest_page),
> > > +						RTE_CACHE_LINE_SIZE);
> > > +		if (dev->cached_guest_pages == NULL) {
> > > +			VHOST_LOG_CONFIG(ERR,
> > > +				"(%d) failed to allocate memory "
> > > +				"for dev->cached_guest_pages\n",
> > > +				dev->vid);
> > > +			return RTE_VHOST_MSG_RESULT_ERR;
> > > +		}
> > > +	}
> > > +
> > >  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> > > rte_vhost_memory) +
> > >  		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> > > 0);
> > >  	if (dev->mem == NULL) {
> > > --
> > > 2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
  2020-04-02  3:04         ` Gavin Hu
@ 2020-04-02  4:45           ` Liu, Yong
  0 siblings, 0 replies; 31+ messages in thread
From: Liu, Yong @ 2020-04-02  4:45 UTC (permalink / raw)
  To: Gavin Hu, maxime.coquelin, Ye, Xiaolong, Wang, Zhihong; +Cc: dev, nd, nd



> -----Original Message-----
> From: Gavin Hu <Gavin.Hu@arm.com>
> Sent: Thursday, April 2, 2020 11:05 AM
> To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye,
> Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> Hi Marvin,
> 
> > -----Original Message-----
> > From: Liu, Yong <yong.liu@intel.com>
> > Sent: Wednesday, April 1, 2020 9:01 PM
> > To: Gavin Hu <Gavin.Hu@arm.com>; maxime.coquelin@redhat.com; Ye,
> > Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> > <zhihong.wang@intel.com>
> > Cc: dev@dpdk.org; nd <nd@arm.com>
> > Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> >
> >
> >
> > > -----Original Message-----
> > > From: Gavin Hu <Gavin.Hu@arm.com>
> > > Sent: Wednesday, April 1, 2020 6:07 PM
> > > To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye,
> > > Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> > > <zhihong.wang@intel.com>
> > > Cc: dev@dpdk.org; nd <nd@arm.com>
> > > Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa
> translation
> > >
> > > Hi Marvin,
> > >
> > > > -----Original Message-----
> > > > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > > > Sent: Wednesday, April 1, 2020 10:50 PM
> > > > To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com;
> > > > zhihong.wang@intel.com
> > > > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > > > Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> > > >
> > > > If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> > > > one. This will harm performance when guest memory backend using
> 2M
> > > > hugepages. Now add cached mapping table which will sorted by using
> > > > sequence. Address translation will first check cached mapping table,
> > > > then check unsorted mapping table if no match found.
> > > >
> > > > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> > > >
> > > > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > > > index 2087d1400..5cb0e83dd 100644
> > > > --- a/lib/librte_vhost/vhost.h
> > > > +++ b/lib/librte_vhost/vhost.h
> > > > @@ -368,7 +368,9 @@ struct virtio_net {
> > > >  	struct vhost_device_ops const *notify_ops;
> > > >
> > > >  	uint32_t		nr_guest_pages;
> > > > +	uint32_t		nr_cached_guest_pages;
> > > >  	uint32_t		max_guest_pages;
> > > > +	struct guest_page       *cached_guest_pages;
> > > >  	struct guest_page       *guest_pages;
> > > >
> > > >  	int			slave_req_fd;
> > > > @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t
> > gpa,
> > > > uint64_t size)
> > > >  {
> > > >  	uint32_t i;
> > > >  	struct guest_page *page;
> > > > +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> > > > +
> 
> Add a comment here, something like "Firstly look up in the cached pages"?
> 
> > > > +	for (i = 0; i < cached_pages; i++) {
> 
> Should the searching order reversed  here to search the most recent entries?
> 
> > > > +		page = &dev->cached_guest_pages[i];
> > > > +		if (gpa >= page->guest_phys_addr &&
> > > > +			gpa + size < page->guest_phys_addr + page->size) {
> > > > +			return gpa - page->guest_phys_addr +
> > > > +				page->host_phys_addr;
> > > > +		}
> > > > +	}
> > > Sorry, I did not see any speedup with cached guest pages in comparison
> to
> > > the old code below.
> > > Is it not a simple copy?
> > > Is it a better idea to use hash instead to speed up the translation?
> > > /Gavin
> >
> > Hi Gavin,
> > Here just resort the overall mapping table according to using sequence.
> > Most likely virtio driver will reuse recently cycled buffers, thus search will
> > find match in beginning.
> > That is simple fix for performance enhancement. If use hash for index, it
> will
> > take much more cost in normal case.
> >
> > Regards,
> > Marvin
> 
> There are issues here, the cached table is growing over time, will it become
> less efficient when growing too big and even bigger than the original table
> and overflow happen?
> Is it a good idea to limit the cached entries to small therefore make the
> search quicker?
> /Gavin
> >

Gavin,
The cached table size is the same as the mapping table; it only records entries from the original table which have been used.
In the worst case, where every access of guest memory is random, the cached table will reach the same size as the original table, and the search cost is the same as before. 
It is hard to predict which size is most suitable for caching, since that depends on how the guest driver allocates buffers: maybe fewer than ten entries when using 2M pages, and thousands when using 4K pages.
So here we just add a resorted table, whose cost is much lower in the normal case and the same as before in the worst case. 

Thanks,
Marvin

> >
> > > >
> Add a comment here, something like "Fall back to normal lookup if failed to
> get from the cached table"?
> 
> > > >  	for (i = 0; i < dev->nr_guest_pages; i++) {
> > > >  		page = &dev->guest_pages[i];
> > > >
> > > >  		if (gpa >= page->guest_phys_addr &&
> > > >  		    gpa + size < page->guest_phys_addr + page->size) {
> > > > +			rte_memcpy(&dev-
> > > > >cached_guest_pages[cached_pages],
> > > > +				   page, sizeof(struct guest_page));
> > > > +			dev->nr_cached_guest_pages++;
> 
> Will overflow happen over time when there are many translations?
> 
> > > >  			return gpa - page->guest_phys_addr +
> > > >  			       page->host_phys_addr;
> > > >  		}
> > > > diff --git a/lib/librte_vhost/vhost_user.c
> b/lib/librte_vhost/vhost_user.c
> > > > index 79fcb9d19..1bae1fddc 100644
> > > > --- a/lib/librte_vhost/vhost_user.c
> > > > +++ b/lib/librte_vhost/vhost_user.c
> > > > @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> > > >  	}
> > > >
> > > >  	rte_free(dev->guest_pages);
> > > > +	rte_free(dev->cached_guest_pages);
> > > >  	dev->guest_pages = NULL;
> > > > +	dev->cached_guest_pages = NULL;
> > > >
> > > >  	if (dev->log_addr) {
> > > >  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> > > > @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev,
> > > > uint64_t guest_phys_addr,
> > > >  		   uint64_t host_phys_addr, uint64_t size)
> > > >  {
> > > >  	struct guest_page *page, *last_page;
> > > > -	struct guest_page *old_pages;
> > > > +	struct guest_page *old_pages, *old_cached_pages;
> > > >
> > > >  	if (dev->nr_guest_pages == dev->max_guest_pages) {
> > > >  		dev->max_guest_pages *= 2;
> > > > @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev,
> > > > uint64_t guest_phys_addr,
> > > >  		dev->guest_pages = rte_realloc(dev->guest_pages,
> > > >  					dev->max_guest_pages *
> > > > sizeof(*page),
> > > >  					RTE_CACHE_LINE_SIZE);
> > > > -		if (dev->guest_pages == NULL) {
> > > > +		old_cached_pages = dev->cached_guest_pages;
> > > > +		dev->cached_guest_pages = rte_realloc(dev-
> > > > >cached_guest_pages,
> > > > +						dev->max_guest_pages *
> > > > +						sizeof(*page),
> > > > +						RTE_CACHE_LINE_SIZE);
> > > > +		dev->nr_cached_guest_pages = 0;
> > > > +		if (dev->guest_pages == NULL ||
> > > > +				dev->cached_guest_pages == NULL) {
> > > >  			VHOST_LOG_CONFIG(ERR, "cannot realloc
> > > > guest_pages\n");
> > > >  			rte_free(old_pages);
> > > > +			rte_free(old_cached_pages);
> > > > +			dev->guest_pages = NULL;
> > > > +			dev->cached_guest_pages = NULL;
> > > >  			return -1;
> > > >  		}
> > > >  	}
> > > > @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct
> virtio_net
> > > > **pdev, struct VhostUserMsg *msg,
> > > >  		}
> > > >  	}
> > > >
> > > > +	if (dev->cached_guest_pages == NULL) {
> > > > +		dev->cached_guest_pages = rte_zmalloc(NULL,
> > > > +						dev->max_guest_pages *
> > > > +						sizeof(struct guest_page),
> > > > +						RTE_CACHE_LINE_SIZE);
> > > > +		if (dev->cached_guest_pages == NULL) {
> > > > +			VHOST_LOG_CONFIG(ERR,
> > > > +				"(%d) failed to allocate memory "
> > > > +				"for dev->cached_guest_pages\n",
> > > > +				dev->vid);
> > > > +			return RTE_VHOST_MSG_RESULT_ERR;
> > > > +		}
> > > > +	}
> > > > +
> > > >  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> > > > rte_vhost_memory) +
> > > >  		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> > > > 0);
> > > >  	if (dev->mem == NULL) {
> > > > --
> > > > 2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator
  2020-04-01 14:50 ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
                     ` (2 preceding siblings ...)
  2020-04-02  2:57   ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Ye Xiaolong
@ 2020-04-03  8:22   ` Ma, LihongX
  2020-04-15 11:15   ` Maxime Coquelin
  4 siblings, 0 replies; 31+ messages in thread
From: Ma, LihongX @ 2020-04-03  8:22 UTC (permalink / raw)
  To: Liu, Yong, maxime.coquelin, Ye, Xiaolong, Wang, Zhihong; +Cc: dev, Liu, Yong

Tested-by: ma,lihong<lihongx.ma@intel.com>

Regards,
Ma,lihong


-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Marvin Liu
Sent: Wednesday, April 1, 2020 10:50 PM
To: maxime.coquelin@redhat.com; Ye, Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
Cc: dev@dpdk.org; Liu, Yong <yong.liu@intel.com>
Subject: [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator

Replace dynamic memory allocator with dpdk memory allocator.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index bd1be0104..79fcb9d19 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -191,7 +191,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
 		dev->mem = NULL;
 	}
 
-	free(dev->guest_pages);
+	rte_free(dev->guest_pages);
 	dev->guest_pages = NULL;
 
 	if (dev->log_addr) {
@@ -903,11 +903,12 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	if (dev->nr_guest_pages == dev->max_guest_pages) {
 		dev->max_guest_pages *= 2;
 		old_pages = dev->guest_pages;
-		dev->guest_pages = realloc(dev->guest_pages,
-					dev->max_guest_pages * sizeof(*page));
-		if (!dev->guest_pages) {
+		dev->guest_pages = rte_realloc(dev->guest_pages,
+					dev->max_guest_pages * sizeof(*page),
+					RTE_CACHE_LINE_SIZE);
+		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
-			free(old_pages);
+			rte_free(old_pages);
 			return -1;
 		}
 	}
@@ -1062,10 +1063,12 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
 
 	dev->nr_guest_pages = 0;
-	if (!dev->guest_pages) {
+	if (dev->guest_pages == NULL) {
 		dev->max_guest_pages = 8;
-		dev->guest_pages = malloc(dev->max_guest_pages *
-						sizeof(struct guest_page));
+		dev->guest_pages = rte_zmalloc(NULL,
+					dev->max_guest_pages *
+					sizeof(struct guest_page),
+					RTE_CACHE_LINE_SIZE);
 		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"(%d) failed to allocate memory "
--
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
  2020-04-01 13:01       ` Liu, Yong
  2020-04-02  3:04         ` Gavin Hu
@ 2020-04-03  8:22         ` Ma, LihongX
  1 sibling, 0 replies; 31+ messages in thread
From: Ma, LihongX @ 2020-04-03  8:22 UTC (permalink / raw)
  To: Liu, Yong, Gavin Hu, maxime.coquelin, Ye, Xiaolong, Wang, Zhihong; +Cc: dev, nd

Tested-by: ma,lihong<lihongx.ma@intel.com>

Regards,
Ma,lihong


-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liu, Yong
Sent: Wednesday, April 1, 2020 9:01 PM
To: Gavin Hu <Gavin.Hu@arm.com>; maxime.coquelin@redhat.com; Ye, Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
Cc: dev@dpdk.org; nd <nd@arm.com>
Subject: Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation



> -----Original Message-----
> From: Gavin Hu <Gavin.Hu@arm.com>
> Sent: Wednesday, April 1, 2020 6:07 PM
> To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye, 
> Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong 
> <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa 
> translation
> 
> Hi Marvin,
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > Sent: Wednesday, April 1, 2020 10:50 PM
> > To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com; 
> > zhihong.wang@intel.com
> > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa 
> > translation
> >
> > If Tx zero copy enabled, gpa to hpa mapping table is updated one by 
> > one. This will harm performance when guest memory backend using 2M 
> > hugepages. Now add cached mapping table which will sorted by using 
> > sequence. Address translation will first check cached mapping table, 
> > then check unsorted mapping table if no match found.
> >
> > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h 
> > index 2087d1400..5cb0e83dd 100644
> > --- a/lib/librte_vhost/vhost.h
> > +++ b/lib/librte_vhost/vhost.h
> > @@ -368,7 +368,9 @@ struct virtio_net {
> >  	struct vhost_device_ops const *notify_ops;
> >
> >  	uint32_t		nr_guest_pages;
> > +	uint32_t		nr_cached_guest_pages;
> >  	uint32_t		max_guest_pages;
> > +	struct guest_page       *cached_guest_pages;
> >  	struct guest_page       *guest_pages;
> >
> >  	int			slave_req_fd;
> > @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t 
> > gpa, uint64_t size)  {
> >  	uint32_t i;
> >  	struct guest_page *page;
> > +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> > +
> > +	for (i = 0; i < cached_pages; i++) {
> > +		page = &dev->cached_guest_pages[i];
> > +		if (gpa >= page->guest_phys_addr &&
> > +			gpa + size < page->guest_phys_addr + page->size) {
> > +			return gpa - page->guest_phys_addr +
> > +				page->host_phys_addr;
> > +		}
> > +	}
> Sorry, I did not see any speedup with cached guest pages in comparison 
> to the old code below.
> Is it not a simple copy?
> Is it a better idea to use hash instead to speed up the translation?
> /Gavin

Hi Gavin,
Here just resort the overall mapping table according to using sequence.  
Most likely virtio driver will reuse recently cycled buffers, thus search will find match in beginning. 
That is simple fix for performance enhancement. If use hash for index, it will take much more cost in normal case.

Regards,
Marvin 


> >
> >  	for (i = 0; i < dev->nr_guest_pages; i++) {
> >  		page = &dev->guest_pages[i];
> >
> >  		if (gpa >= page->guest_phys_addr &&
> >  		    gpa + size < page->guest_phys_addr + page->size) {
> > +			rte_memcpy(&dev-
> > >cached_guest_pages[cached_pages],
> > +				   page, sizeof(struct guest_page));
> > +			dev->nr_cached_guest_pages++;
> >  			return gpa - page->guest_phys_addr +
> >  			       page->host_phys_addr;
> >  		}
> > diff --git a/lib/librte_vhost/vhost_user.c 
> > b/lib/librte_vhost/vhost_user.c index 79fcb9d19..1bae1fddc 100644
> > --- a/lib/librte_vhost/vhost_user.c
> > +++ b/lib/librte_vhost/vhost_user.c
> > @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> >  	}
> >
> >  	rte_free(dev->guest_pages);
> > +	rte_free(dev->cached_guest_pages);
> >  	dev->guest_pages = NULL;
> > +	dev->cached_guest_pages = NULL;
> >
> >  	if (dev->log_addr) {
> >  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); @@ 
> > -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t 
> > guest_phys_addr,
> >  		   uint64_t host_phys_addr, uint64_t size)  {
> >  	struct guest_page *page, *last_page;
> > -	struct guest_page *old_pages;
> > +	struct guest_page *old_pages, *old_cached_pages;
> >
> >  	if (dev->nr_guest_pages == dev->max_guest_pages) {
> >  		dev->max_guest_pages *= 2;
> > @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev, 
> > uint64_t guest_phys_addr,
> >  		dev->guest_pages = rte_realloc(dev->guest_pages,
> >  					dev->max_guest_pages *
> > sizeof(*page),
> >  					RTE_CACHE_LINE_SIZE);
> > -		if (dev->guest_pages == NULL) {
> > +		old_cached_pages = dev->cached_guest_pages;
> > +		dev->cached_guest_pages = rte_realloc(dev-
> > >cached_guest_pages,
> > +						dev->max_guest_pages *
> > +						sizeof(*page),
> > +						RTE_CACHE_LINE_SIZE);
> > +		dev->nr_cached_guest_pages = 0;
> > +		if (dev->guest_pages == NULL ||
> > +				dev->cached_guest_pages == NULL) {
> >  			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
> >  			rte_free(old_pages);
> > +			rte_free(old_cached_pages);
> > +			dev->guest_pages = NULL;
> > +			dev->cached_guest_pages = NULL;
> >  			return -1;
> >  		}
> >  	}
> > @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net 
> > **pdev, struct VhostUserMsg *msg,
> >  		}
> >  	}
> >
> > +	if (dev->cached_guest_pages == NULL) {
> > +		dev->cached_guest_pages = rte_zmalloc(NULL,
> > +						dev->max_guest_pages *
> > +						sizeof(struct guest_page),
> > +						RTE_CACHE_LINE_SIZE);
> > +		if (dev->cached_guest_pages == NULL) {
> > +			VHOST_LOG_CONFIG(ERR,
> > +				"(%d) failed to allocate memory "
> > +				"for dev->cached_guest_pages\n",
> > +				dev->vid);
> > +			return RTE_VHOST_MSG_RESULT_ERR;
> > +		}
> > +	}
> > +
> >  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> > rte_vhost_memory) +
> >  		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> >  	if (dev->mem == NULL) {
> > --
> > 2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator
  2020-04-01 14:50 ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
                     ` (3 preceding siblings ...)
  2020-04-03  8:22   ` Ma, LihongX
@ 2020-04-15 11:15   ` Maxime Coquelin
  4 siblings, 0 replies; 31+ messages in thread
From: Maxime Coquelin @ 2020-04-15 11:15 UTC (permalink / raw)
  To: Marvin Liu, xiaolong.ye, zhihong.wang; +Cc: dev



On 4/1/20 4:50 PM, Marvin Liu wrote:
> Replace dynamic memory allocator with dpdk memory allocator.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
  2020-04-01 14:50   ` [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation Marvin Liu
  2020-04-01 10:07     ` Gavin Hu
  2020-04-02  2:57     ` Ye Xiaolong
@ 2020-04-27  8:45     ` Maxime Coquelin
  2020-04-28  0:44       ` Liu, Yong
  2 siblings, 1 reply; 31+ messages in thread
From: Maxime Coquelin @ 2020-04-27  8:45 UTC (permalink / raw)
  To: Marvin Liu, xiaolong.ye, zhihong.wang; +Cc: dev

Hi Marvin,

On 4/1/20 4:50 PM, Marvin Liu wrote:
> If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> one. This will harm performance when guest memory backend using 2M
> hugepages. Now add cached mapping table which will sorted by using
> sequence. Address translation will first check cached mapping table,
> then check unsorted mapping table if no match found.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> 

I don't like the approach, as I think it could have nasty effects.
For example, the system is loaded normally and let's say 25% of the
pages are used. Then we have a small spike, and buffers that were never
used start to be used, it will cause writing new entries into the cache
in the hot path when it is already overloaded. Wouldn't it increase the
number of packets dropped?

At set_mem_table time, instead of adding the guest pages unsorted, maybe
better to add them sorted there. Then you can use a better algorithm
than linear searching (O(n)), like binary search (O(log n)).

Thanks,
Maxime



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
  2020-04-27  8:45     ` Maxime Coquelin
@ 2020-04-28  0:44       ` Liu, Yong
  0 siblings, 0 replies; 31+ messages in thread
From: Liu, Yong @ 2020-04-28  0:44 UTC (permalink / raw)
  To: Maxime Coquelin, Ye, Xiaolong, Wang, Zhihong; +Cc: dev



> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Monday, April 27, 2020 4:45 PM
> To: Liu, Yong <yong.liu@intel.com>; Ye, Xiaolong <xiaolong.ye@intel.com>;
> Wang, Zhihong <zhihong.wang@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> Hi Marvin,
> 
> On 4/1/20 4:50 PM, Marvin Liu wrote:
> > If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> > one. This will harm performance when guest memory backend using 2M
> > hugepages. Now add cached mapping table which will sorted by using
> > sequence. Address translation will first check cached mapping table,
> > then check unsorted mapping table if no match found.
> >
> > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> 
> I don't like the approach, as I think it could have nasty effects.
> For example, the system is loaded normally and let's say 25% of the
> pages are used. Then we have a small spike, and buffers that were never
> used start to be used, it will cause writing new entries into the cache
> in the hot path when it is already overloaded. Wouldn't it increase the
> number of packets dropped?
> 
> At set_mem_table time, instead of adding the guest pages unsorted, maybe
> better to add them sorted there. Then you can use a better algorithm
> than linear searching (O(n)), like binary search (O(log n)).
> 

Maxime,
Thanks for the input. The previous approach sorted the pages by usage sequence, which may cause more packet drops if the page access sequence varies a lot.
Based on the current dpdk and virtio-net implementation, that is unlikely to happen. Anyway, it is not the best choice.
I will replace the current cache solution with binary search.

Regards,
Marvin

> Thanks,
> Maxime
> 


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 1/2] vhost: utilize dpdk dynamic memory allocator
  2020-03-16 15:33 [dpdk-dev] [PATCH] vhost: cache guest/vhost physical address mapping Marvin Liu
  2020-03-16 13:48 ` Ye Xiaolong
  2020-04-01 14:50 ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
@ 2020-04-28  9:13 ` Marvin Liu
  2020-04-28  9:13   ` [dpdk-dev] [PATCH v3 2/2] vhost: binary search address mapping table Marvin Liu
  2020-04-28 12:51   ` [dpdk-dev] [PATCH v3 1/2] vhost: utilize dpdk dynamic memory allocator Maxime Coquelin
  2020-04-29  1:00 ` [dpdk-dev] [PATCH v4 1/2] net/virtio: add support Virtio link speed feature Marvin Liu
  2020-04-29  1:04 ` [dpdk-dev] [PATCH v4 1/2] " Marvin Liu
  4 siblings, 2 replies; 31+ messages in thread
From: Marvin Liu @ 2020-04-28  9:13 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Marvin Liu

Replace dynamic memory allocator with dpdk memory allocator.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bd1be0104..79fcb9d19 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -191,7 +191,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
 		dev->mem = NULL;
 	}
 
-	free(dev->guest_pages);
+	rte_free(dev->guest_pages);
 	dev->guest_pages = NULL;
 
 	if (dev->log_addr) {
@@ -903,11 +903,12 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	if (dev->nr_guest_pages == dev->max_guest_pages) {
 		dev->max_guest_pages *= 2;
 		old_pages = dev->guest_pages;
-		dev->guest_pages = realloc(dev->guest_pages,
-					dev->max_guest_pages * sizeof(*page));
-		if (!dev->guest_pages) {
+		dev->guest_pages = rte_realloc(dev->guest_pages,
+					dev->max_guest_pages * sizeof(*page),
+					RTE_CACHE_LINE_SIZE);
+		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
-			free(old_pages);
+			rte_free(old_pages);
 			return -1;
 		}
 	}
@@ -1062,10 +1063,12 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
 
 	dev->nr_guest_pages = 0;
-	if (!dev->guest_pages) {
+	if (dev->guest_pages == NULL) {
 		dev->max_guest_pages = 8;
-		dev->guest_pages = malloc(dev->max_guest_pages *
-						sizeof(struct guest_page));
+		dev->guest_pages = rte_zmalloc(NULL,
+					dev->max_guest_pages *
+					sizeof(struct guest_page),
+					RTE_CACHE_LINE_SIZE);
 		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"(%d) failed to allocate memory "
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v3 2/2] vhost: binary search address mapping table
  2020-04-28  9:13 ` [dpdk-dev] [PATCH v3 " Marvin Liu
@ 2020-04-28  9:13   ` Marvin Liu
  2020-04-28 15:28     ` Maxime Coquelin
  2020-04-28 12:51   ` [dpdk-dev] [PATCH v3 1/2] vhost: utilize dpdk dynamic memory allocator Maxime Coquelin
  1 sibling, 1 reply; 31+ messages in thread
From: Marvin Liu @ 2020-04-28  9:13 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Marvin Liu

If Tx zero copy is enabled, the gpa to hpa mapping table is updated one by
one. This will harm performance when the guest memory backend uses 2M
hugepages. Now utilize binary search to find the entry in the mapping
table; meanwhile, set the threshold to 256 entries for linear search.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index e592795f2..8769afaad 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -10,7 +10,7 @@ EXPORT_MAP := rte_vhost_version.map
 
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
 CFLAGS += -I vhost_user
-CFLAGS += -fno-strict-aliasing
+CFLAGS += -fno-strict-aliasing -Wno-maybe-uninitialized
 LDLIBS += -lpthread
 
 ifeq ($(RTE_TOOLCHAIN), gcc)
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 507dbf214..a0fee39d5 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -546,20 +546,46 @@ extern int vhost_data_log_level;
 #define MAX_VHOST_DEVICE	1024
 extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
 
+#define VHOST_BINARY_SEARCH_THRESH 256
+static int guest_page_addrcmp(const void *p1, const void *p2)
+{
+	const struct guest_page *page1 = (const struct guest_page *)p1;
+	const struct guest_page *page2 = (const struct guest_page *)p2;
+
+	if (page1->guest_phys_addr > page2->guest_phys_addr)
+		return 1;
+	if (page1->guest_phys_addr < page2->guest_phys_addr)
+		return -1;
+
+	return 0;
+}
+
 /* Convert guest physical address to host physical address */
 static __rte_always_inline rte_iova_t
 gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
 {
 	uint32_t i;
 	struct guest_page *page;
-
-	for (i = 0; i < dev->nr_guest_pages; i++) {
-		page = &dev->guest_pages[i];
-
-		if (gpa >= page->guest_phys_addr &&
-		    gpa + size < page->guest_phys_addr + page->size) {
-			return gpa - page->guest_phys_addr +
-			       page->host_phys_addr;
+	struct guest_page key;
+
+	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
+		key.guest_phys_addr = gpa;
+		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
+			       sizeof(struct guest_page), guest_page_addrcmp);
+		if (page) {
+			if (gpa + size < page->guest_phys_addr + page->size)
+				return gpa - page->guest_phys_addr +
+					page->host_phys_addr;
+		}
+	} else {
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+
+			if (gpa >= page->guest_phys_addr &&
+			    gpa + size < page->guest_phys_addr +
+			    page->size)
+				return gpa - page->guest_phys_addr +
+				       page->host_phys_addr;
 		}
 	}
 
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 79fcb9d19..15e50d27d 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -965,6 +965,12 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 		reg_size -= size;
 	}
 
+	/* sort guest page array if over binary search threshold */
+	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
+		qsort((void *)dev->guest_pages, dev->nr_guest_pages,
+			sizeof(struct guest_page), guest_page_addrcmp);
+	}
+
 	return 0;
 }
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/2] vhost: utilize dpdk dynamic memory allocator
  2020-04-28  9:13 ` [dpdk-dev] [PATCH v3 " Marvin Liu
  2020-04-28  9:13   ` [dpdk-dev] [PATCH v3 2/2] vhost: binary search address mapping table Marvin Liu
@ 2020-04-28 12:51   ` Maxime Coquelin
  1 sibling, 0 replies; 31+ messages in thread
From: Maxime Coquelin @ 2020-04-28 12:51 UTC (permalink / raw)
  To: Marvin Liu, xiaolong.ye, zhihong.wang; +Cc: dev



On 4/28/20 11:13 AM, Marvin Liu wrote:
> Replace dynamic memory allocator with dpdk memory allocator.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> 


Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v3 2/2] vhost: binary search address mapping table
  2020-04-28  9:13   ` [dpdk-dev] [PATCH v3 2/2] vhost: binary search address mapping table Marvin Liu
@ 2020-04-28 15:28     ` Maxime Coquelin
  2020-04-28 15:38       ` Liu, Yong
  0 siblings, 1 reply; 31+ messages in thread
From: Maxime Coquelin @ 2020-04-28 15:28 UTC (permalink / raw)
  To: Marvin Liu, xiaolong.ye, zhihong.wang; +Cc: dev



On 4/28/20 11:13 AM, Marvin Liu wrote:
> If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> one. This will harm performance when guest memory backend using 2M
> hugepages. Now utilize binary search to find the entry in mapping
> table, meanwhile set threshold to 256 entries for linear search.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> 
> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index e592795f2..8769afaad 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -10,7 +10,7 @@ EXPORT_MAP := rte_vhost_version.map
>  
>  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
>  CFLAGS += -I vhost_user
> -CFLAGS += -fno-strict-aliasing
> +CFLAGS += -fno-strict-aliasing -Wno-maybe-uninitialized
>  LDLIBS += -lpthread
>  
>  ifeq ($(RTE_TOOLCHAIN), gcc)
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 507dbf214..a0fee39d5 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -546,20 +546,46 @@ extern int vhost_data_log_level;
>  #define MAX_VHOST_DEVICE	1024
>  extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
>  
> +#define VHOST_BINARY_SEARCH_THRESH 256
> +static int guest_page_addrcmp(const void *p1, const void *p2)
> +{
> +	const struct guest_page *page1 = (const struct guest_page *)p1;
> +	const struct guest_page *page2 = (const struct guest_page *)p2;
> +
> +	if (page1->guest_phys_addr > page2->guest_phys_addr)
> +		return 1;
> +	if (page1->guest_phys_addr < page2->guest_phys_addr)
> +		return -1;
> +
> +	return 0;
> +}
> +
>  /* Convert guest physical address to host physical address */
>  static __rte_always_inline rte_iova_t
>  gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
>  {
>  	uint32_t i;
>  	struct guest_page *page;
> -
> -	for (i = 0; i < dev->nr_guest_pages; i++) {
> -		page = &dev->guest_pages[i];
> -
> -		if (gpa >= page->guest_phys_addr &&
> -		    gpa + size < page->guest_phys_addr + page->size) {
> -			return gpa - page->guest_phys_addr +
> -			       page->host_phys_addr;
> +	struct guest_page key;
> +
> +	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {

I would have expected the binary search to be more efficient for much
smaller number of pages. Have you done some tests to define this
threshold value?

> +		key.guest_phys_addr = gpa;
> +		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
> +			       sizeof(struct guest_page), guest_page_addrcmp);
> +		if (page) {
> +			if (gpa + size < page->guest_phys_addr + page->size)
> +				return gpa - page->guest_phys_addr +
> +					page->host_phys_addr;
> +		}

Is all the generated code inlined?

I see that in the elf file:
2386: 0000000000874f70    16 FUNC    LOCAL  DEFAULT   13 guest_page_addrcmp

> +	} else {
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +
> +			if (gpa >= page->guest_phys_addr &&
> +			    gpa + size < page->guest_phys_addr +
> +			    page->size)
> +				return gpa - page->guest_phys_addr +
> +				       page->host_phys_addr;
>  		}
>  	}
>  
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 79fcb9d19..15e50d27d 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -965,6 +965,12 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
>  		reg_size -= size;
>  	}
>  
> +	/* sort guest page array if over binary search threshold */
> +	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
> +		qsort((void *)dev->guest_pages, dev->nr_guest_pages,
> +			sizeof(struct guest_page), guest_page_addrcmp);
> +	}
> +
>  	return 0;
>  }
>  
> 


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v3 2/2] vhost: binary search address mapping table
  2020-04-28 15:28     ` Maxime Coquelin
@ 2020-04-28 15:38       ` Liu, Yong
  0 siblings, 0 replies; 31+ messages in thread
From: Liu, Yong @ 2020-04-28 15:38 UTC (permalink / raw)
  To: Maxime Coquelin, Ye, Xiaolong, Wang, Zhihong; +Cc: dev



> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, April 28, 2020 11:28 PM
> To: Liu, Yong <yong.liu@intel.com>; Ye, Xiaolong <xiaolong.ye@intel.com>;
> Wang, Zhihong <zhihong.wang@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v3 2/2] vhost: binary search address mapping table
> 
> 
> 
> On 4/28/20 11:13 AM, Marvin Liu wrote:
> > If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> > one. This will harm performance when guest memory backend using 2M
> > hugepages. Now utilize binary search to find the entry in mapping
> > table, meanwhile set threshold to 256 entries for linear search.
> >
> > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> > diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> > index e592795f2..8769afaad 100644
> > --- a/lib/librte_vhost/Makefile
> > +++ b/lib/librte_vhost/Makefile
> > @@ -10,7 +10,7 @@ EXPORT_MAP := rte_vhost_version.map
> >
> >  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
> >  CFLAGS += -I vhost_user
> > -CFLAGS += -fno-strict-aliasing
> > +CFLAGS += -fno-strict-aliasing -Wno-maybe-uninitialized
> >  LDLIBS += -lpthread
> >
> >  ifeq ($(RTE_TOOLCHAIN), gcc)
> > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > index 507dbf214..a0fee39d5 100644
> > --- a/lib/librte_vhost/vhost.h
> > +++ b/lib/librte_vhost/vhost.h
> > @@ -546,20 +546,46 @@ extern int vhost_data_log_level;
> >  #define MAX_VHOST_DEVICE	1024
> >  extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
> >
> > +#define VHOST_BINARY_SEARCH_THRESH 256
> > +static int guest_page_addrcmp(const void *p1, const void *p2)
> > +{
> > +	const struct guest_page *page1 = (const struct guest_page *)p1;
> > +	const struct guest_page *page2 = (const struct guest_page *)p2;
> > +
> > +	if (page1->guest_phys_addr > page2->guest_phys_addr)
> > +		return 1;
> > +	if (page1->guest_phys_addr < page2->guest_phys_addr)
> > +		return -1;
> > +
> > +	return 0;
> > +}
> > +
> >  /* Convert guest physical address to host physical address */
> >  static __rte_always_inline rte_iova_t
> >  gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> >  {
> >  	uint32_t i;
> >  	struct guest_page *page;
> > -
> > -	for (i = 0; i < dev->nr_guest_pages; i++) {
> > -		page = &dev->guest_pages[i];
> > -
> > -		if (gpa >= page->guest_phys_addr &&
> > -		    gpa + size < page->guest_phys_addr + page->size) {
> > -			return gpa - page->guest_phys_addr +
> > -			       page->host_phys_addr;
> > +	struct guest_page key;
> > +
> > +	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
> 
> I would have expected the binary search to be more efficient for much
> smaller number of pages. Have you done some tests to define this
> threshold value?
> 
Maxime,
In my unit test, binary search is better once the size is over 16, but that is not the case with a real VM.
I have tested around 128 to 1024 pages; the benefit can be seen around 256 pages, so the threshold is set to that value.

Thanks,
Marvin

> > +		key.guest_phys_addr = gpa;
> > +		page = bsearch(&key, dev->guest_pages, dev-
> >nr_guest_pages,
> > +			       sizeof(struct guest_page), guest_page_addrcmp);
> > +		if (page) {
> > +			if (gpa + size < page->guest_phys_addr + page->size)
> > +				return gpa - page->guest_phys_addr +
> > +					page->host_phys_addr;
> > +		}
> 
> Is all the generated code inlined?
> 
The compare function hasn't been inlined. I will inline it in the next version.

> I see that in the elf file:
> 2386: 0000000000874f70    16 FUNC    LOCAL  DEFAULT   13
> guest_page_addrcmp
> 
> > +	} else {
> > +		for (i = 0; i < dev->nr_guest_pages; i++) {
> > +			page = &dev->guest_pages[i];
> > +
> > +			if (gpa >= page->guest_phys_addr &&
> > +			    gpa + size < page->guest_phys_addr +
> > +			    page->size)
> > +				return gpa - page->guest_phys_addr +
> > +				       page->host_phys_addr;
> >  		}
> >  	}
> >
> > diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> > index 79fcb9d19..15e50d27d 100644
> > --- a/lib/librte_vhost/vhost_user.c
> > +++ b/lib/librte_vhost/vhost_user.c
> > @@ -965,6 +965,12 @@ add_guest_pages(struct virtio_net *dev, struct
> rte_vhost_mem_region *reg,
> >  		reg_size -= size;
> >  	}
> >
> > +	/* sort guest page array if over binary search threshold */
> > +	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
> > +		qsort((void *)dev->guest_pages, dev->nr_guest_pages,
> > +			sizeof(struct guest_page), guest_page_addrcmp);
> > +	}
> > +
> >  	return 0;
> >  }
> >
> >


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v4 1/2] net/virtio: add support Virtio link speed feature
  2020-03-16 15:33 [dpdk-dev] [PATCH] vhost: cache guest/vhost physical address mapping Marvin Liu
                   ` (2 preceding siblings ...)
  2020-04-28  9:13 ` [dpdk-dev] [PATCH v3 " Marvin Liu
@ 2020-04-29  1:00 ` Marvin Liu
  2020-04-29  1:00   ` [dpdk-dev] [PATCH v4 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
                     ` (2 more replies)
  2020-04-29  1:04 ` [dpdk-dev] [PATCH v4 1/2] " Marvin Liu
  4 siblings, 3 replies; 31+ messages in thread
From: Marvin Liu @ 2020-04-29  1:00 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Ivan Dyukov

From: Ivan Dyukov <i.dyukov@samsung.com>

This patch adds a support of VIRTIO_NET_F_SPEED_DUPLEX feature
for virtio driver.

There are two ways to specify speed of the link:
   'speed' devarg
   negotiate speed from qemu via VIRTIO_NET_F_SPEED_DUPLEX
The highest priority is devarg. If devarg is not specified,
drivers tries to negotiate it from qemu.

Signed-off-by: Ivan Dyukov <i.dyukov@samsung.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 5058990d1..37766cbb6 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1662,7 +1662,8 @@ virtio_configure_intr(struct rte_eth_dev *dev)
 
 	return 0;
 }
-
+#define SPEED_UNKNOWN    0xffffffff
+#define DUPLEX_UNKNOWN   0xff
 /* reset device and renegotiate features if needed */
 static int
 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
@@ -1718,6 +1719,25 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
 		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
 		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
 
+	if (hw->speed == SPEED_UNKNOWN) {
+		if (vtpci_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) {
+			config = &local_config;
+			vtpci_read_dev_config(hw,
+				offsetof(struct virtio_net_config, speed),
+				&config->speed, sizeof(config->speed));
+			vtpci_read_dev_config(hw,
+				offsetof(struct virtio_net_config, duplex),
+				&config->duplex, sizeof(config->duplex));
+			hw->speed = config->speed;
+			hw->duplex = config->duplex;
+		}
+	}
+	if (hw->speed == SPEED_UNKNOWN)
+		hw->speed = ETH_SPEED_NUM_10G;
+	if (hw->duplex == DUPLEX_UNKNOWN)
+		hw->duplex = ETH_LINK_FULL_DUPLEX;
+	PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
+		hw->speed, hw->duplex);
 	if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
 		config = &local_config;
 
@@ -1865,7 +1885,7 @@ int
 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 {
 	struct virtio_hw *hw = eth_dev->data->dev_private;
-	uint32_t speed = ETH_SPEED_NUM_10G;
+	uint32_t speed = SPEED_UNKNOWN;
 	int ret;
 
 	if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
@@ -2450,7 +2470,7 @@ virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complet
 	struct virtio_hw *hw = dev->data->dev_private;
 
 	memset(&link, 0, sizeof(link));
-	link.link_duplex = ETH_LINK_FULL_DUPLEX;
+	link.link_duplex = hw->duplex;
 	link.link_speed  = hw->speed;
 	link.link_autoneg = ETH_LINK_AUTONEG;
 
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index cd8947656..febaf17a8 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -37,7 +37,8 @@
 	 1ULL << VIRTIO_F_RING_PACKED	  |	\
 	 1ULL << VIRTIO_F_IOMMU_PLATFORM  |	\
 	 1ULL << VIRTIO_F_ORDER_PLATFORM  |	\
-	 1ULL << VIRTIO_F_NOTIFICATION_DATA)
+	 1ULL << VIRTIO_F_NOTIFICATION_DATA | \
+	 1ULL << VIRTIO_NET_F_SPEED_DUPLEX)
 
 #define VIRTIO_PMD_SUPPORTED_GUEST_FEATURES	\
 	(VIRTIO_PMD_DEFAULT_GUEST_FEATURES |	\
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index ed98e11c3..bd89357e4 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -141,6 +141,9 @@ struct virtnet_ctl;
  */
 #define VIRTIO_F_NOTIFICATION_DATA 38
 
+/* Device set linkspeed and duplex */
+#define VIRTIO_NET_F_SPEED_DUPLEX 63
+
 /* The Guest publishes the used index for which it expects an interrupt
  * at the end of the avail ring. Host should ignore the avail->flags field. */
 /* The Host publishes the avail index for which it expects a kick
@@ -260,6 +263,7 @@ struct virtio_hw {
 	uint8_t     mac_addr[RTE_ETHER_ADDR_LEN];
 	uint32_t    notify_off_multiplier;
 	uint32_t    speed;  /* link speed in MB */
+	uint8_t     duplex;
 	uint8_t     *isr;
 	uint16_t    *notify_base;
 	struct virtio_pci_common_cfg *common_cfg;
@@ -306,6 +310,18 @@ struct virtio_net_config {
 	uint16_t   status;
 	uint16_t   max_virtqueue_pairs;
 	uint16_t   mtu;
+	/*
+	 * speed, in units of 1Mb. All values 0 to INT_MAX are legal.
+	 * Any other value stands for unknown.
+	 */
+	uint32_t speed;
+	/*
+	 * 0x00 - half duplex
+	 * 0x01 - full duplex
+	 * Any other value stands for unknown.
+	 */
+	uint8_t duplex;
+
 } __attribute__((packed));
 
 /*
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v4 1/2] vhost: utilize dpdk dynamic memory allocator
  2020-04-29  1:00 ` [dpdk-dev] [PATCH v4 1/2] net/virtio: add support Virtio link speed feature Marvin Liu
@ 2020-04-29  1:00   ` Marvin Liu
  2020-04-29  1:00   ` [dpdk-dev] [PATCH v4 2/2] vhost: binary search address mapping table Marvin Liu
  2020-04-29  1:01   ` [dpdk-dev] [PATCH v4 2/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
  2 siblings, 0 replies; 31+ messages in thread
From: Marvin Liu @ 2020-04-29  1:00 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Marvin Liu

Replace dynamic memory allocator with dpdk memory allocator.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bd1be0104..79fcb9d19 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -191,7 +191,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
 		dev->mem = NULL;
 	}
 
-	free(dev->guest_pages);
+	rte_free(dev->guest_pages);
 	dev->guest_pages = NULL;
 
 	if (dev->log_addr) {
@@ -903,11 +903,12 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	if (dev->nr_guest_pages == dev->max_guest_pages) {
 		dev->max_guest_pages *= 2;
 		old_pages = dev->guest_pages;
-		dev->guest_pages = realloc(dev->guest_pages,
-					dev->max_guest_pages * sizeof(*page));
-		if (!dev->guest_pages) {
+		dev->guest_pages = rte_realloc(dev->guest_pages,
+					dev->max_guest_pages * sizeof(*page),
+					RTE_CACHE_LINE_SIZE);
+		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
-			free(old_pages);
+			rte_free(old_pages);
 			return -1;
 		}
 	}
@@ -1062,10 +1063,12 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
 
 	dev->nr_guest_pages = 0;
-	if (!dev->guest_pages) {
+	if (dev->guest_pages == NULL) {
 		dev->max_guest_pages = 8;
-		dev->guest_pages = malloc(dev->max_guest_pages *
-						sizeof(struct guest_page));
+		dev->guest_pages = rte_zmalloc(NULL,
+					dev->max_guest_pages *
+					sizeof(struct guest_page),
+					RTE_CACHE_LINE_SIZE);
 		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"(%d) failed to allocate memory "
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v4 2/2] vhost: binary search address mapping table
  2020-04-29  1:00 ` [dpdk-dev] [PATCH v4 1/2] net/virtio: add support Virtio link speed feature Marvin Liu
  2020-04-29  1:00   ` [dpdk-dev] [PATCH v4 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
@ 2020-04-29  1:00   ` Marvin Liu
  2020-04-29  1:01   ` [dpdk-dev] [PATCH v4 2/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
  2 siblings, 0 replies; 31+ messages in thread
From: Marvin Liu @ 2020-04-29  1:00 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Marvin Liu

If Tx zero copy is enabled, the gpa to hpa mapping table is updated one by
one. This will harm performance when the guest memory backend uses 2M
hugepages. Now utilize binary search to find the entry in the mapping
table; meanwhile, set the threshold to 256 entries for linear search.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 507dbf214..998e133ad 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -546,20 +546,48 @@ extern int vhost_data_log_level;
 #define MAX_VHOST_DEVICE	1024
 extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
 
+#define VHOST_BINARY_SEARCH_THRESH 256
+
+static __rte_always_inline int guest_page_addrcmp(const void *p1,
+						const void *p2)
+{
+	const struct guest_page *page1 = (const struct guest_page *)p1;
+	const struct guest_page *page2 = (const struct guest_page *)p2;
+
+	if (page1->guest_phys_addr > page2->guest_phys_addr)
+		return 1;
+	if (page1->guest_phys_addr < page2->guest_phys_addr)
+		return -1;
+
+	return 0;
+}
+
 /* Convert guest physical address to host physical address */
 static __rte_always_inline rte_iova_t
 gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
 {
 	uint32_t i;
 	struct guest_page *page;
-
-	for (i = 0; i < dev->nr_guest_pages; i++) {
-		page = &dev->guest_pages[i];
-
-		if (gpa >= page->guest_phys_addr &&
-		    gpa + size < page->guest_phys_addr + page->size) {
-			return gpa - page->guest_phys_addr +
-			       page->host_phys_addr;
+	struct guest_page key;
+
+	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
+		key.guest_phys_addr = gpa;
+		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
+			       sizeof(struct guest_page), guest_page_addrcmp);
+		if (page) {
+			if (gpa + size < page->guest_phys_addr + page->size)
+				return gpa - page->guest_phys_addr +
+					page->host_phys_addr;
+		}
+	} else {
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+
+			if (gpa >= page->guest_phys_addr &&
+			    gpa + size < page->guest_phys_addr +
+			    page->size)
+				return gpa - page->guest_phys_addr +
+				       page->host_phys_addr;
 		}
 	}
 
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 79fcb9d19..15e50d27d 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -965,6 +965,12 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 		reg_size -= size;
 	}
 
+	/* sort guest page array if over binary search threshold */
+	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
+		qsort((void *)dev->guest_pages, dev->nr_guest_pages,
+			sizeof(struct guest_page), guest_page_addrcmp);
+	}
+
 	return 0;
 }
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v4 2/2] vhost: utilize dpdk dynamic memory allocator
  2020-04-29  1:00 ` [dpdk-dev] [PATCH v4 1/2] net/virtio: add support Virtio link speed feature Marvin Liu
  2020-04-29  1:00   ` [dpdk-dev] [PATCH v4 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
  2020-04-29  1:00   ` [dpdk-dev] [PATCH v4 2/2] vhost: binary search address mapping table Marvin Liu
@ 2020-04-29  1:01   ` Marvin Liu
  2020-04-29  1:06     ` Liu, Yong
  2 siblings, 1 reply; 31+ messages in thread
From: Marvin Liu @ 2020-04-29  1:01 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Marvin Liu

Replace dynamic memory allocator with dpdk memory allocator.

Signed-off-by: Marvin Liu <yong.liu@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bd1be0104..79fcb9d19 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -191,7 +191,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
 		dev->mem = NULL;
 	}
 
-	free(dev->guest_pages);
+	rte_free(dev->guest_pages);
 	dev->guest_pages = NULL;
 
 	if (dev->log_addr) {
@@ -903,11 +903,12 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	if (dev->nr_guest_pages == dev->max_guest_pages) {
 		dev->max_guest_pages *= 2;
 		old_pages = dev->guest_pages;
-		dev->guest_pages = realloc(dev->guest_pages,
-					dev->max_guest_pages * sizeof(*page));
-		if (!dev->guest_pages) {
+		dev->guest_pages = rte_realloc(dev->guest_pages,
+					dev->max_guest_pages * sizeof(*page),
+					RTE_CACHE_LINE_SIZE);
+		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
-			free(old_pages);
+			rte_free(old_pages);
 			return -1;
 		}
 	}
@@ -1062,10 +1063,12 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
 
 	dev->nr_guest_pages = 0;
-	if (!dev->guest_pages) {
+	if (dev->guest_pages == NULL) {
 		dev->max_guest_pages = 8;
-		dev->guest_pages = malloc(dev->max_guest_pages *
-						sizeof(struct guest_page));
+		dev->guest_pages = rte_zmalloc(NULL,
+					dev->max_guest_pages *
+					sizeof(struct guest_page),
+					RTE_CACHE_LINE_SIZE);
 		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"(%d) failed to allocate memory "
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v4 1/2] vhost: utilize dpdk dynamic memory allocator
  2020-03-16 15:33 [dpdk-dev] [PATCH] vhost: cache guest/vhost physical address mapping Marvin Liu
                   ` (3 preceding siblings ...)
  2020-04-29  1:00 ` [dpdk-dev] [PATCH v4 1/2] net/virtio: add support Virtio link speed feature Marvin Liu
@ 2020-04-29  1:04 ` Marvin Liu
  2020-04-29  1:04   ` [dpdk-dev] [PATCH v4 2/2] vhost: binary search address mapping table Marvin Liu
  4 siblings, 1 reply; 31+ messages in thread
From: Marvin Liu @ 2020-04-29  1:04 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Marvin Liu

Replace dynamic memory allocator with dpdk memory allocator.

Signed-off-by: Marvin Liu <yong.liu@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bd1be0104..79fcb9d19 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -191,7 +191,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
 		dev->mem = NULL;
 	}
 
-	free(dev->guest_pages);
+	rte_free(dev->guest_pages);
 	dev->guest_pages = NULL;
 
 	if (dev->log_addr) {
@@ -903,11 +903,12 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	if (dev->nr_guest_pages == dev->max_guest_pages) {
 		dev->max_guest_pages *= 2;
 		old_pages = dev->guest_pages;
-		dev->guest_pages = realloc(dev->guest_pages,
-					dev->max_guest_pages * sizeof(*page));
-		if (!dev->guest_pages) {
+		dev->guest_pages = rte_realloc(dev->guest_pages,
+					dev->max_guest_pages * sizeof(*page),
+					RTE_CACHE_LINE_SIZE);
+		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
-			free(old_pages);
+			rte_free(old_pages);
 			return -1;
 		}
 	}
@@ -1062,10 +1063,12 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
 
 	dev->nr_guest_pages = 0;
-	if (!dev->guest_pages) {
+	if (dev->guest_pages == NULL) {
 		dev->max_guest_pages = 8;
-		dev->guest_pages = malloc(dev->max_guest_pages *
-						sizeof(struct guest_page));
+		dev->guest_pages = rte_zmalloc(NULL,
+					dev->max_guest_pages *
+					sizeof(struct guest_page),
+					RTE_CACHE_LINE_SIZE);
 		if (dev->guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR,
 				"(%d) failed to allocate memory "
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [dpdk-dev] [PATCH v4 2/2] vhost: binary search address mapping table
  2020-04-29  1:04 ` [dpdk-dev] [PATCH v4 1/2] " Marvin Liu
@ 2020-04-29  1:04   ` Marvin Liu
  2020-04-29 11:50     ` Maxime Coquelin
  0 siblings, 1 reply; 31+ messages in thread
From: Marvin Liu @ 2020-04-29  1:04 UTC (permalink / raw)
  To: maxime.coquelin, xiaolong.ye, zhihong.wang; +Cc: dev, Marvin Liu

If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
entry at a time. This harms performance when the guest memory backend
uses 2M hugepages. Now utilize binary search to find the entry in the
mapping table; meanwhile, set the threshold to 256 entries for linear search.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 507dbf214..998e133ad 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -546,20 +546,48 @@ extern int vhost_data_log_level;
 #define MAX_VHOST_DEVICE	1024
 extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
 
+#define VHOST_BINARY_SEARCH_THRESH 256
+
+static __rte_always_inline int guest_page_addrcmp(const void *p1,
+						const void *p2)
+{
+	const struct guest_page *page1 = (const struct guest_page *)p1;
+	const struct guest_page *page2 = (const struct guest_page *)p2;
+
+	if (page1->guest_phys_addr > page2->guest_phys_addr)
+		return 1;
+	if (page1->guest_phys_addr < page2->guest_phys_addr)
+		return -1;
+
+	return 0;
+}
+
 /* Convert guest physical address to host physical address */
 static __rte_always_inline rte_iova_t
 gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
 {
 	uint32_t i;
 	struct guest_page *page;
-
-	for (i = 0; i < dev->nr_guest_pages; i++) {
-		page = &dev->guest_pages[i];
-
-		if (gpa >= page->guest_phys_addr &&
-		    gpa + size < page->guest_phys_addr + page->size) {
-			return gpa - page->guest_phys_addr +
-			       page->host_phys_addr;
+	struct guest_page key;
+
+	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
+		key.guest_phys_addr = gpa;
+		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
+			       sizeof(struct guest_page), guest_page_addrcmp);
+		if (page) {
+			if (gpa + size < page->guest_phys_addr + page->size)
+				return gpa - page->guest_phys_addr +
+					page->host_phys_addr;
+		}
+	} else {
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+
+			if (gpa >= page->guest_phys_addr &&
+			    gpa + size < page->guest_phys_addr +
+			    page->size)
+				return gpa - page->guest_phys_addr +
+				       page->host_phys_addr;
 		}
 	}
 
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 79fcb9d19..15e50d27d 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -965,6 +965,12 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 		reg_size -= size;
 	}
 
+	/* sort guest page array if over binary search threshold */
+	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
+		qsort((void *)dev->guest_pages, dev->nr_guest_pages,
+			sizeof(struct guest_page), guest_page_addrcmp);
+	}
+
 	return 0;
 }
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/2] vhost: utilize dpdk dynamic memory allocator
  2020-04-29  1:01   ` [dpdk-dev] [PATCH v4 2/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
@ 2020-04-29  1:06     ` Liu, Yong
  2020-04-29 17:47       ` Maxime Coquelin
  0 siblings, 1 reply; 31+ messages in thread
From: Liu, Yong @ 2020-04-29  1:06 UTC (permalink / raw)
  To: maxime.coquelin, Ye, Xiaolong, Wang, Zhihong; +Cc: dev

This is sent by mistake, please ignore.

> -----Original Message-----
> From: Liu, Yong <yong.liu@intel.com>
> Sent: Wednesday, April 29, 2020 9:01 AM
> To: maxime.coquelin@redhat.com; Ye, Xiaolong <xiaolong.ye@intel.com>;
> Wang, Zhihong <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; Liu, Yong <yong.liu@intel.com>
> Subject: [PATCH v4 2/2] vhost: utilize dpdk dynamic memory allocator
> 
> Replace dynamic memory allocator with dpdk memory allocator.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> 
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index bd1be0104..79fcb9d19 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -191,7 +191,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
>  		dev->mem = NULL;
>  	}
> 
> -	free(dev->guest_pages);
> +	rte_free(dev->guest_pages);
>  	dev->guest_pages = NULL;
> 
>  	if (dev->log_addr) {
> @@ -903,11 +903,12 @@ add_one_guest_page(struct virtio_net *dev,
> uint64_t guest_phys_addr,
>  	if (dev->nr_guest_pages == dev->max_guest_pages) {
>  		dev->max_guest_pages *= 2;
>  		old_pages = dev->guest_pages;
> -		dev->guest_pages = realloc(dev->guest_pages,
> -					dev->max_guest_pages *
> sizeof(*page));
> -		if (!dev->guest_pages) {
> +		dev->guest_pages = rte_realloc(dev->guest_pages,
> +					dev->max_guest_pages *
> sizeof(*page),
> +					RTE_CACHE_LINE_SIZE);
> +		if (dev->guest_pages == NULL) {
>  			VHOST_LOG_CONFIG(ERR, "cannot realloc
> guest_pages\n");
> -			free(old_pages);
> +			rte_free(old_pages);
>  			return -1;
>  		}
>  	}
> @@ -1062,10 +1063,12 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *msg,
>  			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
> 
>  	dev->nr_guest_pages = 0;
> -	if (!dev->guest_pages) {
> +	if (dev->guest_pages == NULL) {
>  		dev->max_guest_pages = 8;
> -		dev->guest_pages = malloc(dev->max_guest_pages *
> -						sizeof(struct guest_page));
> +		dev->guest_pages = rte_zmalloc(NULL,
> +					dev->max_guest_pages *
> +					sizeof(struct guest_page),
> +					RTE_CACHE_LINE_SIZE);
>  		if (dev->guest_pages == NULL) {
>  			VHOST_LOG_CONFIG(ERR,
>  				"(%d) failed to allocate memory "
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/2] vhost: binary search address mapping table
  2020-04-29  1:04   ` [dpdk-dev] [PATCH v4 2/2] vhost: binary search address mapping table Marvin Liu
@ 2020-04-29 11:50     ` Maxime Coquelin
  0 siblings, 0 replies; 31+ messages in thread
From: Maxime Coquelin @ 2020-04-29 11:50 UTC (permalink / raw)
  To: Marvin Liu, xiaolong.ye, zhihong.wang; +Cc: dev



On 4/29/20 3:04 AM, Marvin Liu wrote:
> If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
> entry at a time. This harms performance when the guest memory backend
> uses 2M hugepages. Now utilize binary search to find the entry in the
> mapping table; meanwhile, set the threshold to 256 entries for linear search.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/2] vhost: utilize dpdk dynamic memory allocator
  2020-04-29  1:06     ` Liu, Yong
@ 2020-04-29 17:47       ` Maxime Coquelin
  0 siblings, 0 replies; 31+ messages in thread
From: Maxime Coquelin @ 2020-04-29 17:47 UTC (permalink / raw)
  To: Liu, Yong, Ye, Xiaolong, Wang, Zhihong; +Cc: dev



On 4/29/20 3:06 AM, Liu, Yong wrote:
> This is sent by mistake, please ignore.
> 
>> -----Original Message-----
>> From: Liu, Yong <yong.liu@intel.com>
>> Sent: Wednesday, April 29, 2020 9:01 AM
>> To: maxime.coquelin@redhat.com; Ye, Xiaolong <xiaolong.ye@intel.com>;
>> Wang, Zhihong <zhihong.wang@intel.com>
>> Cc: dev@dpdk.org; Liu, Yong <yong.liu@intel.com>
>> Subject: [PATCH v4 2/2] vhost: utilize dpdk dynamic memory allocator
>>
>> Replace dynamic memory allocator with dpdk memory allocator.
>>
>> Signed-off-by: Marvin Liu <yong.liu@intel.com>
>> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>
Applied to dpdk-next-virtio/master.

Thanks,
Maxime


^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2020-04-29 17:48 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-16 15:33 [dpdk-dev] [PATCH] vhost: cache guest/vhost physical address mapping Marvin Liu
2020-03-16 13:48 ` Ye Xiaolong
2020-03-17  1:01   ` Liu, Yong
2020-04-01 14:50 ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
2020-04-01 10:08   ` Gavin Hu
2020-04-01 14:50   ` [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation Marvin Liu
2020-04-01 10:07     ` Gavin Hu
2020-04-01 13:01       ` Liu, Yong
2020-04-02  3:04         ` Gavin Hu
2020-04-02  4:45           ` Liu, Yong
2020-04-03  8:22         ` Ma, LihongX
2020-04-02  2:57     ` Ye Xiaolong
2020-04-27  8:45     ` Maxime Coquelin
2020-04-28  0:44       ` Liu, Yong
2020-04-02  2:57   ` [dpdk-dev] [PATCH v2 1/2] vhost: utilize dpdk dynamic memory allocator Ye Xiaolong
2020-04-03  8:22   ` Ma, LihongX
2020-04-15 11:15   ` Maxime Coquelin
2020-04-28  9:13 ` [dpdk-dev] [PATCH v3 " Marvin Liu
2020-04-28  9:13   ` [dpdk-dev] [PATCH v3 2/2] vhost: binary search address mapping table Marvin Liu
2020-04-28 15:28     ` Maxime Coquelin
2020-04-28 15:38       ` Liu, Yong
2020-04-28 12:51   ` [dpdk-dev] [PATCH v3 1/2] vhost: utilize dpdk dynamic memory allocator Maxime Coquelin
2020-04-29  1:00 ` [dpdk-dev] [PATCH v4 1/2] net/virtio: add support Virtio link speed feature Marvin Liu
2020-04-29  1:00   ` [dpdk-dev] [PATCH v4 1/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
2020-04-29  1:00   ` [dpdk-dev] [PATCH v4 2/2] vhost: binary search address mapping table Marvin Liu
2020-04-29  1:01   ` [dpdk-dev] [PATCH v4 2/2] vhost: utilize dpdk dynamic memory allocator Marvin Liu
2020-04-29  1:06     ` Liu, Yong
2020-04-29 17:47       ` Maxime Coquelin
2020-04-29  1:04 ` [dpdk-dev] [PATCH v4 1/2] " Marvin Liu
2020-04-29  1:04   ` [dpdk-dev] [PATCH v4 2/2] vhost: binary search address mapping table Marvin Liu
2020-04-29 11:50     ` Maxime Coquelin

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git