DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly
@ 2020-12-22 13:56 Maxime Coquelin
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 1/3] vhost: remove unused Vhost virtqueue field Maxime Coquelin
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Maxime Coquelin @ 2020-12-22 13:56 UTC (permalink / raw)
  To: dev, chenbo.xia, amorenoz; +Cc: Maxime Coquelin

As done for Virtio PMD, this series improves cache utilization
of the vhost_virtqueue struct by removing an unused field,
making the live-migration cache dynamically allocated at
live-migration setup time, and by moving fields
around so that hot fields are on the first cachelines.

With this series, the struct vhost_virtqueue size goes
from 832B (13 cachelines) down to 320B (5 cachelines).

With this series and the virtio one, I measure a gain
of up to 8% in IO loop micro-benchmark with packed
ring, and 5% with split ring.

I don't have a setup at hand to run PVP testing, but
it might be interesting to get the numbers as I
suspect the cache pressure is higher in this test as
in real use-cases.

Maxime Coquelin (3):
  vhost: remove unused Vhost virtqueue field
  vhost: move dirty logging cache out of the virtqueue
  vhost: optimize vhost virtqueue struct

 lib/librte_vhost/vhost.c      | 14 +++++++--
 lib/librte_vhost/vhost.h      | 54 +++++++++++++++++------------------
 lib/librte_vhost/vhost_user.c | 25 ++++++++++++++++
 3 files changed, 64 insertions(+), 29 deletions(-)

-- 
2.29.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [dpdk-dev] [PATCH 1/3] vhost: remove unused Vhost virtqueue field
  2020-12-22 13:56 [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly Maxime Coquelin
@ 2020-12-22 13:56 ` Maxime Coquelin
  2021-01-08  4:59   ` Xia, Chenbo
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue Maxime Coquelin
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Maxime Coquelin @ 2020-12-22 13:56 UTC (permalink / raw)
  To: dev, chenbo.xia, amorenoz; +Cc: Maxime Coquelin

This patch removes the "backend" field of the
vhost_virtqueue struct, which is not used by the
library.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.c | 2 --
 lib/librte_vhost/vhost.h | 2 --
 2 files changed, 4 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index b83cf639eb..4e5df862aa 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -557,8 +557,6 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
 
 	vhost_user_iotlb_init(dev, vring_idx);
-	/* Backends are set to -1 indicating an inactive device. */
-	vq->backend = -1;
 }
 
 static void
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 361c9f79b3..d132e4ae54 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -143,8 +143,6 @@ struct vhost_virtqueue {
 #define VIRTIO_INVALID_EVENTFD		(-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
 
-	/* Backend value to determine if device should started/stopped */
-	int			backend;
 	int			enabled;
 	int			access_ok;
 	int			ready;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [dpdk-dev] [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue
  2020-12-22 13:56 [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly Maxime Coquelin
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 1/3] vhost: remove unused Vhost virtqueue field Maxime Coquelin
@ 2020-12-22 13:56 ` Maxime Coquelin
  2021-01-08  7:20   ` Xia, Chenbo
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 3/3] vhost: optimize vhost virtqueue struct Maxime Coquelin
  2021-01-25 17:30 ` [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly Maxime Coquelin
  3 siblings, 1 reply; 9+ messages in thread
From: Maxime Coquelin @ 2020-12-22 13:56 UTC (permalink / raw)
  To: dev, chenbo.xia, amorenoz; +Cc: Maxime Coquelin

This patch moves the per-virtqueue dirty logging cache
out of the virtqueue struct, by allocating it dynamically
only when live-migration is enabled.

It saves 8 cachelines in vhost_virtqueue struct.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.c      | 12 ++++++++++++
 lib/librte_vhost/vhost.h      |  2 +-
 lib/librte_vhost/vhost_user.c | 25 +++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 4e5df862aa..ec6459b2d1 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -144,6 +144,10 @@ __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
 	if (unlikely(!dev->log_base))
 		return;
 
+	/* No cache, nothing to sync */
+	if (unlikely(!vq->log_cache))
+		return;
+
 	rte_smp_wmb();
 
 	log_base = (unsigned long *)(uintptr_t)dev->log_base;
@@ -176,6 +180,14 @@ vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	uint32_t offset = page / (sizeof(unsigned long) << 3);
 	int i;
 
+	if (unlikely(!vq->log_cache)) {
+		/* No logging cache allocated, write dirty log map directly */
+		rte_smp_wmb();
+		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+
+		return;
+	}
+
 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
 		struct log_cache_entry *elem = vq->log_cache + i;
 
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d132e4ae54..e2f14034b4 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -183,7 +183,7 @@ struct vhost_virtqueue {
 	bool			used_wrap_counter;
 	bool			avail_wrap_counter;
 
-	struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
+	struct log_cache_entry *log_cache;
 	uint16_t log_cache_nb_elem;
 
 	rte_rwlock_t	iotlb_lock;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 45c8ac09da..7ac3963a07 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1978,6 +1978,11 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
 	rte_free(vq->batch_copy_elems);
 	vq->batch_copy_elems = NULL;
 
+	if (vq->log_cache) {
+		rte_free(vq->log_cache);
+		vq->log_cache = NULL;
+	}
+
 	msg->size = sizeof(msg->payload.state);
 	msg->fd_num = 0;
 
@@ -2077,6 +2082,7 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg,
 	int fd = msg->fds[0];
 	uint64_t size, off;
 	void *addr;
+	uint32_t i;
 
 	if (validate_msg_fds(msg, 1) != 0)
 		return RTE_VHOST_MSG_RESULT_ERR;
@@ -2130,6 +2136,25 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg,
 	dev->log_base = dev->log_addr + off;
 	dev->log_size = size;
 
+	for (i = 0; i < dev->nr_vring; i++) {
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+		if (vq->log_cache) {
+			rte_free(vq->log_cache);
+			vq->log_cache = NULL;
+		}
+		vq->log_cache_nb_elem = 0;
+		vq->log_cache = rte_zmalloc("vq log cache",
+				sizeof(struct log_cache_entry) * VHOST_LOG_CACHE_NR,
+				0);
+		/*
+		 * If log cache alloc fail, don't fail migration, but no
+		 * caching will be done, which will impact performance
+		 */
+		if (!vq->log_cache)
+			VHOST_LOG_CONFIG(ERR, "Failed to allocate VQ logging cache\n");
+	}
+
 	/*
 	 * The spec is not clear about it (yet), but QEMU doesn't expect
 	 * any payload in the reply.
-- 
2.29.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [dpdk-dev] [PATCH 3/3] vhost: optimize vhost virtqueue struct
  2020-12-22 13:56 [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly Maxime Coquelin
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 1/3] vhost: remove unused Vhost virtqueue field Maxime Coquelin
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue Maxime Coquelin
@ 2020-12-22 13:56 ` Maxime Coquelin
  2021-01-08  7:28   ` Xia, Chenbo
  2021-01-25 17:30 ` [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly Maxime Coquelin
  3 siblings, 1 reply; 9+ messages in thread
From: Maxime Coquelin @ 2020-12-22 13:56 UTC (permalink / raw)
  To: dev, chenbo.xia, amorenoz; +Cc: Maxime Coquelin

This patch moves vhost_virtqueue struct fields in order
to both optimize packing and move hot fields to the first
cachelines.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.h | 52 +++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index e2f14034b4..ce76330d15 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -133,7 +133,7 @@ struct vhost_virtqueue {
 		struct vring_used	*used;
 		struct vring_packed_desc_event *device_event;
 	};
-	uint32_t		size;
+	uint16_t		size;
 
 	uint16_t		last_avail_idx;
 	uint16_t		last_used_idx;
@@ -143,29 +143,12 @@ struct vhost_virtqueue {
 #define VIRTIO_INVALID_EVENTFD		(-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
 
-	int			enabled;
-	int			access_ok;
-	int			ready;
-	int			notif_enable;
-#define VIRTIO_UNINITIALIZED_NOTIF	(-1)
+	bool			enabled;
+	bool			access_ok;
+	bool			ready;
 
 	rte_spinlock_t		access_lock;
 
-	/* Used to notify the guest (trigger interrupt) */
-	int			callfd;
-	/* Currently unused as polling mode is enabled */
-	int			kickfd;
-
-	/* Physical address of used ring, for logging */
-	uint64_t		log_guest_addr;
-
-	/* inflight share memory info */
-	union {
-		struct rte_vhost_inflight_info_split *inflight_split;
-		struct rte_vhost_inflight_info_packed *inflight_packed;
-	};
-	struct rte_vhost_resubmit_info *resubmit_inflight;
-	uint64_t		global_counter;
 
 	union {
 		struct vring_used_elem  *shadow_used_split;
@@ -176,22 +159,36 @@ struct vhost_virtqueue {
 	uint16_t		shadow_aligned_idx;
 	/* Record packed ring first dequeue desc index */
 	uint16_t		shadow_last_used_idx;
-	struct vhost_vring_addr ring_addrs;
 
-	struct batch_copy_elem	*batch_copy_elems;
 	uint16_t		batch_copy_nb_elems;
+	struct batch_copy_elem	*batch_copy_elems;
 	bool			used_wrap_counter;
 	bool			avail_wrap_counter;
 
-	struct log_cache_entry *log_cache;
+	/* Physical address of used ring, for logging */
 	uint16_t log_cache_nb_elem;
+	uint64_t		log_guest_addr;
+	struct log_cache_entry *log_cache;
 
 	rte_rwlock_t	iotlb_lock;
 	rte_rwlock_t	iotlb_pending_lock;
 	struct rte_mempool *iotlb_pool;
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
-	int				iotlb_cache_nr;
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
+	int				iotlb_cache_nr;
+
+	/* Used to notify the guest (trigger interrupt) */
+	int			callfd;
+	/* Currently unused as polling mode is enabled */
+	int			kickfd;
+
+	/* inflight share memory info */
+	union {
+		struct rte_vhost_inflight_info_split *inflight_split;
+		struct rte_vhost_inflight_info_packed *inflight_packed;
+	};
+	struct rte_vhost_resubmit_info *resubmit_inflight;
+	uint64_t		global_counter;
 
 	/* operation callbacks for async dma */
 	struct rte_vhost_async_channel_ops	async_ops;
@@ -210,6 +207,11 @@ struct vhost_virtqueue {
 	bool		async_inorder;
 	bool		async_registered;
 	uint16_t	async_threshold;
+
+	int			notif_enable;
+#define VIRTIO_UNINITIALIZED_NOTIF	(-1)
+
+	struct vhost_vring_addr ring_addrs;
 } __rte_cache_aligned;
 
 /* Virtio device status as per Virtio specification */
-- 
2.29.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH 1/3] vhost: remove unused Vhost virtqueue field
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 1/3] vhost: remove unused Vhost virtqueue field Maxime Coquelin
@ 2021-01-08  4:59   ` Xia, Chenbo
  0 siblings, 0 replies; 9+ messages in thread
From: Xia, Chenbo @ 2021-01-08  4:59 UTC (permalink / raw)
  To: Maxime Coquelin, dev, amorenoz

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, December 22, 2020 9:57 PM
> To: dev@dpdk.org; Xia, Chenbo <chenbo.xia@intel.com>; amorenoz@redhat.com
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH 1/3] vhost: remove unused Vhost virtqueue field
> 
> This patch removes the "backend" field of the
> vhost_virtqueue struct, which is not used by the
> library.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_vhost/vhost.c | 2 --
>  lib/librte_vhost/vhost.h | 2 --
>  2 files changed, 4 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index b83cf639eb..4e5df862aa 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -557,8 +557,6 @@ init_vring_queue(struct virtio_net *dev, uint32_t
> vring_idx)
>  	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
> 
>  	vhost_user_iotlb_init(dev, vring_idx);
> -	/* Backends are set to -1 indicating an inactive device. */
> -	vq->backend = -1;
>  }
> 
>  static void
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 361c9f79b3..d132e4ae54 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -143,8 +143,6 @@ struct vhost_virtqueue {
>  #define VIRTIO_INVALID_EVENTFD		(-1)
>  #define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
> 
> -	/* Backend value to determine if device should started/stopped */
> -	int			backend;
>  	int			enabled;
>  	int			access_ok;
>  	int			ready;
> --
> 2.29.2

Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue Maxime Coquelin
@ 2021-01-08  7:20   ` Xia, Chenbo
  2021-03-16 12:25     ` Maxime Coquelin
  0 siblings, 1 reply; 9+ messages in thread
From: Xia, Chenbo @ 2021-01-08  7:20 UTC (permalink / raw)
  To: Maxime Coquelin, dev, amorenoz

Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, December 22, 2020 9:57 PM
> To: dev@dpdk.org; Xia, Chenbo <chenbo.xia@intel.com>; amorenoz@redhat.com
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue
> 
> This patch moves the per-virtqueue's dirty logging cache
> out of the virtqueue struct, by allocating it dynamically
> only when live-migration is enabled.
> 
> It saves 8 cachelines in vhost_virtqueue struct.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_vhost/vhost.c      | 12 ++++++++++++
>  lib/librte_vhost/vhost.h      |  2 +-
>  lib/librte_vhost/vhost_user.c | 25 +++++++++++++++++++++++++
>  3 files changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 4e5df862aa..ec6459b2d1 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -144,6 +144,10 @@ __vhost_log_cache_sync(struct virtio_net *dev, struct
> vhost_virtqueue *vq)
>  	if (unlikely(!dev->log_base))
>  		return;
> 
> +	/* No cache, nothing to sync */
> +	if (unlikely(!vq->log_cache))
> +		return;
> +
>  	rte_smp_wmb();
> 
>  	log_base = (unsigned long *)(uintptr_t)dev->log_base;
> @@ -176,6 +180,14 @@ vhost_log_cache_page(struct virtio_net *dev, struct
> vhost_virtqueue *vq,
>  	uint32_t offset = page / (sizeof(unsigned long) << 3);
>  	int i;
> 
> +	if (unlikely(!vq->log_cache)) {
> +		/* No logging cache allocated, write dirty log map directly */
> +		rte_smp_wmb();
> +		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
> +
> +		return;
> +	}
> +
>  	for (i = 0; i < vq->log_cache_nb_elem; i++) {
>  		struct log_cache_entry *elem = vq->log_cache + i;
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index d132e4ae54..e2f14034b4 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -183,7 +183,7 @@ struct vhost_virtqueue {
>  	bool			used_wrap_counter;
>  	bool			avail_wrap_counter;
> 
> -	struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
> +	struct log_cache_entry *log_cache;
>  	uint16_t log_cache_nb_elem;
> 
>  	rte_rwlock_t	iotlb_lock;
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 45c8ac09da..7ac3963a07 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -1978,6 +1978,11 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
>  	rte_free(vq->batch_copy_elems);
>  	vq->batch_copy_elems = NULL;
> 
> +	if (vq->log_cache) {
> +		rte_free(vq->log_cache);
> +		vq->log_cache = NULL;
> +	}
> +

I think we'd better check and free the log cache in function free_vq() too?
It is possible that the vhost device gets destroyed during migration, right?

Thanks,
Chenbo

>  	msg->size = sizeof(msg->payload.state);
>  	msg->fd_num = 0;
> 
> @@ -2077,6 +2082,7 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct
> VhostUserMsg *msg,
>  	int fd = msg->fds[0];
>  	uint64_t size, off;
>  	void *addr;
> +	uint32_t i;
> 
>  	if (validate_msg_fds(msg, 1) != 0)
>  		return RTE_VHOST_MSG_RESULT_ERR;
> @@ -2130,6 +2136,25 @@ vhost_user_set_log_base(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
>  	dev->log_base = dev->log_addr + off;
>  	dev->log_size = size;
> 
> +	for (i = 0; i < dev->nr_vring; i++) {
> +		struct vhost_virtqueue *vq = dev->virtqueue[i];
> +
> +		if (vq->log_cache) {
> +			rte_free(vq->log_cache);
> +			vq->log_cache = NULL;
> +		}
> +		vq->log_cache_nb_elem = 0;
> +		vq->log_cache = rte_zmalloc("vq log cache",
> +				sizeof(struct log_cache_entry) * VHOST_LOG_CACHE_NR,
> +				0);
> +		/*
> +		 * If log cache alloc fail, don't fail migration, but no
> +		 * caching will be done, which will impact performance
> +		 */
> +		if (!vq->log_cache)
> +			VHOST_LOG_CONFIG(ERR, "Failed to allocate VQ logging
> cache\n");
> +	}
> +
>  	/*
>  	 * The spec is not clear about it (yet), but QEMU doesn't expect
>  	 * any payload in the reply.
> --
> 2.29.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH 3/3] vhost: optimize vhost virtqueue struct
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 3/3] vhost: optimize vhost virtqueue struct Maxime Coquelin
@ 2021-01-08  7:28   ` Xia, Chenbo
  0 siblings, 0 replies; 9+ messages in thread
From: Xia, Chenbo @ 2021-01-08  7:28 UTC (permalink / raw)
  To: Maxime Coquelin, dev, amorenoz

Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, December 22, 2020 9:57 PM
> To: dev@dpdk.org; Xia, Chenbo <chenbo.xia@intel.com>; amorenoz@redhat.com
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH 3/3] vhost: optimize vhost virtqueue struct
> 
> This patch moves vhost_virtuqueue struct fields in order

s/virtuqueue/virtqueue

> to both optimize packing and move hot fields on the first
> cachelines.

'move hot fields to the first cacheline'? :)

> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_vhost/vhost.h | 52 +++++++++++++++++++++-------------------
>  1 file changed, 27 insertions(+), 25 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index e2f14034b4..ce76330d15 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -133,7 +133,7 @@ struct vhost_virtqueue {
>  		struct vring_used	*used;
>  		struct vring_packed_desc_event *device_event;
>  	};
> -	uint32_t		size;
> +	uint16_t		size;
> 
>  	uint16_t		last_avail_idx;
>  	uint16_t		last_used_idx;
> @@ -143,29 +143,12 @@ struct vhost_virtqueue {
>  #define VIRTIO_INVALID_EVENTFD		(-1)
>  #define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
> 
> -	int			enabled;
> -	int			access_ok;
> -	int			ready;
> -	int			notif_enable;
> -#define VIRTIO_UNINITIALIZED_NOTIF	(-1)
> +	bool			enabled;
> +	bool			access_ok;
> +	bool			ready;
> 
>  	rte_spinlock_t		access_lock;
> 
> -	/* Used to notify the guest (trigger interrupt) */
> -	int			callfd;
> -	/* Currently unused as polling mode is enabled */
> -	int			kickfd;
> -
> -	/* Physical address of used ring, for logging */
> -	uint64_t		log_guest_addr;
> -
> -	/* inflight share memory info */
> -	union {
> -		struct rte_vhost_inflight_info_split *inflight_split;
> -		struct rte_vhost_inflight_info_packed *inflight_packed;
> -	};
> -	struct rte_vhost_resubmit_info *resubmit_inflight;
> -	uint64_t		global_counter;
> 
>  	union {
>  		struct vring_used_elem  *shadow_used_split;
> @@ -176,22 +159,36 @@ struct vhost_virtqueue {
>  	uint16_t		shadow_aligned_idx;
>  	/* Record packed ring first dequeue desc index */
>  	uint16_t		shadow_last_used_idx;
> -	struct vhost_vring_addr ring_addrs;
> 
> -	struct batch_copy_elem	*batch_copy_elems;
>  	uint16_t		batch_copy_nb_elems;
> +	struct batch_copy_elem	*batch_copy_elems;
>  	bool			used_wrap_counter;
>  	bool			avail_wrap_counter;
> 
> -	struct log_cache_entry *log_cache;
> +	/* Physical address of used ring, for logging */
>  	uint16_t log_cache_nb_elem;
> +	uint64_t		log_guest_addr;

Looks better to align 'log_cache_nb_elem' with 'log_guest_addr'?

Thanks,
Chenbo

> +	struct log_cache_entry *log_cache;
> 
>  	rte_rwlock_t	iotlb_lock;
>  	rte_rwlock_t	iotlb_pending_lock;
>  	struct rte_mempool *iotlb_pool;
>  	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
> -	int				iotlb_cache_nr;
>  	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
> +	int				iotlb_cache_nr;
> +
> +	/* Used to notify the guest (trigger interrupt) */
> +	int			callfd;
> +	/* Currently unused as polling mode is enabled */
> +	int			kickfd;
> +
> +	/* inflight share memory info */
> +	union {
> +		struct rte_vhost_inflight_info_split *inflight_split;
> +		struct rte_vhost_inflight_info_packed *inflight_packed;
> +	};
> +	struct rte_vhost_resubmit_info *resubmit_inflight;
> +	uint64_t		global_counter;
> 
>  	/* operation callbacks for async dma */
>  	struct rte_vhost_async_channel_ops	async_ops;
> @@ -210,6 +207,11 @@ struct vhost_virtqueue {
>  	bool		async_inorder;
>  	bool		async_registered;
>  	uint16_t	async_threshold;
> +
> +	int			notif_enable;
> +#define VIRTIO_UNINITIALIZED_NOTIF	(-1)
> +
> +	struct vhost_vring_addr ring_addrs;
>  } __rte_cache_aligned;
> 
>  /* Virtio device status as per Virtio specification */
> --
> 2.29.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly
  2020-12-22 13:56 [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly Maxime Coquelin
                   ` (2 preceding siblings ...)
  2020-12-22 13:56 ` [dpdk-dev] [PATCH 3/3] vhost: optimize vhost virtqueue struct Maxime Coquelin
@ 2021-01-25 17:30 ` Maxime Coquelin
  3 siblings, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2021-01-25 17:30 UTC (permalink / raw)
  To: dev, chenbo.xia, amorenoz



On 12/22/20 2:56 PM, Maxime Coquelin wrote:
> As done for Virtio PMD, this series improves cache utilization
> of the vhost_virtqueue struct by removing unused field,
> make the live-migration cache dynamically allocated at
> live-migration setup time and by moving fields
> around so that hot fields are on the first cachelines.
> 
> With this series, The struct vhost_virtqueue size goes
> from 832B (13 cachelines) down to 320B (5 cachelines).
> 
> With this series and the virtio one, I measure a gain
> of up to 8% in IO loop micro-benchmark with packed
> ring, and 5% with split ring.
> 
> I don't have a setup at hand to run PVP testing, but
> it might be interresting to get the numbers as I
> suspect the cache pressure is higher in this test as
> in real use-cases.
> 
> Maxime Coquelin (3):
>   vhost: remove unused Vhost virtqueue field
>   vhost: move dirty logging cache out of the virtqueue
>   vhost: optimize vhost virtqueue struct
> 
>  lib/librte_vhost/vhost.c      | 14 +++++++--
>  lib/librte_vhost/vhost.h      | 54 +++++++++++++++++------------------
>  lib/librte_vhost/vhost_user.c | 25 ++++++++++++++++
>  3 files changed, 64 insertions(+), 29 deletions(-)
> 
Deferring to v21.05 release.

Maxime


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue
  2021-01-08  7:20   ` Xia, Chenbo
@ 2021-03-16 12:25     ` Maxime Coquelin
  0 siblings, 0 replies; 9+ messages in thread
From: Maxime Coquelin @ 2021-03-16 12:25 UTC (permalink / raw)
  To: Xia, Chenbo, dev, amorenoz

Hi Chenbo,

On 1/8/21 8:20 AM, Xia, Chenbo wrote:
> Hi Maxime,
> 
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Tuesday, December 22, 2020 9:57 PM
>> To: dev@dpdk.org; Xia, Chenbo <chenbo.xia@intel.com>; amorenoz@redhat.com
>> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Subject: [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue
>>
>> This patch moves the per-virtqueue's dirty logging cache
>> out of the virtqueue struct, by allocating it dynamically
>> only when live-migration is enabled.
>>
>> It saves 8 cachelines in vhost_virtqueue struct.
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>>  lib/librte_vhost/vhost.c      | 12 ++++++++++++
>>  lib/librte_vhost/vhost.h      |  2 +-
>>  lib/librte_vhost/vhost_user.c | 25 +++++++++++++++++++++++++
>>  3 files changed, 38 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>> index 4e5df862aa..ec6459b2d1 100644
>> --- a/lib/librte_vhost/vhost.c
>> +++ b/lib/librte_vhost/vhost.c
>> @@ -144,6 +144,10 @@ __vhost_log_cache_sync(struct virtio_net *dev, struct
>> vhost_virtqueue *vq)
>>  	if (unlikely(!dev->log_base))
>>  		return;
>>
>> +	/* No cache, nothing to sync */
>> +	if (unlikely(!vq->log_cache))
>> +		return;
>> +
>>  	rte_smp_wmb();
>>
>>  	log_base = (unsigned long *)(uintptr_t)dev->log_base;
>> @@ -176,6 +180,14 @@ vhost_log_cache_page(struct virtio_net *dev, struct
>> vhost_virtqueue *vq,
>>  	uint32_t offset = page / (sizeof(unsigned long) << 3);
>>  	int i;
>>
>> +	if (unlikely(!vq->log_cache)) {
>> +		/* No logging cache allocated, write dirty log map directly */
>> +		rte_smp_wmb();
>> +		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
>> +
>> +		return;
>> +	}
>> +
>>  	for (i = 0; i < vq->log_cache_nb_elem; i++) {
>>  		struct log_cache_entry *elem = vq->log_cache + i;
>>
>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>> index d132e4ae54..e2f14034b4 100644
>> --- a/lib/librte_vhost/vhost.h
>> +++ b/lib/librte_vhost/vhost.h
>> @@ -183,7 +183,7 @@ struct vhost_virtqueue {
>>  	bool			used_wrap_counter;
>>  	bool			avail_wrap_counter;
>>
>> -	struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
>> +	struct log_cache_entry *log_cache;
>>  	uint16_t log_cache_nb_elem;
>>
>>  	rte_rwlock_t	iotlb_lock;
>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>> index 45c8ac09da..7ac3963a07 100644
>> --- a/lib/librte_vhost/vhost_user.c
>> +++ b/lib/librte_vhost/vhost_user.c
>> @@ -1978,6 +1978,11 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
>>  	rte_free(vq->batch_copy_elems);
>>  	vq->batch_copy_elems = NULL;
>>
>> +	if (vq->log_cache) {
>> +		rte_free(vq->log_cache);
>> +		vq->log_cache = NULL;
>> +	}
>> +
> 
> I think we'd better check and free the log cache in function free_vq() too?
> It is possible that during migration, vhost destroys, right?

Correct, I will do it too in free_vq().

Thanks!
Maxime

> Thanks,
> Chenbo
> 
>>  	msg->size = sizeof(msg->payload.state);
>>  	msg->fd_num = 0;
>>
>> @@ -2077,6 +2082,7 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct
>> VhostUserMsg *msg,
>>  	int fd = msg->fds[0];
>>  	uint64_t size, off;
>>  	void *addr;
>> +	uint32_t i;
>>
>>  	if (validate_msg_fds(msg, 1) != 0)
>>  		return RTE_VHOST_MSG_RESULT_ERR;
>> @@ -2130,6 +2136,25 @@ vhost_user_set_log_base(struct virtio_net **pdev,
>> struct VhostUserMsg *msg,
>>  	dev->log_base = dev->log_addr + off;
>>  	dev->log_size = size;
>>
>> +	for (i = 0; i < dev->nr_vring; i++) {
>> +		struct vhost_virtqueue *vq = dev->virtqueue[i];
>> +
>> +		if (vq->log_cache) {
>> +			rte_free(vq->log_cache);
>> +			vq->log_cache = NULL;
>> +		}
>> +		vq->log_cache_nb_elem = 0;
>> +		vq->log_cache = rte_zmalloc("vq log cache",
>> +				sizeof(struct log_cache_entry) * VHOST_LOG_CACHE_NR,
>> +				0);
>> +		/*
>> +		 * If log cache alloc fail, don't fail migration, but no
>> +		 * caching will be done, which will impact performance
>> +		 */
>> +		if (!vq->log_cache)
>> +			VHOST_LOG_CONFIG(ERR, "Failed to allocate VQ logging
>> cache\n");
>> +	}
>> +
>>  	/*
>>  	 * The spec is not clear about it (yet), but QEMU doesn't expect
>>  	 * any payload in the reply.
>> --
>> 2.29.2
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-03-16 12:25 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-22 13:56 [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly Maxime Coquelin
2020-12-22 13:56 ` [dpdk-dev] [PATCH 1/3] vhost: remove unused Vhost virtqueue field Maxime Coquelin
2021-01-08  4:59   ` Xia, Chenbo
2020-12-22 13:56 ` [dpdk-dev] [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue Maxime Coquelin
2021-01-08  7:20   ` Xia, Chenbo
2021-03-16 12:25     ` Maxime Coquelin
2020-12-22 13:56 ` [dpdk-dev] [PATCH 3/3] vhost: optimize vhost virtqueue struct Maxime Coquelin
2021-01-08  7:28   ` Xia, Chenbo
2021-01-25 17:30 ` [dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly Maxime Coquelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).