Re: [dpdk-dev] [PATCH v6 6/7] vhost: allocate all data on same node as virtqueue

DPDK patches and discussions
 help / color / mirror / Atom feed

From: "Xia, Chenbo" <chenbo.xia@intel.com>
To: Maxime Coquelin <maxime.coquelin@redhat.com>,
	"dev@dpdk.org" <dev@dpdk.org>,
	"david.marchand@redhat.com" <david.marchand@redhat.com>
Subject: Re: [dpdk-dev] [PATCH v6 6/7] vhost: allocate all data on same node as virtqueue
Date: Fri, 25 Jun 2021 07:26:40 +0000	[thread overview]
Message-ID: <MN2PR11MB40634F98EB43F2B9EDFE14E79C069@MN2PR11MB4063.namprd11.prod.outlook.com> (raw)
In-Reply-To: <20210618140357.255995-7-maxime.coquelin@redhat.com>

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Friday, June 18, 2021 10:04 PM
> To: dev@dpdk.org; david.marchand@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH v6 6/7] vhost: allocate all data on same node as virtqueue
> 
> This patch saves the NUMA node the virtqueue is allocated
> on at init time, in order to allocate all other data on the
> same node.
> 
> While most of the data is allocated before numa_realloc()
> is called, and so will be reallocated properly, some
> data, like the log cache, is most likely allocated afterwards.
> 
> For the virtio device metadata, we decide to allocate it
> on the same node as VQ 0.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/vhost/vhost.c      | 34 ++++++++++++++++------------------
>  lib/vhost/vhost.h      |  1 +
>  lib/vhost/vhost_user.c | 41 ++++++++++++++++++++++++++++-------------
>  3 files changed, 45 insertions(+), 31 deletions(-)
> 
> diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
> index c96f6335c8..0000cd3297 100644
> --- a/lib/vhost/vhost.c
> +++ b/lib/vhost/vhost.c
> @@ -261,7 +261,7 @@ vhost_alloc_copy_ind_table(struct virtio_net *dev, struct
> vhost_virtqueue *vq,
>  	uint64_t src, dst;
>  	uint64_t len, remain = desc_len;
> 
> -	idesc = rte_malloc(__func__, desc_len, 0);
> +	idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
>  	if (unlikely(!idesc))
>  		return NULL;
> 
> @@ -549,6 +549,7 @@ static void
>  init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>  {
>  	struct vhost_virtqueue *vq;
> +	int numa_node = SOCKET_ID_ANY;
> 
>  	if (vring_idx >= VHOST_MAX_VRING) {
>  		VHOST_LOG_CONFIG(ERR,
> @@ -570,6 +571,15 @@ init_vring_queue(struct virtio_net *dev, uint32_t
> vring_idx)
>  	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
>  	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
> 
> +#ifdef RTE_LIBRTE_VHOST_NUMA
> +	if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
> +		VHOST_LOG_CONFIG(ERR, "(%d) failed to query numa node: %s\n",
> +			dev->vid, rte_strerror(errno));
> +		numa_node = SOCKET_ID_ANY;
> +	}
> +#endif
> +	vq->numa_node = numa_node;
> +
>  	vhost_user_iotlb_init(dev, vring_idx);
>  }
> 
> @@ -1616,7 +1626,6 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>  	struct vhost_virtqueue *vq;
>  	struct virtio_net *dev = get_device(vid);
>  	struct rte_vhost_async_features f;
> -	int node;
> 
>  	if (dev == NULL || ops == NULL)
>  		return -1;
> @@ -1651,20 +1660,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>  		goto reg_out;
>  	}
> 
> -#ifdef RTE_LIBRTE_VHOST_NUMA
> -	if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
> -		VHOST_LOG_CONFIG(ERR,
> -			"unable to get numa information in async register. "
> -			"allocating async buffer memory on the caller thread
> node\n");
> -		node = SOCKET_ID_ANY;
> -	}
> -#else
> -	node = SOCKET_ID_ANY;
> -#endif
> -
>  	vq->async_pkts_info = rte_malloc_socket(NULL,
>  			vq->size * sizeof(struct async_inflight_info),
> -			RTE_CACHE_LINE_SIZE, node);
> +			RTE_CACHE_LINE_SIZE, vq->numa_node);
>  	if (!vq->async_pkts_info) {
>  		vhost_free_async_mem(vq);
>  		VHOST_LOG_CONFIG(ERR,
> @@ -1675,7 +1673,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
> 
>  	vq->it_pool = rte_malloc_socket(NULL,
>  			VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
> -			RTE_CACHE_LINE_SIZE, node);
> +			RTE_CACHE_LINE_SIZE, vq->numa_node);
>  	if (!vq->it_pool) {
>  		vhost_free_async_mem(vq);
>  		VHOST_LOG_CONFIG(ERR,
> @@ -1686,7 +1684,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
> 
>  	vq->vec_pool = rte_malloc_socket(NULL,
>  			VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
> -			RTE_CACHE_LINE_SIZE, node);
> +			RTE_CACHE_LINE_SIZE, vq->numa_node);
>  	if (!vq->vec_pool) {
>  		vhost_free_async_mem(vq);
>  		VHOST_LOG_CONFIG(ERR,
> @@ -1698,7 +1696,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>  	if (vq_is_packed(dev)) {
>  		vq->async_buffers_packed = rte_malloc_socket(NULL,
>  			vq->size * sizeof(struct vring_used_elem_packed),
> -			RTE_CACHE_LINE_SIZE, node);
> +			RTE_CACHE_LINE_SIZE, vq->numa_node);
>  		if (!vq->async_buffers_packed) {
>  			vhost_free_async_mem(vq);
>  			VHOST_LOG_CONFIG(ERR,
> @@ -1709,7 +1707,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>  	} else {
>  		vq->async_descs_split = rte_malloc_socket(NULL,
>  			vq->size * sizeof(struct vring_used_elem),
> -			RTE_CACHE_LINE_SIZE, node);
> +			RTE_CACHE_LINE_SIZE, vq->numa_node);
>  		if (!vq->async_descs_split) {
>  			vhost_free_async_mem(vq);
>  			VHOST_LOG_CONFIG(ERR,
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 8078ddff79..8ffe387556 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -164,6 +164,7 @@ struct vhost_virtqueue {
> 
>  	uint16_t		batch_copy_nb_elems;
>  	struct batch_copy_elem	*batch_copy_elems;
> +	int			numa_node;
>  	bool			used_wrap_counter;
>  	bool			avail_wrap_counter;
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index d6ec4000c3..d8ec087dfc 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -433,10 +433,10 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
>  	if (vq_is_packed(dev)) {
>  		if (vq->shadow_used_packed)
>  			rte_free(vq->shadow_used_packed);
> -		vq->shadow_used_packed = rte_malloc(NULL,
> +		vq->shadow_used_packed = rte_malloc_socket(NULL,
>  				vq->size *
>  				sizeof(struct vring_used_elem_packed),
> -				RTE_CACHE_LINE_SIZE);
> +				RTE_CACHE_LINE_SIZE, vq->numa_node);
>  		if (!vq->shadow_used_packed) {
>  			VHOST_LOG_CONFIG(ERR,
>  					"failed to allocate memory for shadow used
> ring.\n");
> @@ -447,9 +447,9 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
>  		if (vq->shadow_used_split)
>  			rte_free(vq->shadow_used_split);
> 
> -		vq->shadow_used_split = rte_malloc(NULL,
> +		vq->shadow_used_split = rte_malloc_socket(NULL,
>  				vq->size * sizeof(struct vring_used_elem),
> -				RTE_CACHE_LINE_SIZE);
> +				RTE_CACHE_LINE_SIZE, vq->numa_node);
> 
>  		if (!vq->shadow_used_split) {
>  			VHOST_LOG_CONFIG(ERR,
> @@ -460,9 +460,9 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
> 
>  	if (vq->batch_copy_elems)
>  		rte_free(vq->batch_copy_elems);
> -	vq->batch_copy_elems = rte_malloc(NULL,
> +	vq->batch_copy_elems = rte_malloc_socket(NULL,
>  				vq->size * sizeof(struct batch_copy_elem),
> -				RTE_CACHE_LINE_SIZE);
> +				RTE_CACHE_LINE_SIZE, vq->numa_node);
>  	if (!vq->batch_copy_elems) {
>  		VHOST_LOG_CONFIG(ERR,
>  			"failed to allocate memory for batching copy.\n");
> @@ -505,6 +505,9 @@ numa_realloc(struct virtio_net *dev, int index)
>  		return dev;
>  	}
> 
> +	if (node == vq->numa_node)
> +		goto out_dev_realloc;
> +
>  	vq = rte_realloc_socket(vq, sizeof(*vq), 0, node);
>  	if (!vq) {
>  		VHOST_LOG_CONFIG(ERR, "Failed to realloc virtqueue %d on
> node %d\n",
> @@ -559,6 +562,10 @@ numa_realloc(struct virtio_net *dev, int index)
>  		vq->log_cache = lc;
>  	}
> 
> +	vq->numa_node = node;
> +
> +out_dev_realloc:
> +
>  	if (dev->flags & VIRTIO_DEV_RUNNING)
>  		return dev;
> 
> @@ -1213,7 +1220,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
>  	struct virtio_net *dev = *pdev;
>  	struct VhostUserMemory *memory = &msg->payload.memory;
>  	struct rte_vhost_mem_region *reg;
> -
> +	int numa_node = SOCKET_ID_ANY;
>  	uint64_t mmap_offset;
>  	uint32_t i;
> 
> @@ -1253,13 +1260,21 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
>  		for (i = 0; i < dev->nr_vring; i++)
>  			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
> 
> +	/*
> +	 * If VQ 0 has already been allocated, try to allocate on the same
> +	 * NUMA node. It can be reallocated later in numa_realloc().
> +	 */
> +	if (dev->nr_vring > 0)
> +		numa_node = dev->virtqueue[0]->numa_node;
> +
>  	dev->nr_guest_pages = 0;
>  	if (dev->guest_pages == NULL) {
>  		dev->max_guest_pages = 8;
> -		dev->guest_pages = rte_zmalloc(NULL,
> +		dev->guest_pages = rte_zmalloc_socket(NULL,
>  					dev->max_guest_pages *
>  					sizeof(struct guest_page),
> -					RTE_CACHE_LINE_SIZE);
> +					RTE_CACHE_LINE_SIZE,
> +					numa_node);
>  		if (dev->guest_pages == NULL) {
>  			VHOST_LOG_CONFIG(ERR,
>  				"(%d) failed to allocate memory "
> @@ -1269,8 +1284,8 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
>  		}
>  	}
> 
> -	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory)
> +
> -		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> +	dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct
> rte_vhost_memory) +
> +		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0,
> numa_node);
>  	if (dev->mem == NULL) {
>  		VHOST_LOG_CONFIG(ERR,
>  			"(%d) failed to allocate memory for dev->mem\n",
> @@ -2193,9 +2208,9 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct
> VhostUserMsg *msg,
>  		rte_free(vq->log_cache);
>  		vq->log_cache = NULL;
>  		vq->log_cache_nb_elem = 0;
> -		vq->log_cache = rte_zmalloc("vq log cache",
> +		vq->log_cache = rte_malloc_socket("vq log cache",
>  				sizeof(struct log_cache_entry) * VHOST_LOG_CACHE_NR,
> -				0);
> +				0, vq->numa_node);
>  		/*
>  		 * If log cache alloc fail, don't fail migration, but no
>  		 * caching will be done, which will impact performance
> --
> 2.31.1

Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>

next prev parent reply	other threads:[~2021-06-25  7:26 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-18 14:03 [dpdk-dev] [PATCH v6 0/7] vhost: Fix and improve NUMA reallocation Maxime Coquelin
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 1/7] vhost: fix missing memory table NUMA realloc Maxime Coquelin
2021-06-25  2:26   ` Xia, Chenbo
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 2/7] vhost: fix missing guest pages " Maxime Coquelin
2021-06-25  2:26   ` Xia, Chenbo
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 3/7] vhost: fix missing cache logging " Maxime Coquelin
2021-06-25  2:50   ` Xia, Chenbo
2021-06-29 14:38     ` Maxime Coquelin
2021-06-30  8:50       ` Xia, Chenbo
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 4/7] vhost: fix NUMA reallocation with multiqueue Maxime Coquelin
2021-06-25  2:56   ` Xia, Chenbo
2021-06-25 11:37     ` Xia, Chenbo
2021-06-29 14:35       ` Maxime Coquelin
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 5/7] vhost: improve NUMA reallocation Maxime Coquelin
2021-06-25  7:26   ` Xia, Chenbo
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 6/7] vhost: allocate all data on same node as virtqueue Maxime Coquelin
2021-06-25  7:26   ` Xia, Chenbo [this message]
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 7/7] vhost: convert inflight data to DPDK allocation API Maxime Coquelin
2021-06-25  7:26   ` Xia, Chenbo
2021-06-29 14:36     ` Maxime Coquelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=MN2PR11MB40634F98EB43F2B9EDFE14E79C069@MN2PR11MB4063.namprd11.prod.outlook.com \
    --to=chenbo.xia@intel.com \
    --cc=david.marchand@redhat.com \
    --cc=dev@dpdk.org \
    --cc=maxime.coquelin@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).