From: "Xia, Chenbo" <chenbo.xia@intel.com>
To: Maxime Coquelin <maxime.coquelin@redhat.com>,
"dev@dpdk.org" <dev@dpdk.org>,
"david.marchand@redhat.com" <david.marchand@redhat.com>
Subject: Re: [dpdk-dev] [PATCH v6 6/7] vhost: allocate all data on same node as virtqueue
Date: Fri, 25 Jun 2021 07:26:40 +0000 [thread overview]
Message-ID: <MN2PR11MB40634F98EB43F2B9EDFE14E79C069@MN2PR11MB4063.namprd11.prod.outlook.com> (raw)
In-Reply-To: <20210618140357.255995-7-maxime.coquelin@redhat.com>
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Friday, June 18, 2021 10:04 PM
> To: dev@dpdk.org; david.marchand@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH v6 6/7] vhost: allocate all data on same node as virtqueue
>
> This patch saves the NUMA node the virtqueue is allocated
> on at init time, in order to allocate all other data on the
> same node.
>
> While most of the data is allocated before numa_realloc()
> is called, and so will be reallocated properly, some data,
> such as the log cache, is most likely allocated afterwards.
>
> For the virtio device metadata, we decided to allocate it
> on the same node as VQ 0.
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/vhost/vhost.c | 34 ++++++++++++++++------------------
> lib/vhost/vhost.h | 1 +
> lib/vhost/vhost_user.c | 41 ++++++++++++++++++++++++++++-------------
> 3 files changed, 45 insertions(+), 31 deletions(-)
>
> diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
> index c96f6335c8..0000cd3297 100644
> --- a/lib/vhost/vhost.c
> +++ b/lib/vhost/vhost.c
> @@ -261,7 +261,7 @@ vhost_alloc_copy_ind_table(struct virtio_net *dev, struct
> vhost_virtqueue *vq,
> uint64_t src, dst;
> uint64_t len, remain = desc_len;
>
> - idesc = rte_malloc(__func__, desc_len, 0);
> + idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
> if (unlikely(!idesc))
> return NULL;
>
> @@ -549,6 +549,7 @@ static void
> init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
> {
> struct vhost_virtqueue *vq;
> + int numa_node = SOCKET_ID_ANY;
>
> if (vring_idx >= VHOST_MAX_VRING) {
> VHOST_LOG_CONFIG(ERR,
> @@ -570,6 +571,15 @@ init_vring_queue(struct virtio_net *dev, uint32_t
> vring_idx)
> vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
> vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
>
> +#ifdef RTE_LIBRTE_VHOST_NUMA
> + if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
> + VHOST_LOG_CONFIG(ERR, "(%d) failed to query numa node: %s\n",
> + dev->vid, rte_strerror(errno));
> + numa_node = SOCKET_ID_ANY;
> + }
> +#endif
> + vq->numa_node = numa_node;
> +
> vhost_user_iotlb_init(dev, vring_idx);
> }
>
> @@ -1616,7 +1626,6 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
> struct vhost_virtqueue *vq;
> struct virtio_net *dev = get_device(vid);
> struct rte_vhost_async_features f;
> - int node;
>
> if (dev == NULL || ops == NULL)
> return -1;
> @@ -1651,20 +1660,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
> goto reg_out;
> }
>
> -#ifdef RTE_LIBRTE_VHOST_NUMA
> - if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
> - VHOST_LOG_CONFIG(ERR,
> - "unable to get numa information in async register. "
> - "allocating async buffer memory on the caller thread
> node\n");
> - node = SOCKET_ID_ANY;
> - }
> -#else
> - node = SOCKET_ID_ANY;
> -#endif
> -
> vq->async_pkts_info = rte_malloc_socket(NULL,
> vq->size * sizeof(struct async_inflight_info),
> - RTE_CACHE_LINE_SIZE, node);
> + RTE_CACHE_LINE_SIZE, vq->numa_node);
> if (!vq->async_pkts_info) {
> vhost_free_async_mem(vq);
> VHOST_LOG_CONFIG(ERR,
> @@ -1675,7 +1673,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>
> vq->it_pool = rte_malloc_socket(NULL,
> VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
> - RTE_CACHE_LINE_SIZE, node);
> + RTE_CACHE_LINE_SIZE, vq->numa_node);
> if (!vq->it_pool) {
> vhost_free_async_mem(vq);
> VHOST_LOG_CONFIG(ERR,
> @@ -1686,7 +1684,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>
> vq->vec_pool = rte_malloc_socket(NULL,
> VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
> - RTE_CACHE_LINE_SIZE, node);
> + RTE_CACHE_LINE_SIZE, vq->numa_node);
> if (!vq->vec_pool) {
> vhost_free_async_mem(vq);
> VHOST_LOG_CONFIG(ERR,
> @@ -1698,7 +1696,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
> if (vq_is_packed(dev)) {
> vq->async_buffers_packed = rte_malloc_socket(NULL,
> vq->size * sizeof(struct vring_used_elem_packed),
> - RTE_CACHE_LINE_SIZE, node);
> + RTE_CACHE_LINE_SIZE, vq->numa_node);
> if (!vq->async_buffers_packed) {
> vhost_free_async_mem(vq);
> VHOST_LOG_CONFIG(ERR,
> @@ -1709,7 +1707,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
> } else {
> vq->async_descs_split = rte_malloc_socket(NULL,
> vq->size * sizeof(struct vring_used_elem),
> - RTE_CACHE_LINE_SIZE, node);
> + RTE_CACHE_LINE_SIZE, vq->numa_node);
> if (!vq->async_descs_split) {
> vhost_free_async_mem(vq);
> VHOST_LOG_CONFIG(ERR,
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 8078ddff79..8ffe387556 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -164,6 +164,7 @@ struct vhost_virtqueue {
>
> uint16_t batch_copy_nb_elems;
> struct batch_copy_elem *batch_copy_elems;
> + int numa_node;
> bool used_wrap_counter;
> bool avail_wrap_counter;
>
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index d6ec4000c3..d8ec087dfc 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -433,10 +433,10 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
> if (vq_is_packed(dev)) {
> if (vq->shadow_used_packed)
> rte_free(vq->shadow_used_packed);
> - vq->shadow_used_packed = rte_malloc(NULL,
> + vq->shadow_used_packed = rte_malloc_socket(NULL,
> vq->size *
> sizeof(struct vring_used_elem_packed),
> - RTE_CACHE_LINE_SIZE);
> + RTE_CACHE_LINE_SIZE, vq->numa_node);
> if (!vq->shadow_used_packed) {
> VHOST_LOG_CONFIG(ERR,
> "failed to allocate memory for shadow used
> ring.\n");
> @@ -447,9 +447,9 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
> if (vq->shadow_used_split)
> rte_free(vq->shadow_used_split);
>
> - vq->shadow_used_split = rte_malloc(NULL,
> + vq->shadow_used_split = rte_malloc_socket(NULL,
> vq->size * sizeof(struct vring_used_elem),
> - RTE_CACHE_LINE_SIZE);
> + RTE_CACHE_LINE_SIZE, vq->numa_node);
>
> if (!vq->shadow_used_split) {
> VHOST_LOG_CONFIG(ERR,
> @@ -460,9 +460,9 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
>
> if (vq->batch_copy_elems)
> rte_free(vq->batch_copy_elems);
> - vq->batch_copy_elems = rte_malloc(NULL,
> + vq->batch_copy_elems = rte_malloc_socket(NULL,
> vq->size * sizeof(struct batch_copy_elem),
> - RTE_CACHE_LINE_SIZE);
> + RTE_CACHE_LINE_SIZE, vq->numa_node);
> if (!vq->batch_copy_elems) {
> VHOST_LOG_CONFIG(ERR,
> "failed to allocate memory for batching copy.\n");
> @@ -505,6 +505,9 @@ numa_realloc(struct virtio_net *dev, int index)
> return dev;
> }
>
> + if (node == vq->numa_node)
> + goto out_dev_realloc;
> +
> vq = rte_realloc_socket(vq, sizeof(*vq), 0, node);
> if (!vq) {
> VHOST_LOG_CONFIG(ERR, "Failed to realloc virtqueue %d on
> node %d\n",
> @@ -559,6 +562,10 @@ numa_realloc(struct virtio_net *dev, int index)
> vq->log_cache = lc;
> }
>
> + vq->numa_node = node;
> +
> +out_dev_realloc:
> +
> if (dev->flags & VIRTIO_DEV_RUNNING)
> return dev;
>
> @@ -1213,7 +1220,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
> struct virtio_net *dev = *pdev;
> struct VhostUserMemory *memory = &msg->payload.memory;
> struct rte_vhost_mem_region *reg;
> -
> + int numa_node = SOCKET_ID_ANY;
> uint64_t mmap_offset;
> uint32_t i;
>
> @@ -1253,13 +1260,21 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
> for (i = 0; i < dev->nr_vring; i++)
> vhost_user_iotlb_flush_all(dev->virtqueue[i]);
>
> + /*
> + * If VQ 0 has already been allocated, try to allocate on the same
> + * NUMA node. It can be reallocated later in numa_realloc().
> + */
> + if (dev->nr_vring > 0)
> + numa_node = dev->virtqueue[0]->numa_node;
> +
> dev->nr_guest_pages = 0;
> if (dev->guest_pages == NULL) {
> dev->max_guest_pages = 8;
> - dev->guest_pages = rte_zmalloc(NULL,
> + dev->guest_pages = rte_zmalloc_socket(NULL,
> dev->max_guest_pages *
> sizeof(struct guest_page),
> - RTE_CACHE_LINE_SIZE);
> + RTE_CACHE_LINE_SIZE,
> + numa_node);
> if (dev->guest_pages == NULL) {
> VHOST_LOG_CONFIG(ERR,
> "(%d) failed to allocate memory "
> @@ -1269,8 +1284,8 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
> }
> }
>
> - dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory)
> +
> - sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> + dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct
> rte_vhost_memory) +
> + sizeof(struct rte_vhost_mem_region) * memory->nregions, 0,
> numa_node);
> if (dev->mem == NULL) {
> VHOST_LOG_CONFIG(ERR,
> "(%d) failed to allocate memory for dev->mem\n",
> @@ -2193,9 +2208,9 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct
> VhostUserMsg *msg,
> rte_free(vq->log_cache);
> vq->log_cache = NULL;
> vq->log_cache_nb_elem = 0;
> - vq->log_cache = rte_zmalloc("vq log cache",
> + vq->log_cache = rte_malloc_socket("vq log cache",
> sizeof(struct log_cache_entry) * VHOST_LOG_CACHE_NR,
> - 0);
> + 0, vq->numa_node);
> /*
> * If log cache alloc fail, don't fail migration, but no
> * caching will be done, which will impact performance
> --
> 2.31.1
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
next prev parent reply other threads:[~2021-06-25 7:26 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-18 14:03 [dpdk-dev] [PATCH v6 0/7] vhost: Fix and improve NUMA reallocation Maxime Coquelin
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 1/7] vhost: fix missing memory table NUMA realloc Maxime Coquelin
2021-06-25 2:26 ` Xia, Chenbo
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 2/7] vhost: fix missing guest pages " Maxime Coquelin
2021-06-25 2:26 ` Xia, Chenbo
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 3/7] vhost: fix missing cache logging " Maxime Coquelin
2021-06-25 2:50 ` Xia, Chenbo
2021-06-29 14:38 ` Maxime Coquelin
2021-06-30 8:50 ` Xia, Chenbo
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 4/7] vhost: fix NUMA reallocation with multiqueue Maxime Coquelin
2021-06-25 2:56 ` Xia, Chenbo
2021-06-25 11:37 ` Xia, Chenbo
2021-06-29 14:35 ` Maxime Coquelin
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 5/7] vhost: improve NUMA reallocation Maxime Coquelin
2021-06-25 7:26 ` Xia, Chenbo
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 6/7] vhost: allocate all data on same node as virtqueue Maxime Coquelin
2021-06-25 7:26 ` Xia, Chenbo [this message]
2021-06-18 14:03 ` [dpdk-dev] [PATCH v6 7/7] vhost: convert inflight data to DPDK allocation API Maxime Coquelin
2021-06-25 7:26 ` Xia, Chenbo
2021-06-29 14:36 ` Maxime Coquelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=MN2PR11MB40634F98EB43F2B9EDFE14E79C069@MN2PR11MB4063.namprd11.prod.outlook.com \
--to=chenbo.xia@intel.com \
--cc=david.marchand@redhat.com \
--cc=dev@dpdk.org \
--cc=maxime.coquelin@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for the NNTP newsgroup(s).