From: Mike Pattrick <mkp@redhat.com>
To: dev@dpdk.org
Cc: david.marchand@redhat.com, maxime.coquelin@redhat.com,
chenbo.xia@intel.com, Mike Pattrick <mkp@redhat.com>
Subject: [PATCH v2] vhost: fix madvise arguments alignment
Date: Wed, 22 Feb 2023 23:35:51 -0500 [thread overview]
Message-ID: <20230223043551.1108541-1-mkp@redhat.com> (raw)
In-Reply-To: <20230222214342.1092333-1-mkp@redhat.com>
The arguments passed to madvise should be aligned to the alignment of
the backing memory. Now we keep track of each region's alignment and use
it when setting coredump preferences. To facilitate this, a new member
was added to rte_vhost_mem_region. A new function was added to easily
translate a memory address back to its region's alignment. Unneeded calls to
madvise were reduced, as the cache removal case should already be
covered by the cache insertion case. The previously inline function
mem_set_dump was removed from a header file and made not inline.
Fixes: 338ad77c9ed3 ("vhost: exclude VM hugepages from coredumps")
Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
Since v1:
- Corrected a cast for 32-bit builds
---
lib/vhost/iotlb.c | 9 +++---
lib/vhost/rte_vhost.h | 1 +
lib/vhost/vhost.h | 12 ++------
lib/vhost/vhost_user.c | 63 +++++++++++++++++++++++++++++++++++-------
4 files changed, 60 insertions(+), 25 deletions(-)
diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c
index a0b8fd7302..5293507b63 100644
--- a/lib/vhost/iotlb.c
+++ b/lib/vhost/iotlb.c
@@ -149,7 +149,6 @@ vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
rte_rwlock_write_lock(&vq->iotlb_lock);
RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
TAILQ_REMOVE(&vq->iotlb_list, node, next);
vhost_user_iotlb_pool_put(vq, node);
}
@@ -171,7 +170,6 @@ vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
if (!entry_idx) {
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
TAILQ_REMOVE(&vq->iotlb_list, node, next);
vhost_user_iotlb_pool_put(vq, node);
vq->iotlb_cache_nr--;
@@ -224,14 +222,16 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq
vhost_user_iotlb_pool_put(vq, new_node);
goto unlock;
} else if (node->iova > new_node->iova) {
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
+ mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true,
+ hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr));
TAILQ_INSERT_BEFORE(node, new_node, next);
vq->iotlb_cache_nr++;
goto unlock;
}
}
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
+ mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true,
+ hua_to_alignment(dev->mem, (void *)(uintptr_t)new_node->uaddr));
TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
vq->iotlb_cache_nr++;
@@ -259,7 +259,6 @@ vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
break;
if (iova < node->iova + node->size) {
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
TAILQ_REMOVE(&vq->iotlb_list, node, next);
vhost_user_iotlb_pool_put(vq, node);
vq->iotlb_cache_nr--;
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index a395843fe9..c5c97ea67e 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -136,6 +136,7 @@ struct rte_vhost_mem_region {
void *mmap_addr;
uint64_t mmap_size;
int fd;
+ uint64_t alignment;
};
/**
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 5750f0c005..a2467ba509 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -1009,14 +1009,6 @@ mbuf_is_consumed(struct rte_mbuf *m)
return true;
}
-static __rte_always_inline void
-mem_set_dump(__rte_unused void *ptr, __rte_unused size_t size, __rte_unused bool enable)
-{
-#ifdef MADV_DONTDUMP
- if (madvise(ptr, size, enable ? MADV_DODUMP : MADV_DONTDUMP) == -1) {
- rte_log(RTE_LOG_INFO, vhost_config_log_level,
- "VHOST_CONFIG: could not set coredump preference (%s).\n", strerror(errno));
- }
-#endif
-}
+uint64_t hua_to_alignment(struct rte_vhost_memory *mem, void *ptr);
+void mem_set_dump(void *ptr, size_t size, bool enable, uint64_t alignment);
#endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index d702d082dd..6d09597fbe 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -737,6 +737,40 @@ log_addr_to_gpa(struct virtio_net *dev, struct vhost_virtqueue *vq)
return log_gpa;
}
+uint64_t
+hua_to_alignment(struct rte_vhost_memory *mem, void *ptr)
+{
+ struct rte_vhost_mem_region *r;
+ uint32_t i;
+ uintptr_t hua = (uintptr_t)ptr;
+
+ for (i = 0; i < mem->nregions; i++) {
+ r = &mem->regions[i];
+ if (hua >= r->host_user_addr &&
+ hua < r->host_user_addr + r->size) {
+ return r->alignment;
+ }
+ }
+
+ /* If region isn't found, don't align at all */
+ return 1;
+}
+
+void
+mem_set_dump(void *ptr, size_t size, bool enable, uint64_t pagesz)
+{
+#ifdef MADV_DONTDUMP
+ void *start = RTE_PTR_ALIGN_FLOOR(ptr, pagesz);
+ uintptr_t end = RTE_ALIGN_CEIL((uintptr_t)ptr + size, pagesz);
+ size_t len = end - (uintptr_t)start;
+
+ if (madvise(start, len, enable ? MADV_DODUMP : MADV_DONTDUMP) == -1) {
+ rte_log(RTE_LOG_INFO, vhost_config_log_level,
+ "VHOST_CONFIG: could not set coredump preference (%s).\n", strerror(errno));
+ }
+#endif
+}
+
static void
translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
{
@@ -767,6 +801,8 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->desc_packed, len, true,
+ hua_to_alignment(dev->mem, vq->desc_packed));
numa_realloc(&dev, &vq);
*pdev = dev;
*pvq = vq;
@@ -782,6 +818,8 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->driver_event, len, true,
+ hua_to_alignment(dev->mem, vq->driver_event));
len = sizeof(struct vring_packed_desc_event);
vq->device_event = (struct vring_packed_desc_event *)
(uintptr_t)ring_addr_to_vva(dev,
@@ -793,9 +831,8 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
- mem_set_dump(vq->desc_packed, len, true);
- mem_set_dump(vq->driver_event, len, true);
- mem_set_dump(vq->device_event, len, true);
+ mem_set_dump(vq->device_event, len, true,
+ hua_to_alignment(dev->mem, vq->device_event));
vq->access_ok = true;
return;
}
@@ -812,6 +849,7 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->desc, len, true, hua_to_alignment(dev->mem, vq->desc));
numa_realloc(&dev, &vq);
*pdev = dev;
*pvq = vq;
@@ -827,6 +865,7 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->avail, len, true, hua_to_alignment(dev->mem, vq->avail));
len = sizeof(struct vring_used) +
sizeof(struct vring_used_elem) * vq->size;
if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
@@ -839,6 +878,8 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->used, len, true, hua_to_alignment(dev->mem, vq->used));
+
if (vq->last_used_idx != vq->used->idx) {
VHOST_LOG_CONFIG(dev->ifname, WARNING,
"last_used_idx (%u) and vq->used->idx (%u) mismatches;\n",
@@ -849,9 +890,6 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
"some packets maybe resent for Tx and dropped for Rx\n");
}
- mem_set_dump(vq->desc, len, true);
- mem_set_dump(vq->avail, len, true);
- mem_set_dump(vq->used, len, true);
vq->access_ok = true;
VHOST_LOG_CONFIG(dev->ifname, DEBUG, "mapped address desc: %p\n", vq->desc);
@@ -1230,7 +1268,8 @@ vhost_user_mmap_region(struct virtio_net *dev,
region->mmap_addr = mmap_addr;
region->mmap_size = mmap_size;
region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
- mem_set_dump(mmap_addr, mmap_size, false);
+ region->alignment = alignment;
+ mem_set_dump(mmap_addr, mmap_size, false, alignment);
if (dev->async_copy) {
if (add_guest_pages(dev, region, alignment) < 0) {
@@ -1535,7 +1574,6 @@ inflight_mem_alloc(struct virtio_net *dev, const char *name, size_t size, int *f
return NULL;
}
- mem_set_dump(ptr, size, false);
*fd = mfd;
return ptr;
}
@@ -1566,6 +1604,7 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
uint64_t pervq_inflight_size, mmap_size;
uint16_t num_queues, queue_size;
struct virtio_net *dev = *pdev;
+ uint64_t alignment;
int fd, i, j;
int numa_node = SOCKET_ID_ANY;
void *addr;
@@ -1628,6 +1667,8 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
dev->inflight_info->fd = -1;
}
+ alignment = get_blk_size(fd);
+ mem_set_dump(addr, mmap_size, false, alignment);
dev->inflight_info->addr = addr;
dev->inflight_info->size = ctx->msg.payload.inflight.mmap_size = mmap_size;
dev->inflight_info->fd = ctx->fds[0] = fd;
@@ -1744,10 +1785,10 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev,
dev->inflight_info->fd = -1;
}
- mem_set_dump(addr, mmap_size, false);
dev->inflight_info->fd = fd;
dev->inflight_info->addr = addr;
dev->inflight_info->size = mmap_size;
+ mem_set_dump(addr, mmap_size, false, get_blk_size(fd));
for (i = 0; i < num_queues; i++) {
vq = dev->virtqueue[i];
@@ -2242,6 +2283,7 @@ vhost_user_set_log_base(struct virtio_net **pdev,
struct virtio_net *dev = *pdev;
int fd = ctx->fds[0];
uint64_t size, off;
+ uint64_t alignment;
void *addr;
uint32_t i;
@@ -2280,6 +2322,7 @@ vhost_user_set_log_base(struct virtio_net **pdev,
* fail when offset is not page size aligned.
*/
addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ alignment = get_blk_size(fd);
close(fd);
if (addr == MAP_FAILED) {
VHOST_LOG_CONFIG(dev->ifname, ERR, "mmap log base failed!\n");
@@ -2296,7 +2339,7 @@ vhost_user_set_log_base(struct virtio_net **pdev,
dev->log_addr = (uint64_t)(uintptr_t)addr;
dev->log_base = dev->log_addr + off;
dev->log_size = size;
- mem_set_dump(addr, size, false);
+ mem_set_dump(addr, size + off, false, alignment);
for (i = 0; i < dev->nr_vring; i++) {
struct vhost_virtqueue *vq = dev->virtqueue[i];
--
2.31.1
next prev parent reply other threads:[~2023-02-23 4:35 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-06 15:05 [PATCH v2] vhost: exclude VM hugepages from coredumps Mike Pattrick
2022-12-07 16:54 ` [PATCH v3] " Mike Pattrick
2022-12-20 12:43 ` Maxime Coquelin
2023-02-03 14:50 ` Maxime Coquelin
2023-02-10 15:53 ` David Marchand
2023-02-10 15:59 ` Maxime Coquelin
2023-02-10 21:12 ` Mike Pattrick
2023-02-21 16:26 ` Maxime Coquelin
2023-02-22 13:59 ` Mike Pattrick
2023-02-22 21:43 ` [PATCH] vhost: fix madvise arguments alignment Mike Pattrick
2023-02-23 4:35 ` Mike Pattrick [this message]
2023-02-23 16:12 ` [PATCH v2] " Maxime Coquelin
2023-02-23 16:57 ` Mike Pattrick
2023-02-24 15:05 ` Patrick Robb
2023-03-01 20:02 ` [PATCH v3] " Mike Pattrick
2023-03-02 10:19 ` Maxime Coquelin
2023-03-06 14:22 ` Maxime Coquelin
2023-02-03 17:45 ` [PATCH v2] vhost: exclude VM hugepages from coredumps Stephen Hemminger
2023-02-06 1:13 ` Mike Pattrick
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230223043551.1108541-1-mkp@redhat.com \
--to=mkp@redhat.com \
--cc=chenbo.xia@intel.com \
--cc=david.marchand@redhat.com \
--cc=dev@dpdk.org \
--cc=maxime.coquelin@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).