* [dpdk-stable] [PATCH 17.11 1/2] vhost: un-inline dirty pages logging functions
2020-01-09 17:58 [dpdk-stable] [PATCH 17.11 0/2] Backport GPA dirty page logging Adrian Moreno
@ 2020-01-09 17:58 ` Adrian Moreno
2020-01-09 17:58 ` [dpdk-stable] [PATCH 17.11 2/2] vhost: convert buffer addresses to GPA for logging Adrian Moreno
2020-01-09 19:40 ` [dpdk-stable] [PATCH 17.11 0/2] Backport GPA dirty page logging Luca Boccassi
2 siblings, 0 replies; 4+ messages in thread
From: Adrian Moreno @ 2020-01-09 17:58 UTC (permalink / raw)
To: stable; +Cc: luca.boccassi, maxime.coquelin, Tiwei Bie
From: Maxime Coquelin <maxime.coquelin@redhat.com>
[ upstream commit 094b643d9b425c942aa18d1d229832f8d96940af ]
In order to reduce I-cache pressure, this patch stops inlining
the dirty page logging functions, which can be considered
a cold path.
Indeed, these functions are only called during live
migration, so they are not called most of the time.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Tiwei Bie <tiwei.bie@intel.com>
---
lib/librte_vhost/vhost.c | 134 +++++++++++++++++++++++++++++++++++++++
lib/librte_vhost/vhost.h | 132 ++++----------------------------------
2 files changed, 146 insertions(+), 120 deletions(-)
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index ec584695c..4b4ef56e1 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -110,6 +110,140 @@ get_device(int vid)
return dev;
}
+#define VHOST_LOG_PAGE 4096
+
+/*
+ * Atomically set a bit in memory.
+ */
+static __rte_always_inline void
+vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
+{
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+ /*
+ * __sync_ built-ins are deprecated, but __atomic_ ones
+ * are sub-optimized in older GCC versions.
+ */
+ __sync_fetch_and_or_1(addr, (1U << nr));
+#else
+ __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
+#endif
+}
+
+static __rte_always_inline void
+vhost_log_page(uint8_t *log_base, uint64_t page)
+{
+ vhost_set_bit(page % 8, &log_base[page / 8]);
+}
+
+void
+__vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
+{
+ uint64_t page;
+
+ if (unlikely(!dev->log_base || !len))
+ return;
+
+ if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+ return;
+
+ /* To make sure guest memory updates are committed before logging */
+ rte_smp_wmb();
+
+ page = addr / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < addr + len) {
+ vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+ page += 1;
+ }
+}
+
+void
+__vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ unsigned long *log_base;
+ int i;
+
+ if (unlikely(!dev->log_base))
+ return;
+
+ log_base = (unsigned long *)(uintptr_t)dev->log_base;
+
+ /*
+ * It is expected a write memory barrier has been issued
+ * before this function is called.
+ */
+
+ for (i = 0; i < vq->log_cache_nb_elem; i++) {
+ struct log_cache_entry *elem = vq->log_cache + i;
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+ /*
+ * '__sync' builtins are deprecated, but '__atomic' ones
+ * are sub-optimized in older GCC versions.
+ */
+ __sync_fetch_and_or(log_base + elem->offset, elem->val);
+#else
+ __atomic_fetch_or(log_base + elem->offset, elem->val,
+ __ATOMIC_RELAXED);
+#endif
+ }
+
+ rte_smp_wmb();
+
+ vq->log_cache_nb_elem = 0;
+}
+
+static __rte_always_inline void
+vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t page)
+{
+ uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
+ uint32_t offset = page / (sizeof(unsigned long) << 3);
+ int i;
+
+ for (i = 0; i < vq->log_cache_nb_elem; i++) {
+ struct log_cache_entry *elem = vq->log_cache + i;
+
+ if (elem->offset == offset) {
+ elem->val |= (1UL << bit_nr);
+ return;
+ }
+ }
+
+ if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
+ /*
+ * No more room for a new log cache entry,
+ * so write the dirty log map directly.
+ */
+ rte_smp_wmb();
+ vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+
+ return;
+ }
+
+ vq->log_cache[i].offset = offset;
+ vq->log_cache[i].val = (1UL << bit_nr);
+ vq->log_cache_nb_elem++;
+}
+
+void
+__vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t addr, uint64_t len)
+{
+ uint64_t page;
+
+ if (unlikely(!dev->log_base || !len))
+ return;
+
+ if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+ return;
+
+ page = addr / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < addr + len) {
+ vhost_log_cache_page(dev, vq, page);
+ page += 1;
+ }
+}
+
static void
cleanup_vq(struct vhost_virtqueue *vq, int destroy)
{
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index f8b587a9d..34e2ecc4c 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -271,141 +271,33 @@ struct virtio_net {
} __rte_cache_aligned;
-#define VHOST_LOG_PAGE 4096
-
-/*
- * Atomically set a bit in memory.
- */
-static __rte_always_inline void
-vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
-{
-#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
- /*
- * __sync_ built-ins are deprecated, but __atomic_ ones
- * are sub-optimized in older GCC versions.
- */
- __sync_fetch_and_or_1(addr, (1U << nr));
-#else
- __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
-#endif
-}
-
-static __rte_always_inline void
-vhost_log_page(uint8_t *log_base, uint64_t page)
-{
- vhost_set_bit(page % 8, &log_base[page / 8]);
-}
+void __vhost_log_cache_write(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ uint64_t addr, uint64_t len);
+void __vhost_log_cache_sync(struct virtio_net *dev,
+ struct vhost_virtqueue *vq);
+void __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len);
static __rte_always_inline void
vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
{
- uint64_t page;
-
- if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
- !dev->log_base || !len))
- return;
-
- if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
- return;
-
- /* To make sure guest memory updates are committed before logging */
- rte_smp_wmb();
-
- page = addr / VHOST_LOG_PAGE;
- while (page * VHOST_LOG_PAGE < addr + len) {
- vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
- page += 1;
- }
+ if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL)))
+ __vhost_log_write(dev, addr, len);
}
static __rte_always_inline void
vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- unsigned long *log_base;
- int i;
-
- if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
- !dev->log_base))
- return;
-
- log_base = (unsigned long *)(uintptr_t)dev->log_base;
-
- /*
- * It is expected a write memory barrier has been issued
- * before this function is called.
- */
-
- for (i = 0; i < vq->log_cache_nb_elem; i++) {
- struct log_cache_entry *elem = vq->log_cache + i;
-
-#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
- /*
- * '__sync' builtins are deprecated, but '__atomic' ones
- * are sub-optimized in older GCC versions.
- */
- __sync_fetch_and_or(log_base + elem->offset, elem->val);
-#else
- __atomic_fetch_or(log_base + elem->offset, elem->val,
- __ATOMIC_RELAXED);
-#endif
- }
-
- rte_smp_wmb();
-
- vq->log_cache_nb_elem = 0;
-}
-
-static __rte_always_inline void
-vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint64_t page)
-{
- uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
- uint32_t offset = page / (sizeof(unsigned long) << 3);
- int i;
-
- for (i = 0; i < vq->log_cache_nb_elem; i++) {
- struct log_cache_entry *elem = vq->log_cache + i;
-
- if (elem->offset == offset) {
- elem->val |= (1UL << bit_nr);
- return;
- }
- }
-
- if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
- /*
- * No more room for a new log cache entry,
- * so write the dirty log map directly.
- */
- rte_smp_wmb();
- vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
-
- return;
- }
-
- vq->log_cache[i].offset = offset;
- vq->log_cache[i].val = (1UL << bit_nr);
- vq->log_cache_nb_elem++;
+ if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL)))
+ __vhost_log_cache_sync(dev, vq);
}
static __rte_always_inline void
vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t addr, uint64_t len)
{
- uint64_t page;
-
- if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
- !dev->log_base || !len))
- return;
-
- if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
- return;
-
- page = addr / VHOST_LOG_PAGE;
- while (page * VHOST_LOG_PAGE < addr + len) {
- vhost_log_cache_page(dev, vq, page);
- page += 1;
- }
+ if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL)))
+ __vhost_log_cache_write(dev, vq, addr, len);
}
static __rte_always_inline void
--
2.21.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [dpdk-stable] [PATCH 17.11 2/2] vhost: convert buffer addresses to GPA for logging
2020-01-09 17:58 [dpdk-stable] [PATCH 17.11 0/2] Backport GPA dirty page logging Adrian Moreno
2020-01-09 17:58 ` [dpdk-stable] [PATCH 17.11 1/2] vhost: un-inline dirty pages logging functions Adrian Moreno
@ 2020-01-09 17:58 ` Adrian Moreno
2020-01-09 19:40 ` [dpdk-stable] [PATCH 17.11 0/2] Backport GPA dirty page logging Luca Boccassi
2 siblings, 0 replies; 4+ messages in thread
From: Adrian Moreno @ 2020-01-09 17:58 UTC (permalink / raw)
To: stable; +Cc: luca.boccassi, maxime.coquelin, Adrian Moreno
[ upstream commit 1fc3b3f06aa9c79c749e8587859d75d237ba9161 ]
Add IOVA versions of the dirty page logging functions.
Note that the public-facing API, rte_vhost_log_write, is not modified,
so make it explicit that it expects the address in GPA space.
Fixes: 69c90e98f483 ("vhost: enable IOMMU support")
Cc: stable@dpdk.org
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/rte_vhost.h | 2 +-
lib/librte_vhost/vhost.c | 40 +++++++++++++++++++++++++++++++++++
lib/librte_vhost/vhost.h | 31 +++++++++++++++++++++++++++
lib/librte_vhost/virtio_net.c | 18 +++++++++-------
4 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 3fc6034de..eccaa3ed5 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -204,7 +204,7 @@ rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
* @param vid
* vhost device ID
* @param addr
- * the starting address for write
+ * the starting address for write (in guest physical address space)
* @param len
* the length to write
*/
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 4b4ef56e1..6a8f54fba 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -156,6 +156,26 @@ __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
}
}
+void
+__vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t len)
+{
+ uint64_t hva, gpa, map_len;
+ map_len = len;
+
+ hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
+ if (map_len != len) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
+ iova);
+ return;
+ }
+
+ gpa = hva_to_gpa(dev, hva, len);
+ if (gpa)
+ __vhost_log_write(dev, gpa, len);
+}
+
void
__vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
@@ -244,6 +264,26 @@ __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
}
+void
+__vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t len)
+{
+ uint64_t hva, gpa, map_len;
+ map_len = len;
+
+ hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
+ if (map_len != len) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
+ iova);
+ return;
+ }
+
+ gpa = hva_to_gpa(dev, hva, len);
+ if (gpa)
+ __vhost_log_cache_write(dev, vq, gpa, len);
+}
+
static void
cleanup_vq(struct vhost_virtqueue *vq, int destroy)
{
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 34e2ecc4c..aa4c4c941 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -274,9 +274,14 @@ struct virtio_net {
void __vhost_log_cache_write(struct virtio_net *dev,
struct vhost_virtqueue *vq,
uint64_t addr, uint64_t len);
+void __vhost_log_cache_write_iova(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t len);
void __vhost_log_cache_sync(struct virtio_net *dev,
struct vhost_virtqueue *vq);
void __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len);
+void __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t len);
static __rte_always_inline void
vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
@@ -314,6 +319,32 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
vhost_log_write(dev, vq->log_guest_addr + offset, len);
}
+static __rte_always_inline void
+vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t len)
+{
+ if (likely(!(dev->features & (1ULL << VHOST_F_LOG_ALL))))
+ return;
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ __vhost_log_cache_write_iova(dev, vq, iova, len);
+ else
+ __vhost_log_cache_write(dev, vq, iova, len);
+}
+
+static __rte_always_inline void
+vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t len)
+{
+ if (likely(!(dev->features & (1ULL << VHOST_F_LOG_ALL))))
+ return;
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ __vhost_log_write_iova(dev, vq, iova, len);
+ else
+ __vhost_log_write(dev, iova, len);
+}
+
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index b302c384d..b8f43900d 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -161,7 +161,8 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
for (i = 0; i < count; i++) {
rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
- vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len);
+ vhost_log_cache_write_iova(dev, vq, elem[i].log_addr,
+ elem[i].len);
PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
}
}
@@ -278,7 +279,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
virtio_enqueue_offload(m,
(struct virtio_net_hdr *)(uintptr_t)desc_addr);
PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
- vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen);
+ vhost_log_cache_write_iova(dev, vq, desc_gaddr, dev->vhost_hlen);
} else {
struct virtio_net_hdr vnet_hdr;
uint64_t remain = dev->vhost_hlen;
@@ -301,7 +302,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
(void *)(uintptr_t)src, len);
PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0);
- vhost_log_cache_write(dev, vq, guest_addr, len);
+ vhost_log_cache_write_iova(dev, vq, guest_addr, len);
remain -= len;
guest_addr += len;
dst += len;
@@ -382,8 +383,9 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
desc_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
cpy_len);
- vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
- cpy_len);
+ vhost_log_cache_write_iova(dev, vq,
+ desc_gaddr + desc_offset,
+ cpy_len);
PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
cpy_len, 0);
} else {
@@ -808,7 +810,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
PRINT_PACKET(dev, (uintptr_t)dst,
(uint32_t)len, 0);
- vhost_log_cache_write(dev, vq,
+ vhost_log_cache_write_iova(dev, vq,
guest_addr, len);
remain -= len;
@@ -818,7 +820,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
} else {
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
- vhost_log_cache_write(dev, vq, hdr_phys_addr,
+ vhost_log_cache_write_iova(dev, vq, hdr_phys_addr,
dev->vhost_hlen);
}
@@ -832,7 +834,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
desc_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
cpy_len);
- vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+ vhost_log_cache_write_iova(dev, vq, desc_gaddr + desc_offset,
cpy_len);
PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
cpy_len, 0);
--
2.21.1
^ permalink raw reply [flat|nested] 4+ messages in thread