DPDK patches and discussions
 help / color / mirror / Atom feed
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: dev@dpdk.org, tiwei.bie@intel.com, jfreimann@redhat.com,
	zhihong.wang@intel.com, bruce.richardson@intel.com,
	konstantin.ananyev@intel.com, david.marchand@redhat.com
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [PATCH v2 1/5] vhost: un-inline dirty pages logging functions
Date: Fri, 17 May 2019 17:06:09 +0200	[thread overview]
Message-ID: <20190517150613.13310-2-maxime.coquelin@redhat.com> (raw)
In-Reply-To: <20190517150613.13310-1-maxime.coquelin@redhat.com>

In order to reduce the I-cache pressure, this patch removes
the inlining of the dirty pages logging functions, that we
can consider as cold path.

Indeed, these functions are only called while doing live
migration, so not called most of the time.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.c | 132 +++++++++++++++++++++++++++++++++++++++
 lib/librte_vhost/vhost.h | 129 ++++----------------------------------
 2 files changed, 144 insertions(+), 117 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 163f4595ef..4a54ad6bd1 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -69,6 +69,138 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return 0;
 }
 
+#define VHOST_LOG_PAGE	4096
+
+/*
+ * Atomically set a bit in memory.
+ */
+static __rte_always_inline void
+vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
+{
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+	/*
+	 * __sync_ built-ins are deprecated, but __atomic_ ones
+	 * are sub-optimized in older GCC versions.
+	 */
+	__sync_fetch_and_or_1(addr, (1U << nr));
+#else
+	__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
+#endif
+}
+
+static __rte_always_inline void
+vhost_log_page(uint8_t *log_base, uint64_t page)
+{
+	vhost_set_bit(page % 8, &log_base[page / 8]);
+}
+
+void
+__vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
+{
+	uint64_t page;
+
+	if (unlikely(!dev->log_base || !len))
+		return;
+
+	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+		return;
+
+	/* To make sure guest memory updates are committed before logging */
+	rte_smp_wmb();
+
+	page = addr / VHOST_LOG_PAGE;
+	while (page * VHOST_LOG_PAGE < addr + len) {
+		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+		page += 1;
+	}
+}
+
+void
+__vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	unsigned long *log_base;
+	int i;
+
+	if (unlikely(!dev->log_base))
+		return;
+
+	rte_smp_wmb();
+
+	log_base = (unsigned long *)(uintptr_t)dev->log_base;
+
+	for (i = 0; i < vq->log_cache_nb_elem; i++) {
+		struct log_cache_entry *elem = vq->log_cache + i;
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+		/*
+		 * '__sync' builtins are deprecated, but '__atomic' ones
+		 * are sub-optimized in older GCC versions.
+		 */
+		__sync_fetch_and_or(log_base + elem->offset, elem->val);
+#else
+		__atomic_fetch_or(log_base + elem->offset, elem->val,
+				__ATOMIC_RELAXED);
+#endif
+	}
+
+	rte_smp_wmb();
+
+	vq->log_cache_nb_elem = 0;
+}
+
+static __rte_always_inline void
+vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			uint64_t page)
+{
+	uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
+	uint32_t offset = page / (sizeof(unsigned long) << 3);
+	int i;
+
+	for (i = 0; i < vq->log_cache_nb_elem; i++) {
+		struct log_cache_entry *elem = vq->log_cache + i;
+
+		if (elem->offset == offset) {
+			elem->val |= (1UL << bit_nr);
+			return;
+		}
+	}
+
+	if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
+		/*
+		 * No more room for a new log cache entry,
+		 * so write the dirty log map directly.
+		 */
+		rte_smp_wmb();
+		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+
+		return;
+	}
+
+	vq->log_cache[i].offset = offset;
+	vq->log_cache[i].val = (1UL << bit_nr);
+	vq->log_cache_nb_elem++;
+}
+
+void
+__vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			uint64_t addr, uint64_t len)
+{
+	uint64_t page;
+
+	if (unlikely(!dev->log_base || !len))
+		return;
+
+	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+		return;
+
+	page = addr / VHOST_LOG_PAGE;
+	while (page * VHOST_LOG_PAGE < addr + len) {
+		vhost_log_cache_page(dev, vq, page);
+		page += 1;
+	}
+}
+
+
 void
 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
 {
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index e9138dfab4..3ab7b4950f 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -350,138 +350,33 @@ desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
 		wrap_counter != !!(flags & VRING_DESC_F_USED);
 }
 
-#define VHOST_LOG_PAGE	4096
-
-/*
- * Atomically set a bit in memory.
- */
-static __rte_always_inline void
-vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
-{
-#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
-	/*
-	 * __sync_ built-ins are deprecated, but __atomic_ ones
-	 * are sub-optimized in older GCC versions.
-	 */
-	__sync_fetch_and_or_1(addr, (1U << nr));
-#else
-	__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
-#endif
-}
-
-static __rte_always_inline void
-vhost_log_page(uint8_t *log_base, uint64_t page)
-{
-	vhost_set_bit(page % 8, &log_base[page / 8]);
-}
+void __vhost_log_cache_write(struct virtio_net *dev,
+		struct vhost_virtqueue *vq,
+		uint64_t addr, uint64_t len);
+void __vhost_log_cache_sync(struct virtio_net *dev,
+		struct vhost_virtqueue *vq);
+void __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len);
 
 static __rte_always_inline void
 vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
 {
-	uint64_t page;
-
-	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
-		   !dev->log_base || !len))
-		return;
-
-	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
-		return;
-
-	/* To make sure guest memory updates are committed before logging */
-	rte_smp_wmb();
-
-	page = addr / VHOST_LOG_PAGE;
-	while (page * VHOST_LOG_PAGE < addr + len) {
-		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
-		page += 1;
-	}
+	if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL)))
+		__vhost_log_write(dev, addr, len);
 }
 
 static __rte_always_inline void
 vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
-	unsigned long *log_base;
-	int i;
-
-	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
-		   !dev->log_base))
-		return;
-
-	rte_smp_wmb();
-
-	log_base = (unsigned long *)(uintptr_t)dev->log_base;
-
-	for (i = 0; i < vq->log_cache_nb_elem; i++) {
-		struct log_cache_entry *elem = vq->log_cache + i;
-
-#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
-		/*
-		 * '__sync' builtins are deprecated, but '__atomic' ones
-		 * are sub-optimized in older GCC versions.
-		 */
-		__sync_fetch_and_or(log_base + elem->offset, elem->val);
-#else
-		__atomic_fetch_or(log_base + elem->offset, elem->val,
-				__ATOMIC_RELAXED);
-#endif
-	}
-
-	rte_smp_wmb();
-
-	vq->log_cache_nb_elem = 0;
-}
-
-static __rte_always_inline void
-vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			uint64_t page)
-{
-	uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
-	uint32_t offset = page / (sizeof(unsigned long) << 3);
-	int i;
-
-	for (i = 0; i < vq->log_cache_nb_elem; i++) {
-		struct log_cache_entry *elem = vq->log_cache + i;
-
-		if (elem->offset == offset) {
-			elem->val |= (1UL << bit_nr);
-			return;
-		}
-	}
-
-	if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
-		/*
-		 * No more room for a new log cache entry,
-		 * so write the dirty log map directly.
-		 */
-		rte_smp_wmb();
-		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
-
-		return;
-	}
-
-	vq->log_cache[i].offset = offset;
-	vq->log_cache[i].val = (1UL << bit_nr);
-	vq->log_cache_nb_elem++;
+	if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL)))
+		__vhost_log_cache_sync(dev, vq);
 }
 
 static __rte_always_inline void
 vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			uint64_t addr, uint64_t len)
 {
-	uint64_t page;
-
-	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
-		   !dev->log_base || !len))
-		return;
-
-	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
-		return;
-
-	page = addr / VHOST_LOG_PAGE;
-	while (page * VHOST_LOG_PAGE < addr + len) {
-		vhost_log_cache_page(dev, vq, page);
-		page += 1;
-	}
+	if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL)))
+		__vhost_log_cache_write(dev, vq, addr, len);
 }
 
 static __rte_always_inline void
-- 
2.21.0


  reply	other threads:[~2019-05-17 15:06 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-17 15:06 [dpdk-dev] [PATCH v2 0/5] vhost: I-cache pressure optimizations Maxime Coquelin
2019-05-17 15:06 ` Maxime Coquelin [this message]
2019-05-20  5:18   ` [dpdk-dev] [PATCH v2 1/5] vhost: un-inline dirty pages logging functions Tiwei Bie
2019-05-17 15:06 ` [dpdk-dev] [PATCH v2 2/5] vhost: do not inline packed and split functions Maxime Coquelin
2019-05-20  5:30   ` Tiwei Bie
2019-05-17 15:06 ` [dpdk-dev] [PATCH v2 3/5] vhost: do not inline unlikely fragmented buffers code Maxime Coquelin
2019-05-20  5:51   ` Tiwei Bie
2019-05-24 13:50     ` Maxime Coquelin
2019-05-17 15:06 ` [dpdk-dev] [PATCH v2 4/5] vhost: simplify descriptor's buffer prefetching Maxime Coquelin
2019-05-29  8:05   ` Tiwei Bie
2019-05-17 15:06 ` [dpdk-dev] [PATCH v2 5/5] eal/x86: force inlining of all memcpy and mov helpers Maxime Coquelin
2019-05-20  8:30   ` David Marchand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190517150613.13310-2-maxime.coquelin@redhat.com \
    --to=maxime.coquelin@redhat.com \
    --cc=bruce.richardson@intel.com \
    --cc=david.marchand@redhat.com \
    --cc=dev@dpdk.org \
    --cc=jfreimann@redhat.com \
    --cc=konstantin.ananyev@intel.com \
    --cc=tiwei.bie@intel.com \
    --cc=zhihong.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).