From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 8A1E3A04F9 for ; Thu, 9 Jan 2020 18:58:57 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 783EC1E540; Thu, 9 Jan 2020 18:58:57 +0100 (CET) Received: from us-smtp-delivery-1.mimecast.com (us-smtp-1.mimecast.com [207.211.31.81]) by dpdk.org (Postfix) with ESMTP id 2181C1E540 for ; Thu, 9 Jan 2020 18:58:56 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1578592735; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=qlC6VMO6mSKAc/8NZloB0OcfbjjwV+3gFjNWsA9pQ94=; b=VsHpjcI0nlLG9JOaU/ON6E+EEk++5DPF1hvdkhQVoSh5VifKGN7sXd+mxP9UNNROL75PSP bkJ98lM9HYFjPoxFNkSrRgx73IRFha9mubTbjdOOPaKTyH4Yt2ej+zJU5f/CgPes4D7cD4 gthzndt4zWBNo/UhpoR51PxcFOB5D7E= Received: from mimecast-mx01.redhat.com (mimecast-mx01.redhat.com [209.132.183.4]) (Using TLS) by relay.mimecast.com with ESMTP id us-mta-263-7QLwfo8mMv6nIZf_EQk2Dw-1; Thu, 09 Jan 2020 12:58:54 -0500 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx01.redhat.com (Postfix) with ESMTPS id 155F4107ACC4; Thu, 9 Jan 2020 17:58:53 +0000 (UTC) Received: from amorenoz.users.ipa.redhat.com (ovpn-117-44.ams2.redhat.com [10.36.117.44]) by smtp.corp.redhat.com (Postfix) with ESMTP id 056E57C3B5; Thu, 9 Jan 2020 17:58:51 +0000 (UTC) From: Adrian Moreno To: stable@dpdk.org Cc: luca.boccassi@gmail.com, maxime.coquelin@redhat.com, Tiwei Bie Date: Thu, 9 Jan 2020 18:58:41 +0100 Message-Id: <20200109175842.24905-2-amorenoz@redhat.com> In-Reply-To: <20200109175842.24905-1-amorenoz@redhat.com> References: <20200109175842.24905-1-amorenoz@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-MC-Unique: 7QLwfo8mMv6nIZf_EQk2Dw-1 X-Mimecast-Spam-Score: 0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Subject: [dpdk-stable] [PATCH 17.11 1/2] vhost: un-inline dirty pages logging functions X-BeenThere: stable@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches for DPDK stable branches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: stable-bounces@dpdk.org Sender: "stable" From: Maxime Coquelin [ upstream commit 094b643d9b425c942aa18d1d229832f8d96940af ] In order to reduce the I-cache pressure, this patch removes the inlining of the dirty pages logging functions, that we can consider as cold path. Indeed, these functions are only called while doing live migration, so not called most of the time. Signed-off-by: Maxime Coquelin Reviewed-by: Tiwei Bie --- lib/librte_vhost/vhost.c | 134 +++++++++++++++++++++++++++++++++++++++ lib/librte_vhost/vhost.h | 132 ++++---------------------------------- 2 files changed, 146 insertions(+), 120 deletions(-) diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index ec584695c..4b4ef56e1 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -110,6 +110,140 @@ get_device(int vid) =09return dev; } =20 +#define VHOST_LOG_PAGE=094096 + +/* + * Atomically set a bit in memory. + */ +static __rte_always_inline void +vhost_set_bit(unsigned int nr, volatile uint8_t *addr) +{ +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) +=09/* +=09 * __sync_ built-ins are deprecated, but __atomic_ ones +=09 * are sub-optimized in older GCC versions. +=09 */ +=09__sync_fetch_and_or_1(addr, (1U << nr)); +#else +=09__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED); +#endif +} + +static __rte_always_inline void +vhost_log_page(uint8_t *log_base, uint64_t page) +{ +=09vhost_set_bit(page % 8, &log_base[page / 8]); +} + +void +__vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) +{ +=09uint64_t page; + +=09if (unlikely(!dev->log_base || !len)) +=09=09return; + +=09if (unlikely(dev->log_size <=3D ((addr + len - 1) / VHOST_LOG_PAGE / 8)= )) +=09=09return; + +=09/* To make sure guest memory updates are committed before logging */ +=09rte_smp_wmb(); + +=09page =3D addr / VHOST_LOG_PAGE; +=09while (page * VHOST_LOG_PAGE < addr + len) { +=09=09vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); +=09=09page +=3D 1; +=09} +} + +void +__vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ +=09unsigned long *log_base; +=09int i; + +=09if (unlikely(!dev->log_base)) +=09=09return; + +=09log_base =3D (unsigned long *)(uintptr_t)dev->log_base; + +=09/* +=09 * It is expected a write memory barrier has been issued +=09 * before this function is called. +=09 */ + +=09for (i =3D 0; i < vq->log_cache_nb_elem; i++) { +=09=09struct log_cache_entry *elem =3D vq->log_cache + i; + +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) +=09=09/* +=09=09 * '__sync' builtins are deprecated, but '__atomic' ones +=09=09 * are sub-optimized in older GCC versions. +=09=09 */ +=09=09__sync_fetch_and_or(log_base + elem->offset, elem->val); +#else +=09=09__atomic_fetch_or(log_base + elem->offset, elem->val, +=09=09=09=09__ATOMIC_RELAXED); +#endif +=09} + +=09rte_smp_wmb(); + +=09vq->log_cache_nb_elem =3D 0; +} + +static __rte_always_inline void +vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq, +=09=09=09uint64_t page) +{ +=09uint32_t bit_nr =3D page % (sizeof(unsigned long) << 3); +=09uint32_t offset =3D page / (sizeof(unsigned long) << 3); +=09int i; + +=09for (i =3D 0; i < vq->log_cache_nb_elem; i++) { +=09=09struct log_cache_entry *elem =3D vq->log_cache + i; + +=09=09if (elem->offset =3D=3D offset) { +=09=09=09elem->val |=3D (1UL << bit_nr); +=09=09=09return; +=09=09} +=09} + +=09if (unlikely(i >=3D VHOST_LOG_CACHE_NR)) { +=09=09/* +=09=09 * No more room for a new log cache entry, +=09=09 * so write the dirty log map directly. +=09=09 */ +=09=09rte_smp_wmb(); +=09=09vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + +=09=09return; +=09} + +=09vq->log_cache[i].offset =3D offset; +=09vq->log_cache[i].val =3D (1UL << bit_nr); +=09vq->log_cache_nb_elem++; +} + +void +__vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq= , +=09=09=09uint64_t addr, uint64_t len) +{ +=09uint64_t page; + +=09if (unlikely(!dev->log_base || !len)) +=09=09return; + +=09if (unlikely(dev->log_size <=3D ((addr + len - 1) / VHOST_LOG_PAGE / 8)= )) +=09=09return; + +=09page =3D addr / VHOST_LOG_PAGE; +=09while (page * VHOST_LOG_PAGE < addr + len) { +=09=09vhost_log_cache_page(dev, vq, page); +=09=09page +=3D 1; +=09} +} + static void cleanup_vq(struct vhost_virtqueue *vq, int destroy) { diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index f8b587a9d..34e2ecc4c 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -271,141 +271,33 @@ struct virtio_net { } __rte_cache_aligned; =20 =20 -#define VHOST_LOG_PAGE=094096 - -/* - * Atomically set a bit in memory. - */ -static __rte_always_inline void -vhost_set_bit(unsigned int nr, volatile uint8_t *addr) -{ -#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) -=09/* -=09 * __sync_ built-ins are deprecated, but __atomic_ ones -=09 * are sub-optimized in older GCC versions. -=09 */ -=09__sync_fetch_and_or_1(addr, (1U << nr)); -#else -=09__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED); -#endif -} - -static __rte_always_inline void -vhost_log_page(uint8_t *log_base, uint64_t page) -{ -=09vhost_set_bit(page % 8, &log_base[page / 8]); -} +void __vhost_log_cache_write(struct virtio_net *dev, +=09=09struct vhost_virtqueue *vq, +=09=09uint64_t addr, uint64_t len); +void __vhost_log_cache_sync(struct virtio_net *dev, +=09=09struct vhost_virtqueue *vq); +void __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len= ); =20 static __rte_always_inline void vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) { -=09uint64_t page; - -=09if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) =3D=3D 0) || -=09=09 !dev->log_base || !len)) -=09=09return; - -=09if (unlikely(dev->log_size <=3D ((addr + len - 1) / VHOST_LOG_PAGE / 8)= )) -=09=09return; - -=09/* To make sure guest memory updates are committed before logging */ -=09rte_smp_wmb(); - -=09page =3D addr / VHOST_LOG_PAGE; -=09while (page * VHOST_LOG_PAGE < addr + len) { -=09=09vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); -=09=09page +=3D 1; -=09} +=09if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL))) +=09=09__vhost_log_write(dev, addr, len); } =20 static __rte_always_inline void vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq) { -=09unsigned long *log_base; -=09int i; - -=09if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) =3D=3D 0) || -=09=09 !dev->log_base)) -=09=09return; - -=09log_base =3D (unsigned long *)(uintptr_t)dev->log_base; - -=09/* -=09 * It is expected a write memory barrier has been issued -=09 * before this function is called. -=09 */ - -=09for (i =3D 0; i < vq->log_cache_nb_elem; i++) { -=09=09struct log_cache_entry *elem =3D vq->log_cache + i; - -#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) -=09=09/* -=09=09 * '__sync' builtins are deprecated, but '__atomic' ones -=09=09 * are sub-optimized in older GCC versions. -=09=09 */ -=09=09__sync_fetch_and_or(log_base + elem->offset, elem->val); -#else -=09=09__atomic_fetch_or(log_base + elem->offset, elem->val, -=09=09=09=09__ATOMIC_RELAXED); -#endif -=09} - -=09rte_smp_wmb(); - -=09vq->log_cache_nb_elem =3D 0; -} - -static __rte_always_inline void -vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq, -=09=09=09uint64_t page) -{ -=09uint32_t bit_nr =3D page % (sizeof(unsigned long) << 3); -=09uint32_t offset =3D page / (sizeof(unsigned long) << 3); -=09int i; - -=09for (i =3D 0; i < vq->log_cache_nb_elem; i++) { -=09=09struct log_cache_entry *elem =3D vq->log_cache + i; - -=09=09if (elem->offset =3D=3D offset) { -=09=09=09elem->val |=3D (1UL << bit_nr); -=09=09=09return; -=09=09} -=09} - -=09if (unlikely(i >=3D VHOST_LOG_CACHE_NR)) { -=09=09/* -=09=09 * No more room for a new log cache entry, -=09=09 * so write the dirty log map directly. -=09=09 */ -=09=09rte_smp_wmb(); -=09=09vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); - -=09=09return; -=09} - -=09vq->log_cache[i].offset =3D offset; -=09vq->log_cache[i].val =3D (1UL << bit_nr); -=09vq->log_cache_nb_elem++; +=09if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL))) +=09=09__vhost_log_cache_sync(dev, vq); } =20 static __rte_always_inline void vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq, =09=09=09uint64_t addr, uint64_t len) { -=09uint64_t page; - -=09if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) =3D=3D 0) || -=09=09 !dev->log_base || !len)) -=09=09return; - -=09if (unlikely(dev->log_size <=3D ((addr + len - 1) / VHOST_LOG_PAGE / 8)= )) -=09=09return; - -=09page =3D addr / VHOST_LOG_PAGE; -=09while (page * VHOST_LOG_PAGE < addr + len) { -=09=09vhost_log_cache_page(dev, vq, page); -=09=09page +=3D 1; -=09} +=09if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL))) +=09=09__vhost_log_cache_write(dev, vq, addr, len); } =20 static __rte_always_inline void --=20 2.21.1