From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 1032AA32A2 for ; Thu, 24 Oct 2019 12:43:11 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 3BBD61E931; Thu, 24 Oct 2019 12:43:01 +0200 (CEST) Received: from foss.arm.com (unknown [217.140.110.172]) by dpdk.org (Postfix) with ESMTP id A51341E931 for ; Thu, 24 Oct 2019 12:42:58 +0200 (CEST) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id E32F94AC; Thu, 24 Oct 2019 03:42:50 -0700 (PDT) Received: from net-arm-thunderx2-01.test.ast.arm.com (net-arm-thunderx2-01.shanghai.arm.com [10.169.40.40]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 9484A3F71F; Thu, 24 Oct 2019 03:42:47 -0700 (PDT) From: Gavin Hu To: dev@dpdk.org Cc: nd@arm.com, david.marchand@redhat.com, konstantin.ananyev@intel.com, thomas@monjalon.net, stephen@networkplumber.org, hemant.agrawal@nxp.com, jerinj@marvell.com, pbhagavatula@marvell.com, Honnappa.Nagarahalli@arm.com, ruifeng.wang@arm.com, phil.yang@arm.com, steve.capper@arm.com Date: Thu, 24 Oct 2019 18:42:25 +0800 Message-Id: <1571913748-51735-3-git-send-email-gavin.hu@arm.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1571913748-51735-1-git-send-email-gavin.hu@arm.com> References: <1571913748-51735-1-git-send-email-gavin.hu@arm.com> In-Reply-To: <1561911676-37718-1-git-send-email-gavin.hu@arm.com> References: <1561911676-37718-1-git-send-email-gavin.hu@arm.com> Subject: [dpdk-dev] [PATCH v9 2/5] eal: add the APIs to wait until equal X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The rte_wait_until_equal_xx APIs abstract the functionality of 'polling for a memory location to 
become equal to a given value'. Add the RTE_ARM_USE_WFE configuration entry for aarch64, disabled by default. When it is enabled, the above APIs will call the WFE instruction to save CPU cycles and power. >From a VM, when calling this API on aarch64, it may trap in and out to release vCPUs, which causes high exit latency. Since kernel 4.18.20 an adaptive trapping mechanism is introduced to balance the latency and workload. Signed-off-by: Gavin Hu Reviewed-by: Ruifeng Wang Reviewed-by: Steve Capper Reviewed-by: Ola Liljedahl Reviewed-by: Honnappa Nagarahalli Reviewed-by: Phil Yang Acked-by: Pavan Nikhilesh Acked-by: Jerin Jacob --- config/arm/meson.build | 1 + config/common_base | 5 + .../common/include/arch/arm/rte_pause_64.h | 70 +++++++ lib/librte_eal/common/include/generic/rte_pause.h | 217 +++++++++++++++++++++ 4 files changed, 293 insertions(+) diff --git a/config/arm/meson.build b/config/arm/meson.build index 979018e..b4b4cac 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -26,6 +26,7 @@ flags_common_default = [ ['RTE_LIBRTE_AVP_PMD', false], ['RTE_SCHED_VECTOR', false], + ['RTE_ARM_USE_WFE', false], ] flags_generic = [ diff --git a/config/common_base b/config/common_base index e843a21..c812156 100644 --- a/config/common_base +++ b/config/common_base @@ -111,6 +111,11 @@ CONFIG_RTE_MAX_VFIO_CONTAINERS=64 CONFIG_RTE_MALLOC_DEBUG=n CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=n CONFIG_RTE_USE_LIBBSD=n +# Use WFE instructions to implement the rte_wait_until_equal_xxx APIs, +# calling these APIs puts the cores in low power state while waiting +# for the memory address to become equal to the expected value. +# This is supported only by aarch64. +CONFIG_RTE_ARM_USE_WFE=n # # Recognize/ignore the AVX/AVX512 CPU flags for performance/power testing. 
diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h index 93895d3..7bc8efb 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2017 Cavium, Inc + * Copyright(c) 2019 Arm Limited */ #ifndef _RTE_PAUSE_ARM64_H_ @@ -17,6 +18,75 @@ static inline void rte_pause(void) asm volatile("yield" ::: "memory"); } +#ifdef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED +static inline void rte_sevl(void) +{ + asm volatile("sevl" : : : "memory"); +} + +static inline void rte_wfe(void) +{ + asm volatile("wfe" : : : "memory"); +} + +static __rte_always_inline uint16_t +__atomic_load_ex_16(volatile uint16_t *addr, int memorder) +{ + uint16_t tmp; + assert((memorder == __ATOMIC_ACQUIRE) + || (memorder == __ATOMIC_RELAXED)); + if (memorder == __ATOMIC_ACQUIRE) + asm volatile("ldaxrh %w[tmp], [%x[addr]]" + : [tmp] "=&r" (tmp) + : [addr] "r"(addr) + : "memory"); + else if (memorder == __ATOMIC_RELAXED) + asm volatile("ldxrh %w[tmp], [%x[addr]]" + : [tmp] "=&r" (tmp) + : [addr] "r"(addr) + : "memory"); + return tmp; +} + +static __rte_always_inline uint32_t +__atomic_load_ex_32(volatile uint32_t *addr, int memorder) +{ + uint32_t tmp; + assert((memorder == __ATOMIC_ACQUIRE) + || (memorder == __ATOMIC_RELAXED)); + if (memorder == __ATOMIC_ACQUIRE) + asm volatile("ldaxr %w[tmp], [%x[addr]]" + : [tmp] "=&r" (tmp) + : [addr] "r"(addr) + : "memory"); + else if (memorder == __ATOMIC_RELAXED) + asm volatile("ldxr %w[tmp], [%x[addr]]" + : [tmp] "=&r" (tmp) + : [addr] "r"(addr) + : "memory"); + return tmp; +} + +static __rte_always_inline uint64_t +__atomic_load_ex_64(volatile uint64_t *addr, int memorder) +{ + uint64_t tmp; + assert((memorder == __ATOMIC_ACQUIRE) + || (memorder == __ATOMIC_RELAXED)); + if (memorder == __ATOMIC_ACQUIRE) + asm volatile("ldaxr %x[tmp], [%x[addr]]" + : [tmp] "=&r" 
(tmp) + : [addr] "r"(addr) + : "memory"); + else if (memorder == __ATOMIC_RELAXED) + asm volatile("ldxr %x[tmp], [%x[addr]]" + : [tmp] "=&r" (tmp) + : [addr] "r"(addr) + : "memory"); + return tmp; +} +#endif + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/generic/rte_pause.h b/lib/librte_eal/common/include/generic/rte_pause.h index 52bd4db..4db44f9 100644 --- a/lib/librte_eal/common/include/generic/rte_pause.h +++ b/lib/librte_eal/common/include/generic/rte_pause.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2017 Cavium, Inc + * Copyright(c) 2019 Arm Limited */ #ifndef _RTE_PAUSE_H_ @@ -12,6 +13,12 @@ * */ +#include +#include +#include +#include +#include + /** * Pause CPU execution for a short while * @@ -20,4 +27,214 @@ */ static inline void rte_pause(void); +static inline void rte_sevl(void); +static inline void rte_wfe(void); +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Atomic load from addr, it returns the 16-bit content of *addr. + * + * @param addr + * A pointer to the memory location. + * @param memorder + * The valid memory order variants are __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. + * These map to C++11 memory orders with the same names, see the C++11 standard + * or the GCC wiki on atomic synchronization for detailed definitions. + */ +static __rte_always_inline uint16_t +__atomic_load_ex_16(volatile uint16_t *addr, int memorder); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Atomic load from addr, it returns the 32-bit content of *addr. + * + * @param addr + * A pointer to the memory location. + * @param memorder + * The valid memory order variants are __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. + * These map to C++11 memory orders with the same names, see the C++11 standard + * or the GCC wiki on atomic synchronization for detailed definitions. 
+ */ +static __rte_always_inline uint32_t +__atomic_load_ex_32(volatile uint32_t *addr, int memorder); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Atomic load from addr, it returns the 64-bit content of *addr. + * + * @param addr + * A pointer to the memory location. + * @param memorder + * The valid memory order variants are __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. + * These map to C++11 memory orders with the same names, see the C++11 standard + * or the GCC wiki on atomic synchronization for detailed definitions. + */ +static __rte_always_inline uint64_t +__atomic_load_ex_64(volatile uint64_t *addr, int memorder); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Wait for *addr to be updated with a 16-bit expected value, with a relaxed + * memory ordering model meaning the loads around this API can be reordered. + * + * @param addr + * A pointer to the memory location. + * @param expected + * A 16-bit expected value to be in the memory location. + * @param memorder + * Two different memory orders that can be specified: + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to + * C++11 memory orders with the same names, see the C++11 standard or + * the GCC wiki on atomic synchronization for detailed definition. + */ +__rte_experimental +static __rte_always_inline void +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, +int memorder); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Wait for *addr to be updated with a 32-bit expected value, with a relaxed + * memory ordering model meaning the loads around this API can be reordered. + * + * @param addr + * A pointer to the memory location. + * @param expected + * A 32-bit expected value to be in the memory location. 
+ * @param memorder + * Two different memory orders that can be specified: + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to + * C++11 memory orders with the same names, see the C++11 standard or + * the GCC wiki on atomic synchronization for detailed definition. + */ +__rte_experimental +static __rte_always_inline void +rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected, +int memorder); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Wait for *addr to be updated with a 64-bit expected value, with a relaxed + * memory ordering model meaning the loads around this API can be reordered. + * + * @param addr + * A pointer to the memory location. + * @param expected + * A 64-bit expected value to be in the memory location. + * @param memorder + * Two different memory orders that can be specified: + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to + * C++11 memory orders with the same names, see the C++11 standard or + * the GCC wiki on atomic synchronization for detailed definition. + */ +__rte_experimental +static __rte_always_inline void +rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected, +int memorder); + +#ifdef RTE_ARM_USE_WFE +#define RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED +#endif + +#ifndef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED +static inline void rte_sevl(void) +{ +} + +static inline void rte_wfe(void) +{ + rte_pause(); +} + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Atomic load from addr, it returns the 16-bit content of *addr. + * + * @param addr + * A pointer to the memory location. + * @param memorder + * The valid memory order variants are __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. + * These map to C++11 memory orders with the same names, see the C++11 standard + * or the GCC wiki on atomic synchronization for detailed definitions. 
+ */ +static __rte_always_inline uint16_t +__atomic_load_ex_16(volatile uint16_t *addr, int memorder) +{ + uint16_t tmp; + assert((memorder == __ATOMIC_ACQUIRE) + || (memorder == __ATOMIC_RELAXED)); + tmp = __atomic_load_n(addr, memorder); + return tmp; +} + +static __rte_always_inline uint32_t +__atomic_load_ex_32(volatile uint32_t *addr, int memorder) +{ + uint32_t tmp; + assert((memorder == __ATOMIC_ACQUIRE) + || (memorder == __ATOMIC_RELAXED)); + tmp = __atomic_load_n(addr, memorder); + return tmp; +} + +static __rte_always_inline uint64_t +__atomic_load_ex_64(volatile uint64_t *addr, int memorder) +{ + uint64_t tmp; + assert((memorder == __ATOMIC_ACQUIRE) + || (memorder == __ATOMIC_RELAXED)); + tmp = __atomic_load_n(addr, memorder); + return tmp; +} + +static __rte_always_inline void +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, +int memorder) +{ + if (__atomic_load_ex_16(addr, memorder) != expected) { + rte_sevl(); + do { + rte_wfe(); + } while (__atomic_load_ex_16(addr, memorder) != expected); + } +} + +static __rte_always_inline void +rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected, +int memorder) +{ + if (__atomic_load_ex_32(addr, memorder) != expected) { + rte_sevl(); + do { + rte_wfe(); + } while (__atomic_load_ex_32(addr, memorder) != expected); + } +} + +static __rte_always_inline void +rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected, +int memorder) +{ + if (__atomic_load_ex_64(addr, memorder) != expected) { + rte_sevl(); + do { + rte_wfe(); + } while (__atomic_load_ex_64(addr, memorder) != expected); + } +} +#endif + #endif /* _RTE_PAUSE_H_ */ -- 2.7.4