From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 857484234B; Tue, 10 Oct 2023 12:14:31 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 0B74540297; Tue, 10 Oct 2023 12:14:31 +0200 (CEST) Received: from forward500c.mail.yandex.net (forward500c.mail.yandex.net [178.154.239.208]) by mails.dpdk.org (Postfix) with ESMTP id 9358040278 for ; Tue, 10 Oct 2023 12:14:30 +0200 (CEST) Received: from mail-nwsmtp-smtp-production-main-46.sas.yp-c.yandex.net (mail-nwsmtp-smtp-production-main-46.sas.yp-c.yandex.net [IPv6:2a02:6b8:c08:4212:0:640:eaad:0]) by forward500c.mail.yandex.net (Yandex) with ESMTP id CAD525FD60; Tue, 10 Oct 2023 13:14:29 +0300 (MSK) Received: by mail-nwsmtp-smtp-production-main-46.sas.yp-c.yandex.net (smtp/Yandex) with ESMTPSA id QEa0SpEDfSw0-KODjGoZD; Tue, 10 Oct 2023 13:14:28 +0300 X-Yandex-Fwd: 1 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=yandex.ru; s=mail; t=1696932869; bh=4YUXb+fn48+C/xyfWWoLzizfysCyAjEc6+MIBOFsBSw=; h=From:In-Reply-To:Cc:Date:References:To:Subject:Message-ID; b=meQWNX2xd1EoGgKoh2waA2ZHqsBapDvezaYKcyqbTnFcsXiInlh68X11exZUtyaLp jI1AdG9J2ZayykJkqTwS9MVpCQQ7wKSMEMQuE4gu5/yv1QACk7TcxR+tbjprmXnh4M 053+LICiMJkZNFadGkTnWLZgnR9tyu8kMh+nVePM= Authentication-Results: mail-nwsmtp-smtp-production-main-46.sas.yp-c.yandex.net; dkim=pass header.i=@yandex.ru Message-ID: <2366fabf-8374-4615-9d46-c97435533f85@yandex.ru> Date: Tue, 10 Oct 2023 11:14:24 +0100 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Subject: Re: [PATCH v6 3/3] power: amd power monitor support Content-Language: en-US, ru-RU To: Sivaprasad Tummala , david.marchand@redhat.com Cc: david.hunt@intel.com, roretzla@linux.microsoft.com, anatoly.burakov@intel.com, thomas@monjalon.net, ferruh.yigit@amd.com, dev@dpdk.org References: <20230816185959.1331336-3-sivaprasad.tummala@amd.com> 
<20231009140546.862553-1-sivaprasad.tummala@amd.com> <20231009140546.862553-3-sivaprasad.tummala@amd.com> From: Konstantin Ananyev In-Reply-To: <20231009140546.862553-3-sivaprasad.tummala@amd.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org 09.10.2023 15:05, Sivaprasad Tummala пишет: > mwaitx allows EPYC processors to enter an implementation-dependent > power/performance optimized state (C1 state) for a specific period > or until a store to the monitored address range. > > Signed-off-by: Sivaprasad Tummala > Acked-by: Anatoly Burakov > --- > lib/eal/x86/rte_power_intrinsics.c | 108 ++++++++++++++++++++++------- > 1 file changed, 84 insertions(+), 24 deletions(-) > > diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c > index 664cde01e9..0d2953f570 100644 > --- a/lib/eal/x86/rte_power_intrinsics.c > +++ b/lib/eal/x86/rte_power_intrinsics.c > @@ -17,6 +17,78 @@ static struct power_wait_status { > volatile void *monitor_addr; /**< NULL if not currently sleeping */ > } __rte_cache_aligned wait_status[RTE_MAX_LCORE]; > > +/** > + * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state. > + * For more information about usage of these instructions, please refer to > + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual. > + */ > +static void intel_umonitor(volatile void *addr) > +{ > +#if defined(RTE_TOOLCHAIN_MSVC) || defined(__WAITPKG__) > + /* cast away "volatile" when using the intrinsic */ > + _umonitor((void *)(uintptr_t)addr); > +#else > + /* > + * we're using raw byte codes for compiler versions which > + * don't support this instruction natively. 
> + */ > + asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;" > + : > + : "D"(addr)); > +#endif > +} > + > +static void intel_umwait(const uint64_t timeout) > +{ > + const uint32_t tsc_l = (uint32_t)timeout; > + const uint32_t tsc_h = (uint32_t)(timeout >> 32); > +#if defined(RTE_TOOLCHAIN_MSVC) || defined(__WAITPKG__) > + _umwait(tsc_l, tsc_h); > +#else > + asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;" > + : /* ignore rflags */ > + : "D"(0), /* enter C0.2 */ > + "a"(tsc_l), "d"(tsc_h)); > +#endif > +} > + > +/** > + * This function uses MONITORX/MWAITX instructions and will enter C1 state. > + * For more information about usage of these instructions, please refer to > + * AMD64 Architecture Programmer’s Manual. > + */ > +static void amd_monitorx(volatile void *addr) > +{ > +#if defined(__MWAITX__) > + /* cast away "volatile" when using the intrinsic */ > + _mm_monitorx((void *)(uintptr_t)addr, 0, 0); > +#else > + asm volatile(".byte 0x0f, 0x01, 0xfa;" > + : > + : "a"(addr), > + "c"(0), /* no extensions */ > + "d"(0)); /* no hints */ > +#endif > +} > + > +static void amd_mwaitx(const uint64_t timeout) > +{ > + RTE_SET_USED(timeout); > +#if defined(__MWAITX__) > + _mm_mwaitx(0, 0, 0); > +#else > + asm volatile(".byte 0x0f, 0x01, 0xfb;" > + : /* ignore rflags */ > + : "a"(0), /* enter C1 */ > + "c"(0)); /* no time-out */ > +#endif > +} > + > +static struct { > + void (*mmonitor)(volatile void *addr); > + void (*mwait)(const uint64_t timeout); > +} __rte_cache_aligned power_monitor_ops; > + > static inline void > __umwait_wakeup(volatile void *addr) > { > @@ -76,8 +148,6 @@ int > rte_power_monitor(const struct rte_power_monitor_cond *pmc, > const uint64_t tsc_timestamp) > { > - const uint32_t tsc_l = (uint32_t)tsc_timestamp; > - const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32); > const unsigned int lcore_id = rte_lcore_id(); > struct power_wait_status *s; > uint64_t cur_value; > @@ -105,19 +175,8 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc, > 
rte_spinlock_lock(&s->lock); > s->monitor_addr = pmc->addr; > > - /* set address for UMONITOR */ > -#if defined(RTE_TOOLCHAIN_MSVC) || defined(__WAITPKG__) > - /* cast away "volatile" when using the intrinsic */ > - _umonitor((void *)(uintptr_t)pmc->addr); > -#else > - /* > - * we're using raw byte codes for compiler versions which > - * don't support this instruction natively. > - */ > - asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;" > - : > - : "D"(pmc->addr)); > -#endif > + /* set address for memory monitor */ > + power_monitor_ops.mmonitor(pmc->addr); > > /* now that we've put this address into monitor, we can unlock */ > rte_spinlock_unlock(&s->lock); > @@ -128,15 +187,8 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc, > if (pmc->fn(cur_value, pmc->opaque) != 0) > goto end; > > - /* execute UMWAIT */ > -#if defined(RTE_TOOLCHAIN_MSVC) || defined(__WAITPKG__) > - _umwait(tsc_l, tsc_h); > -#else > - asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;" > - : /* ignore rflags */ > - : "D"(0), /* enter C0.2 */ > - "a"(tsc_l), "d"(tsc_h)); > -#endif > + /* execute mwait */ > + power_monitor_ops.mwait(tsc_timestamp); > > end: > /* erase sleep address */ > @@ -186,6 +238,14 @@ RTE_INIT(rte_power_intrinsics_init) { > wait_multi_supported = 1; > if (i.power_monitor) > monitor_supported = 1; > + > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MONITORX)) { /* AMD */ > + power_monitor_ops.mmonitor = &amd_monitorx; > + power_monitor_ops.mwait = &amd_mwaitx; > + } else { /* Intel */ > + power_monitor_ops.mmonitor = &intel_umonitor; > + power_monitor_ops.mwait = &intel_umwait; > + } > } > > int Acked-by: Konstantin Ananyev