DPDK patches and discussions
 help / color / mirror / Atom feed
From: Sivaprasad Tummala <Sivaprasad.Tummala@amd.com>
To: <david.hunt@intel.com>
Cc: <dev@dpdk.org>
Subject: [PATCH v1] power: amd power monitor support
Date: Mon, 10 Apr 2023 03:21:49 -0700	[thread overview]
Message-ID: <20230410102149.198389-1-Sivaprasad.Tummala@amd.com> (raw)

mwaitx allows epyc processors to enter a implementation dependent
power/performance optimized state (C1 state) for a specific period
or until a store to the monitored address range.

Signed-off-by: Sivaprasad Tummala <Sivaprasad.Tummala@amd.com>
---
 lib/eal/include/generic/rte_cpuflags.h |  2 +
 lib/eal/x86/include/rte_cpuflags.h     |  1 +
 lib/eal/x86/rte_cpuflags.c             |  3 +
 lib/eal/x86/rte_power_intrinsics.c     | 80 +++++++++++++++++++++++++-
 lib/power/rte_power_pmd_mgmt.c         |  3 +-
 5 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/lib/eal/include/generic/rte_cpuflags.h b/lib/eal/include/generic/rte_cpuflags.h
index d35551e931..db653a8dd7 100644
--- a/lib/eal/include/generic/rte_cpuflags.h
+++ b/lib/eal/include/generic/rte_cpuflags.h
@@ -26,6 +26,8 @@ struct rte_cpu_intrinsics {
 	/**< indicates support for rte_power_pause function */
 	uint32_t power_monitor_multi : 1;
 	/**< indicates support for rte_power_monitor_multi function */
+	uint32_t amd_power_monitorx : 1;
+	/**< indicates amd support for rte_power_monitor function */
 };
 
 /**
diff --git a/lib/eal/x86/include/rte_cpuflags.h b/lib/eal/x86/include/rte_cpuflags.h
index 92e90fb6e0..d3e608533a 100644
--- a/lib/eal/x86/include/rte_cpuflags.h
+++ b/lib/eal/x86/include/rte_cpuflags.h
@@ -133,6 +133,7 @@ enum rte_cpu_flag_t {
 	RTE_CPUFLAG_AVX512VP2INTERSECT,     /**< AVX512 Two Register Intersection */
 
 	RTE_CPUFLAG_WAITPKG,                /**< UMONITOR/UMWAIT/TPAUSE */
+	RTE_CPUFLAG_MONITORX,               /**< MONITORX */
 
 	/* The last item */
 	RTE_CPUFLAG_NUMFLAGS,               /**< This should always be the last! */
diff --git a/lib/eal/x86/rte_cpuflags.c b/lib/eal/x86/rte_cpuflags.c
index d6b518251b..ae2e0a8470 100644
--- a/lib/eal/x86/rte_cpuflags.c
+++ b/lib/eal/x86/rte_cpuflags.c
@@ -133,6 +133,7 @@ const struct feature_entry rte_cpu_feature_table[] = {
 
 	FEAT_DEF(LAHF_SAHF, 0x80000001, 0, RTE_REG_ECX,  0)
 	FEAT_DEF(LZCNT, 0x80000001, 0, RTE_REG_ECX,  4)
+	FEAT_DEF(MONITORX, 0x80000001, 0, RTE_REG_ECX,  29)
 
 	FEAT_DEF(SYSCALL, 0x80000001, 0, RTE_REG_EDX, 11)
 	FEAT_DEF(XD, 0x80000001, 0, RTE_REG_EDX, 20)
@@ -191,5 +192,7 @@ rte_cpu_get_intrinsics_support(struct rte_cpu_intrinsics *intrinsics)
 		intrinsics->power_pause = 1;
 		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM))
 			intrinsics->power_monitor_multi = 1;
+	} else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MONITORX)) {
+		intrinsics->amd_power_monitorx = 1;
 	}
 }
diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c
index f749da9b85..4e81870387 100644
--- a/lib/eal/x86/rte_power_intrinsics.c
+++ b/lib/eal/x86/rte_power_intrinsics.c
@@ -30,6 +30,7 @@ __umwait_wakeup(volatile void *addr)
 
 static bool wait_supported;
 static bool wait_multi_supported;
+static bool amd_mwaitx_supported;
 
 static inline uint64_t
 __get_umwait_val(const volatile void *p, const uint8_t sz)
@@ -65,6 +66,76 @@ __check_val_size(const uint8_t sz)
 	}
 }
 
+/**
+ * This function uses MONITORX/MWAITX instructions and will enter C1 state.
+ * For more information about usage of these instructions, please refer to
+ * AMD64 Architecture Programmer’s Manual.
+ */
+static inline int
+amd_power_monitorx(const struct rte_power_monitor_cond *pmc,
+		const uint64_t tsc_timestamp)
+{
+	const unsigned int lcore_id = rte_lcore_id();
+	struct power_wait_status *s;
+	uint64_t cur_value;
+
+	RTE_SET_USED(tsc_timestamp);
+
+	/* prevent non-EAL thread from using this API */
+	if (lcore_id >= RTE_MAX_LCORE)
+		return -EINVAL;
+
+	if (pmc == NULL)
+		return -EINVAL;
+
+	if (__check_val_size(pmc->size) < 0)
+		return -EINVAL;
+
+	if (pmc->fn == NULL)
+		return -EINVAL;
+
+	s = &wait_status[lcore_id];
+
+	/* update sleep address */
+	rte_spinlock_lock(&s->lock);
+	s->monitor_addr = pmc->addr;
+
+	/*
+	 * we're using raw byte codes for now as only the newest compiler
+	 * versions support this instruction natively.
+	 */
+	/* set address for MONITORX */
+	asm volatile(".byte 0x0f, 0x01, 0xfa;"
+			:
+			: "a"(pmc->addr),
+			"c"(0),  /* no extensions */
+			"d"(0)); /* no hints */
+
+	/* now that we've put this address into monitor, we can unlock */
+	rte_spinlock_unlock(&s->lock);
+
+	cur_value = __get_umwait_val(pmc->addr, pmc->size);
+
+	/* check if callback indicates we should abort */
+	if (pmc->fn(cur_value, pmc->opaque) != 0)
+		goto end;
+
+	/* execute MWAITX */
+	asm volatile(".byte 0x0f, 0x01, 0xfb;"
+			: /* ignore rflags */
+			: "a"(0), /* enter C1 */
+			"c"(0), /* no time-out */
+			"b"(0));
+
+end:
+	/* erase sleep address */
+	rte_spinlock_lock(&s->lock);
+	s->monitor_addr = NULL;
+	rte_spinlock_unlock(&s->lock);
+
+	return 0;
+}
+
 /**
  * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
  * For more information about usage of these instructions, please refer to
@@ -80,6 +151,9 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
 	struct power_wait_status *s;
 	uint64_t cur_value;
 
+	if (amd_mwaitx_supported)
+		return amd_power_monitorx(pmc, tsc_timestamp);
+
 	/* prevent user from running this instruction if it's not supported */
 	if (!wait_supported)
 		return -ENOTSUP;
@@ -126,7 +200,7 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
 	asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
 			: /* ignore rflags */
 			: "D"(0), /* enter C0.2 */
-			  "a"(tsc_l), "d"(tsc_h));
+			"a"(tsc_l), "d"(tsc_h));
 
 end:
 	/* erase sleep address */
@@ -170,6 +244,8 @@ RTE_INIT(rte_power_intrinsics_init) {
 		wait_supported = 1;
 	if (i.power_monitor_multi)
 		wait_multi_supported = 1;
+	if (i.amd_power_monitorx)
+		amd_mwaitx_supported = 1;
 }
 
 int
@@ -178,7 +254,7 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
 	struct power_wait_status *s;
 
 	/* prevent user from running this instruction if it's not supported */
-	if (!wait_supported)
+	if (!wait_supported && !amd_mwaitx_supported)
 		return -ENOTSUP;
 
 	/* prevent buffer overrun */
diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index ca1840387c..43971e6014 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -447,7 +447,8 @@ check_monitor(struct pmd_core_cfg *cfg, const union queue *qdata)
 	bool multimonitor_supported;
 
 	/* check if rte_power_monitor is supported */
-	if (!global_data.intrinsics_support.power_monitor) {
+	if ((!global_data.intrinsics_support.power_monitor) &&
+	    (!global_data.intrinsics_support.amd_power_monitorx)) {
 		RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not supported\n");
 		return -ENOTSUP;
 	}
-- 
2.34.1


                 reply	other threads:[~2023-04-10 10:22 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230410102149.198389-1-Sivaprasad.Tummala@amd.com \
    --to=sivaprasad.tummala@amd.com \
    --cc=david.hunt@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).