* [PATCH] power: add unpriv. read of turbo % for pstate
@ 2022-08-24 19:28 Markus Theil
2022-08-25 15:09 ` Hunt, David
2022-08-25 18:22 ` [PATCH v2] " Markus Theil
0 siblings, 2 replies; 4+ messages in thread
From: Markus Theil @ 2022-08-24 19:28 UTC (permalink / raw)
To: David Hunt; +Cc: dev, Markus Theil
If DPDK applications are to run with a minimal set of privileges,
using the msr kernel module on Linux should not be necessary.
Since at least kernel 4.4, the rdmsr call to obtain the last non-turbo
boost frequency can be left out if the sysfs interface is used.
Also RHEL 7 with recent kernel updates should include the sysfs interface
for this (I only looked this up for CentOS 7).
Signed-off-by: Markus Theil <markus.theil@tu-ilmenau.de>
---
lib/power/power_pstate_cpufreq.c | 69 ++++++++++++++++++--------------
1 file changed, 40 insertions(+), 29 deletions(-)
diff --git a/lib/power/power_pstate_cpufreq.c b/lib/power/power_pstate_cpufreq.c
index 78c9197695..c3d66a8f68 100644
--- a/lib/power/power_pstate_cpufreq.c
+++ b/lib/power/power_pstate_cpufreq.c
@@ -35,15 +35,9 @@
"/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq"
#define POWER_SYSFILE_BASE_FREQ \
"/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency"
+#define POWER_SYSFILE_TURBO_PCT \
+ "/sys/devices/system/cpu/intel_pstate/turbo_pct"
#define POWER_PSTATE_DRIVER "intel_pstate"
-#define POWER_MSR_PATH "/dev/cpu/%u/msr"
-
-/*
- * MSR related
- */
-#define PLATFORM_INFO 0x0CE
-#define NON_TURBO_MASK 0xFF00
-#define NON_TURBO_OFFSET 0x8
enum power_state {
@@ -74,37 +68,33 @@ struct pstate_power_info {
static struct pstate_power_info lcore_power_info[RTE_MAX_LCORE];
/**
- * It is to read the specific MSR.
+ * It is to read the turbo mode percentage from sysfs
*/
-
static int32_t
-power_rdmsr(int msr, uint64_t *val, unsigned int lcore_id)
+power_read_turbo_pct(uint64_t *outVal)
{
int fd, ret;
- char fullpath[PATH_MAX];
+ char val[4] = {0};
- snprintf(fullpath, sizeof(fullpath), POWER_MSR_PATH, lcore_id);
-
- fd = open(fullpath, O_RDONLY);
+ fd = open(POWER_SYSFILE_TURBO_PCT, O_RDONLY);
if (fd < 0) {
- RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", fullpath,
+ RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", POWER_SYSFILE_TURBO_PCT,
strerror(errno));
return fd;
}
- ret = pread(fd, val, sizeof(uint64_t), msr);
+ ret = read(fd, val, sizeof(val));
if (ret < 0) {
- RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", fullpath,
+ RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", POWER_SYSFILE_TURBO_PCT,
strerror(errno));
goto out;
}
- POWER_DEBUG_TRACE("MSR Path %s, offset 0x%X for lcore %u\n",
- fullpath, msr, lcore_id);
+ *outVal = (uint64_t) atol(val);
- POWER_DEBUG_TRACE("Ret value %d, content is 0x%"PRIx64"\n", ret, *val);
+ POWER_DEBUG_TRACE("power turbo pct: %"PRIu64"\n", *outVal);
out: close(fd);
return ret;
@@ -116,8 +106,9 @@ out: close(fd);
static int
power_init_for_setting_freq(struct pstate_power_info *pi)
{
- FILE *f_base = NULL, *f_base_max = NULL, *f_min = NULL, *f_max = NULL;
- uint32_t base_ratio, base_max_ratio;
+ FILE *f_base = NULL, *f_base_min = NULL, *f_base_max = NULL,
+ *f_min = NULL, *f_max = NULL;
+ uint32_t base_ratio, base_min_ratio, base_max_ratio;
uint64_t max_non_turbo;
int ret;
@@ -130,6 +121,14 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
goto err;
}
+ open_core_sysfs_file(&f_base_min, "r", POWER_SYSFILE_BASE_MIN_FREQ,
+ pi->lcore_id);
+ if (f_base_min == NULL) {
+ RTE_LOG(ERR, POWER, "failed to open %s\n",
+ POWER_SYSFILE_BASE_MIN_FREQ);
+ goto err;
+ }
+
open_core_sysfs_file(&f_min, "rw+", POWER_SYSFILE_MIN_FREQ,
pi->lcore_id);
if (f_min == NULL) {
@@ -158,6 +157,14 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
goto err;
}
+ /* read base min ratio */
+ ret = read_core_sysfs_u32(f_base_min, &base_min_ratio);
+ if (ret < 0) {
+ RTE_LOG(ERR, POWER, "Failed to read %s\n",
+ POWER_SYSFILE_BASE_MIN_FREQ);
+ goto err;
+ }
+
/* base ratio may not exist */
if (f_base != NULL) {
ret = read_core_sysfs_u32(f_base, &base_ratio);
@@ -170,20 +177,22 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
base_ratio = 0;
}
- /* Add MSR read to detect turbo status */
- if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0)
- goto err;
- /* no errors after this point */
-
/* convert ratios to bins */
base_max_ratio /= BUS_FREQ;
+ base_min_ratio /= BUS_FREQ;
base_ratio /= BUS_FREQ;
/* assign file handles */
pi->f_cur_min = f_min;
pi->f_cur_max = f_max;
- max_non_turbo = (max_non_turbo&NON_TURBO_MASK)>>NON_TURBO_OFFSET;
+ /* try to get turbo from global sysfs entry for less privileges than from MSR */
+ if (power_read_turbo_pct(&max_non_turbo) < 0)
+ goto err;
+ /* no errors after this point */
+
+ max_non_turbo = base_min_ratio
+ + (100 - max_non_turbo) * (base_max_ratio - base_min_ratio) / 100;
POWER_DEBUG_TRACE("no turbo perf %"PRIu64"\n", max_non_turbo);
@@ -220,6 +229,8 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
err:
if (f_base != NULL)
fclose(f_base);
+ if (f_base_min != NULL)
+ fclose(f_base_min);
if (f_base_max != NULL)
fclose(f_base_max);
if (f_min != NULL)
--
2.37.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] power: add unpriv. read of turbo % for pstate
2022-08-24 19:28 [PATCH] power: add unpriv. read of turbo % for pstate Markus Theil
@ 2022-08-25 15:09 ` Hunt, David
2022-08-25 18:22 ` [PATCH v2] " Markus Theil
1 sibling, 0 replies; 4+ messages in thread
From: Hunt, David @ 2022-08-25 15:09 UTC (permalink / raw)
To: Markus Theil; +Cc: dev
Hi Markus,
On 24/08/2022 20:28, Markus Theil wrote:
> If DPDK applications should be used with a minimal set of privileges,
> using the msr kernel module on linux should not be necessary.
>
> Since at least kernel 4.4 the rdmsr call to obtain the last non-turbo
> boost frequency can be left out, if the sysfs interface is used.
> Also RHEL 7 with recent kernel updates should include the sysfs interface
> for this (I only looked this up for CentOS 7).
>
> Signed-off-by: Markus Theil <markus.theil@tu-ilmenau.de>
> ---
> lib/power/power_pstate_cpufreq.c | 69 ++++++++++++++++++--------------
> 1 file changed, 40 insertions(+), 29 deletions(-)
>
> diff --git a/lib/power/power_pstate_cpufreq.c b/lib/power/power_pstate_cpufreq.c
> index 78c9197695..c3d66a8f68 100644
> --- a/lib/power/power_pstate_cpufreq.c
> +++ b/lib/power/power_pstate_cpufreq.c
> @@ -35,15 +35,9 @@
> "/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq"
> #define POWER_SYSFILE_BASE_FREQ \
> "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency"
> +#define POWER_SYSFILE_TURBO_PCT \
> + "/sys/devices/system/cpu/intel_pstate/turbo_pct"
> #define POWER_PSTATE_DRIVER "intel_pstate"
> -#define POWER_MSR_PATH "/dev/cpu/%u/msr"
> -
> -/*
> - * MSR related
> - */
> -#define PLATFORM_INFO 0x0CE
> -#define NON_TURBO_MASK 0xFF00
> -#define NON_TURBO_OFFSET 0x8
>
>
> enum power_state {
> @@ -74,37 +68,33 @@ struct pstate_power_info {
> static struct pstate_power_info lcore_power_info[RTE_MAX_LCORE];
>
> /**
> - * It is to read the specific MSR.
> + * It is to read the turbo mode percentage from sysfs
> */
> -
> static int32_t
> -power_rdmsr(int msr, uint64_t *val, unsigned int lcore_id)
> +power_read_turbo_pct(uint64_t *outVal)
> {
> int fd, ret;
> - char fullpath[PATH_MAX];
> + char val[4] = {0};
>
> - snprintf(fullpath, sizeof(fullpath), POWER_MSR_PATH, lcore_id);
> -
> - fd = open(fullpath, O_RDONLY);
> + fd = open(POWER_SYSFILE_TURBO_PCT, O_RDONLY);
>
> if (fd < 0) {
> - RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", fullpath,
> + RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", POWER_SYSFILE_TURBO_PCT,
> strerror(errno));
> return fd;
> }
>
> - ret = pread(fd, val, sizeof(uint64_t), msr);
> + ret = read(fd, val, sizeof(val));
>
> if (ret < 0) {
> - RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", fullpath,
> + RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", POWER_SYSFILE_TURBO_PCT,
> strerror(errno));
> goto out;
> }
>
> - POWER_DEBUG_TRACE("MSR Path %s, offset 0x%X for lcore %u\n",
> - fullpath, msr, lcore_id);
> + *outVal = (uint64_t) atol(val);
>
I'd recommend replacing atol with strtol: it reports conversion errors,
and it is more commonly used in DPDK code than atol.
> - POWER_DEBUG_TRACE("Ret value %d, content is 0x%"PRIx64"\n", ret, *val);
> + POWER_DEBUG_TRACE("power turbo pct: %"PRIu64"\n", *outVal);
>
> out: close(fd);
> return ret;
> @@ -116,8 +106,9 @@ out: close(fd);
> static int
> power_init_for_setting_freq(struct pstate_power_info *pi)
> {
> - FILE *f_base = NULL, *f_base_max = NULL, *f_min = NULL, *f_max = NULL;
> - uint32_t base_ratio, base_max_ratio;
> + FILE *f_base = NULL, *f_base_min = NULL, *f_base_max = NULL,
> + *f_min = NULL, *f_max = NULL;
> + uint32_t base_ratio, base_min_ratio, base_max_ratio;
> uint64_t max_non_turbo;
> int ret;
>
> @@ -130,6 +121,14 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
> goto err;
> }
>
> + open_core_sysfs_file(&f_base_min, "r", POWER_SYSFILE_BASE_MIN_FREQ,
> + pi->lcore_id);
> + if (f_base_min == NULL) {
> + RTE_LOG(ERR, POWER, "failed to open %s\n",
> + POWER_SYSFILE_BASE_MIN_FREQ);
> + goto err;
> + }
> +
> open_core_sysfs_file(&f_min, "rw+", POWER_SYSFILE_MIN_FREQ,
> pi->lcore_id);
> if (f_min == NULL) {
> @@ -158,6 +157,14 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
> goto err;
> }
>
> + /* read base min ratio */
> + ret = read_core_sysfs_u32(f_base_min, &base_min_ratio);
> + if (ret < 0) {
> + RTE_LOG(ERR, POWER, "Failed to read %s\n",
> + POWER_SYSFILE_BASE_MIN_FREQ);
> + goto err;
> + }
> +
> /* base ratio may not exist */
> if (f_base != NULL) {
> ret = read_core_sysfs_u32(f_base, &base_ratio);
> @@ -170,20 +177,22 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
> base_ratio = 0;
> }
>
> - /* Add MSR read to detect turbo status */
> - if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0)
> - goto err;
> - /* no errors after this point */
> -
> /* convert ratios to bins */
> base_max_ratio /= BUS_FREQ;
> + base_min_ratio /= BUS_FREQ;
> base_ratio /= BUS_FREQ;
>
> /* assign file handles */
> pi->f_cur_min = f_min;
> pi->f_cur_max = f_max;
>
> - max_non_turbo = (max_non_turbo&NON_TURBO_MASK)>>NON_TURBO_OFFSET;
> + /* try to get turbo from global sysfs entry for less privileges than from MSR */
> + if (power_read_turbo_pct(&max_non_turbo) < 0)
> + goto err;
> + /* no errors after this point */
> +
> + max_non_turbo = base_min_ratio
> + + (100 - max_non_turbo) * (base_max_ratio - base_min_ratio) / 100;
>
> POWER_DEBUG_TRACE("no turbo perf %"PRIu64"\n", max_non_turbo);
>
> @@ -220,6 +229,8 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
> err:
> if (f_base != NULL)
> fclose(f_base);
> + if (f_base_min != NULL)
> + fclose(f_base_min);
> if (f_base_max != NULL)
> fclose(f_base_max);
> if (f_min != NULL)
Nice patch.
I've run the patched code and can confirm that the max_non_turbo value
agrees with the relevant byte in the 0xCE MSR, so looks good.
Tested-By: David Hunt <david.hunt@intel.com>
Also, once the atol issue mentioned above is resolved:
Acked-By: David Hunt <david.hunt@intel.com>
Thanks!
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2] power: add unpriv. read of turbo % for pstate
2022-08-24 19:28 [PATCH] power: add unpriv. read of turbo % for pstate Markus Theil
2022-08-25 15:09 ` Hunt, David
@ 2022-08-25 18:22 ` Markus Theil
2022-10-10 0:54 ` Thomas Monjalon
1 sibling, 1 reply; 4+ messages in thread
From: Markus Theil @ 2022-08-25 18:22 UTC (permalink / raw)
To: David Hunt; +Cc: dev, Markus Theil
If DPDK applications are to run with a minimal set of privileges,
using the msr kernel module on Linux should not be necessary.
Since at least kernel 4.4, the rdmsr call to obtain the last non-turbo
boost frequency can be left out if the sysfs interface is used.
Also RHEL 7 with recent kernel updates should include the sysfs interface
for this (I only looked this up for CentOS 7).
Tested-by: David Hunt <david.hunt@intel.com>
Acked-by: David Hunt <david.hunt@intel.com>
Signed-off-by: Markus Theil <markus.theil@tu-ilmenau.de>
---
v2: atol -> strtol (as suggested by David Hunt), close newly introduced fd
lib/power/power_pstate_cpufreq.c | 79 ++++++++++++++++++++------------
1 file changed, 50 insertions(+), 29 deletions(-)
diff --git a/lib/power/power_pstate_cpufreq.c b/lib/power/power_pstate_cpufreq.c
index 78c9197695..49ddb2eefd 100644
--- a/lib/power/power_pstate_cpufreq.c
+++ b/lib/power/power_pstate_cpufreq.c
@@ -3,6 +3,7 @@
*/
#include <stdio.h>
+#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
@@ -35,15 +36,9 @@
"/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq"
#define POWER_SYSFILE_BASE_FREQ \
"/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency"
+#define POWER_SYSFILE_TURBO_PCT \
+ "/sys/devices/system/cpu/intel_pstate/turbo_pct"
#define POWER_PSTATE_DRIVER "intel_pstate"
-#define POWER_MSR_PATH "/dev/cpu/%u/msr"
-
-/*
- * MSR related
- */
-#define PLATFORM_INFO 0x0CE
-#define NON_TURBO_MASK 0xFF00
-#define NON_TURBO_OFFSET 0x8
enum power_state {
@@ -74,37 +69,41 @@ struct pstate_power_info {
static struct pstate_power_info lcore_power_info[RTE_MAX_LCORE];
/**
- * It is to read the specific MSR.
+ * It is to read the turbo mode percentage from sysfs
*/
-
static int32_t
-power_rdmsr(int msr, uint64_t *val, unsigned int lcore_id)
+power_read_turbo_pct(uint64_t *outVal)
{
int fd, ret;
- char fullpath[PATH_MAX];
+ char val[4] = {0};
+ char *endptr;
- snprintf(fullpath, sizeof(fullpath), POWER_MSR_PATH, lcore_id);
-
- fd = open(fullpath, O_RDONLY);
+ fd = open(POWER_SYSFILE_TURBO_PCT, O_RDONLY);
if (fd < 0) {
- RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", fullpath,
+ RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", POWER_SYSFILE_TURBO_PCT,
strerror(errno));
return fd;
}
- ret = pread(fd, val, sizeof(uint64_t), msr);
+ ret = read(fd, val, sizeof(val));
if (ret < 0) {
- RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", fullpath,
+ RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", POWER_SYSFILE_TURBO_PCT,
strerror(errno));
goto out;
}
- POWER_DEBUG_TRACE("MSR Path %s, offset 0x%X for lcore %u\n",
- fullpath, msr, lcore_id);
+ errno = 0;
+ *outVal = (uint64_t) strtol(val, &endptr, 10);
+ if (*endptr != 0 || errno != 0) {
+ RTE_LOG(ERR, POWER, "Error converting str to digits, read from %s: %s\n",
+ POWER_SYSFILE_TURBO_PCT, strerror(errno));
+ ret = -1;
+ goto out;
+ }
- POWER_DEBUG_TRACE("Ret value %d, content is 0x%"PRIx64"\n", ret, *val);
+ POWER_DEBUG_TRACE("power turbo pct: %"PRIu64"\n", *outVal);
out: close(fd);
return ret;
@@ -116,8 +115,9 @@ out: close(fd);
static int
power_init_for_setting_freq(struct pstate_power_info *pi)
{
- FILE *f_base = NULL, *f_base_max = NULL, *f_min = NULL, *f_max = NULL;
- uint32_t base_ratio, base_max_ratio;
+ FILE *f_base = NULL, *f_base_min = NULL, *f_base_max = NULL,
+ *f_min = NULL, *f_max = NULL;
+ uint32_t base_ratio, base_min_ratio, base_max_ratio;
uint64_t max_non_turbo;
int ret;
@@ -130,6 +130,14 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
goto err;
}
+ open_core_sysfs_file(&f_base_min, "r", POWER_SYSFILE_BASE_MIN_FREQ,
+ pi->lcore_id);
+ if (f_base_min == NULL) {
+ RTE_LOG(ERR, POWER, "failed to open %s\n",
+ POWER_SYSFILE_BASE_MIN_FREQ);
+ goto err;
+ }
+
open_core_sysfs_file(&f_min, "rw+", POWER_SYSFILE_MIN_FREQ,
pi->lcore_id);
if (f_min == NULL) {
@@ -158,6 +166,14 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
goto err;
}
+ /* read base min ratio */
+ ret = read_core_sysfs_u32(f_base_min, &base_min_ratio);
+ if (ret < 0) {
+ RTE_LOG(ERR, POWER, "Failed to read %s\n",
+ POWER_SYSFILE_BASE_MIN_FREQ);
+ goto err;
+ }
+
/* base ratio may not exist */
if (f_base != NULL) {
ret = read_core_sysfs_u32(f_base, &base_ratio);
@@ -170,20 +186,22 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
base_ratio = 0;
}
- /* Add MSR read to detect turbo status */
- if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0)
- goto err;
- /* no errors after this point */
-
/* convert ratios to bins */
base_max_ratio /= BUS_FREQ;
+ base_min_ratio /= BUS_FREQ;
base_ratio /= BUS_FREQ;
/* assign file handles */
pi->f_cur_min = f_min;
pi->f_cur_max = f_max;
- max_non_turbo = (max_non_turbo&NON_TURBO_MASK)>>NON_TURBO_OFFSET;
+ /* try to get turbo from global sysfs entry for less privileges than from MSR */
+ if (power_read_turbo_pct(&max_non_turbo) < 0)
+ goto err;
+ /* no errors after this point */
+
+ max_non_turbo = base_min_ratio
+ + (100 - max_non_turbo) * (base_max_ratio - base_min_ratio) / 100;
POWER_DEBUG_TRACE("no turbo perf %"PRIu64"\n", max_non_turbo);
@@ -214,12 +232,15 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
if (f_base != NULL)
fclose(f_base);
fclose(f_base_max);
+ fclose(f_base_min);
/* f_min and f_max are stored, no need to close */
return 0;
err:
if (f_base != NULL)
fclose(f_base);
+ if (f_base_min != NULL)
+ fclose(f_base_min);
if (f_base_max != NULL)
fclose(f_base_max);
if (f_min != NULL)
--
2.37.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] power: add unpriv. read of turbo % for pstate
2022-08-25 18:22 ` [PATCH v2] " Markus Theil
@ 2022-10-10 0:54 ` Thomas Monjalon
0 siblings, 0 replies; 4+ messages in thread
From: Thomas Monjalon @ 2022-10-10 0:54 UTC (permalink / raw)
To: Markus Theil; +Cc: David Hunt, dev
25/08/2022 20:22, Markus Theil:
> If DPDK applications should be used with a minimal set of privileges,
> using the msr kernel module on linux should not be necessary.
>
> Since at least kernel 4.4 the rdmsr call to obtain the last non-turbo
> boost frequency can be left out, if the sysfs interface is used.
> Also RHEL 7 with recent kernel updates should include the sysfs interface
> for this (I only looked this up for CentOS 7).
>
> Tested-by: David Hunt <david.hunt@intel.com>
> Acked-by: David Hunt <david.hunt@intel.com>
> Signed-off-by: Markus Theil <markus.theil@tu-ilmenau.de>
Applied, thanks.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-10-10 0:54 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-24 19:28 [PATCH] power: add unpriv. read of turbo % for pstate Markus Theil
2022-08-25 15:09 ` Hunt, David
2022-08-25 18:22 ` [PATCH v2] " Markus Theil
2022-10-10 0:54 ` Thomas Monjalon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).