DPDK patches and discussions
 help / color / mirror / Atom feed
From: David Hunt <david.hunt@intel.com>
To: dev@dpdk.org
Cc: david.hunt@intel.com, thomas@monjalon.net
Subject: [dpdk-dev] [PATCH v4 3/9] examples/vm_power: add oob monitoring functions
Date: Fri, 13 Jul 2018 15:22:56 +0100	[thread overview]
Message-ID: <20180713142302.34576-4-david.hunt@intel.com> (raw)
In-Reply-To: <20180713142302.34576-1-david.hunt@intel.com>

This patch introduces the out-of-band (oob) core monitoring
functions.

The functions are similar to the channel manager functions.
There are functions to add and remove cores from the
list of cores being monitored. There is a function to initialise
the monitor setup, run the monitor thread, and exit the monitor.

The monitor thread runs in its own lcore, and is separate
functionality to the channel monitor which is epoll based.
This thread is timer based. It loops through all monitored cores,
calculates the branch ratio, scales up or down the core, then
sleeps for an interval (~250 uS).

The method it uses to read the branch counters is a pread on the
/dev/cpu/x/msr file, so the 'msr' kernel module needs to be loaded.
Also, since the msr.h file has been made unavailable in recent
kernels, we have #defines for the relevant MSRs included in the
code.

The makefile has a switch for x86 and non-x86 platforms,
and compiles stub functions for non-x86 platforms.

Signed-off-by: David Hunt <david.hunt@intel.com>
Acked-by: Radu Nicolau <radu.nicolau@intel.com>
---
 examples/vm_power_manager/Makefile          |   5 +
 examples/vm_power_manager/oob_monitor.h     |  68 +++++
 examples/vm_power_manager/oob_monitor_nop.c |  38 +++
 examples/vm_power_manager/oob_monitor_x86.c | 259 ++++++++++++++++++++
 4 files changed, 370 insertions(+)
 create mode 100644 examples/vm_power_manager/oob_monitor.h
 create mode 100644 examples/vm_power_manager/oob_monitor_nop.c
 create mode 100644 examples/vm_power_manager/oob_monitor_x86.c

diff --git a/examples/vm_power_manager/Makefile b/examples/vm_power_manager/Makefile
index 0c925967c..13a5205ba 100644
--- a/examples/vm_power_manager/Makefile
+++ b/examples/vm_power_manager/Makefile
@@ -20,6 +20,11 @@ APP = vm_power_mgr
 # all source are stored in SRCS-y
 SRCS-y := main.c vm_power_cli.c power_manager.c channel_manager.c
 SRCS-y += channel_monitor.c parse.c
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+SRCS-y += oob_monitor_x86.c
+else
+SRCS-y += oob_monitor_nop.c
+endif
 
 CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_power/
 CFLAGS += $(WERROR_FLAGS)
diff --git a/examples/vm_power_manager/oob_monitor.h b/examples/vm_power_manager/oob_monitor.h
new file mode 100644
index 000000000..b96e08df7
--- /dev/null
+++ b/examples/vm_power_manager/oob_monitor.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef OOB_MONITOR_H_
+#define OOB_MONITOR_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Set up the resources required by the OOB branch monitor.
+ * Must be called before any other branch monitor function.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int branch_monitor_init(void);
+
+/**
+ * Run the OOB branch monitor; polls the monitored cores on a timer
+ * until branch_monitor_exit() is called.
+ *
+ * @return
+ *  None
+ */
+void run_branch_monitor(void);
+
+/**
+ * Exit the OOB Branch Monitor.
+ *
+ * @return
+ *  None
+ */
+void branch_monitor_exit(void);
+
+/**
+ * Add a core to the list of cores to monitor.
+ *
+ * @param core
+ *  Core Number
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int add_core_to_monitor(int core);
+
+/**
+ * Remove a previously added core from core list.
+ *
+ * @param core
+ *  Core Number
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int remove_core_from_monitor(int core);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* OOB_MONITOR_H_ */
diff --git a/examples/vm_power_manager/oob_monitor_nop.c b/examples/vm_power_manager/oob_monitor_nop.c
new file mode 100644
index 000000000..7e7b8bc14
--- /dev/null
+++ b/examples/vm_power_manager/oob_monitor_nop.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include "oob_monitor.h"
+
+void branch_monitor_exit(void) /* stub: there is nothing to stop */
+{
+}
+
+__attribute__((unused)) static float /* stub, not called in this file */
+apply_policy(__attribute__((unused)) int core)
+{
+	return 0.0; /* constant result; core is ignored */
+}
+
+int
+add_core_to_monitor(__attribute__((unused)) int core)
+{
+	return 0; /* stub: core is ignored, success is always reported */
+}
+
+int
+remove_core_from_monitor(__attribute__((unused)) int core)
+{
+	return 0; /* stub: core is ignored, success is always reported */
+}
+
+int
+branch_monitor_init(void)
+{
+	return 0; /* stub: nothing to set up, always reports success */
+}
+
+void
+run_branch_monitor(void)
+{ /* stub: returns immediately, no monitoring is performed */
+}
diff --git a/examples/vm_power_manager/oob_monitor_x86.c b/examples/vm_power_manager/oob_monitor_x86.c
new file mode 100644
index 000000000..62d503ca5
--- /dev/null
+++ b/examples/vm_power_manager/oob_monitor_x86.c
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <rte_log.h>
+
+#include "oob_monitor.h"
+#include "power_manager.h"
+#include "channel_manager.h"
+
+static volatile unsigned run_loop = 1;
+static uint64_t g_branches, g_branch_misses;
+static int g_active;
+
+void branch_monitor_exit(void)
+{
+	run_loop = 0; /* run_branch_monitor() loops while this is set */
+}
+
+/* Number of microseconds between each poll */
+#define INTERVAL 100
+#define PRINT_LOOP_COUNT (1000000/INTERVAL)
+#define RATIO_THRESHOLD 0.03
+#define IA32_PERFEVTSEL0 0x186
+#define IA32_PERFEVTSEL1 0x187
+#define IA32_PERFCTR0 0xc1
+#define IA32_PERFCTR1 0xc2
+#define IA32_PERFEVT_BRANCH_HITS 0x05300c4
+#define IA32_PERFEVT_BRANCH_MISS 0x05300c5
+
+/*
+ * Sample the branch counters of monitored core "core" (an index into the
+ * core_info cd[] table) and scale its frequency accordingly.
+ * Reads IA32_PERFCTR0 and IA32_PERFCTR1 through the core's msr_fd, takes
+ * the deltas since the previous sample as branch hits and misses, and
+ * scales the core to min when the miss percentage is below
+ * RATIO_THRESHOLD, otherwise to max.
+ * Returns the ratio, or -1.0 when the sample is skipped (read error,
+ * counter wrap, too few branches); g_active is 1 only after scaling.
+ */
+static float
+apply_policy(int core)
+{
+	struct core_info *ci;
+	uint64_t branches = 0, branch_misses = 0;
+	uint32_t last_branches, last_branch_misses;
+	int hits_diff, miss_diff;
+	float ratio;
+	int ret;
+
+	g_active = 0;
+	ci = get_core_info();
+
+	last_branches = ci->cd[core].last_branches;
+	last_branch_misses = ci->cd[core].last_branch_misses;
+
+	ret = pread(ci->cd[core].msr_fd, &branches,
+			sizeof(branches), IA32_PERFCTR0);
+	if (ret >= 0)
+		ret = pread(ci->cd[core].msr_fd, &branch_misses,
+				sizeof(branch_misses), IA32_PERFCTR1);
+	if (ret < 0) {
+		/* Do not store or evaluate a value that was never read. */
+		RTE_LOG(ERR, POWER_MANAGER,
+				"unable to read counter for core %d\n",
+				core);
+		return -1.0;
+	}
+
+	ci->cd[core].last_branches = branches;
+	ci->cd[core].last_branch_misses = branch_misses;
+
+	/* 32-bit unsigned subtraction wraps instead of overflowing an int. */
+	hits_diff = (int32_t)((uint32_t)branches - last_branches);
+	miss_diff = (int32_t)((uint32_t)branch_misses - last_branch_misses);
+	if (hits_diff <= 0 || miss_diff <= 0) {
+		/* Likely a counter overflow condition, skip this round */
+		return -1.0;
+	}
+
+	g_branches = hits_diff;
+	g_branch_misses = miss_diff;
+
+	if (hits_diff < (INTERVAL*100)) {
+		/* Likely no workload running on this core. Skip. */
+		return -1.0;
+	}
+
+	ratio = (float)miss_diff * (float)100 / (float)hits_diff;
+	if (ratio < RATIO_THRESHOLD)
+		power_manager_scale_core_min(core);
+	else
+		power_manager_scale_core_max(core);
+
+	g_active = 1;
+	return ratio;
+}
+
+/*
+ * Program the branch hit/miss events on a core and start monitoring it.
+ * Opens /dev/cpu/<core>/msr read-write to write both event selectors,
+ * then keeps only a read-only descriptor in the core's msr_fd.
+ * Returns 0 on success or when core >= core_count (request ignored),
+ * negative on a negative core number or on any open/write failure.
+ */
+int
+add_core_to_monitor(int core)
+{
+	struct core_info *ci;
+	char proc_file[UNIX_PATH_MAX];
+	uint64_t setup;
+	int fd, ret;
+
+	if (core < 0)
+		return -1;
+
+	ci = get_core_info();
+	if (core >= ci->core_count)
+		return 0;
+
+	snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
+	fd = open(proc_file, O_RDWR | O_SYNC);
+	if (fd < 0)
+		goto err_open;
+
+	/* Set up branch counters */
+	setup = IA32_PERFEVT_BRANCH_HITS;
+	ret = pwrite(fd, &setup, sizeof(setup), IA32_PERFEVTSEL0);
+	if (ret >= 0) {
+		setup = IA32_PERFEVT_BRANCH_MISS;
+		ret = pwrite(fd, &setup, sizeof(setup), IA32_PERFEVTSEL1);
+	}
+	/* Always drop the writable descriptor, also on failure (no leak). */
+	close(fd);
+	if (ret < 0) {
+		RTE_LOG(ERR, POWER_MANAGER,
+				"unable to set counter for core %d\n",
+				core);
+		return ret;
+	}
+
+	/* Re-open read only so as not to hog the resource */
+	fd = open(proc_file, O_RDONLY);
+	if (fd < 0)
+		goto err_open;
+
+	ci->cd[core].msr_fd = fd;
+	ci->cd[core].oob_enabled = 1;
+	return 0;
+
+err_open:
+	RTE_LOG(ERR, POWER_MANAGER,
+			"Error opening MSR file for core %d "
+			"(is msr kernel module loaded?)\n",
+			core);
+	return -1;
+}
+
+/*
+ * Stop monitoring a core and clear its branch event selectors.
+ *
+ * The msr file is opened read-write before the monitoring descriptor is
+ * released, so a failed open leaves the core's monitoring state usable.
+ * Returns 0 on success or when the core is not being monitored
+ * (including out-of-range cores), negative on any open/write failure.
+ */
+int
+remove_core_from_monitor(int core)
+{
+	struct core_info *ci;
+	char proc_file[UNIX_PATH_MAX];
+	uint64_t setup = 0x0; /* clear event */
+	int fd, ret;
+
+	ci = get_core_info();
+	/* add_core_to_monitor() never enables a core outside this range. */
+	if (core < 0 || core >= ci->core_count)
+		return 0;
+	if (!ci->cd[core].oob_enabled)
+		return 0;
+
+	snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
+	fd = open(proc_file, O_RDWR | O_SYNC);
+	if (fd < 0) {
+		RTE_LOG(ERR, POWER_MANAGER,
+				"Error opening MSR file for core %d "
+				"(is msr kernel module loaded?)\n",
+				core);
+		return -1;
+	}
+
+	ret = pwrite(fd, &setup, sizeof(setup), IA32_PERFEVTSEL0);
+	if (ret >= 0)
+		ret = pwrite(fd, &setup, sizeof(setup), IA32_PERFEVTSEL1);
+	/* Close on every path so a failed write cannot leak the fd. */
+	close(fd);
+	if (ret < 0) {
+		RTE_LOG(ERR, POWER_MANAGER,
+				"unable to set counter for core %d\n",
+				core);
+		return ret;
+	}
+
+	/* This file uses msr_fd == 0 to mean "no descriptor open". */
+	if (ci->cd[core].msr_fd != 0)
+		close(ci->cd[core].msr_fd);
+	ci->cd[core].msr_fd = 0;
+	ci->cd[core].oob_enabled = 0;
+	return 0;
+}
+
+int
+branch_monitor_init(void)
+{
+	return 0; /* nothing to set up here; always reports success */
+}
+
+/*
+ * Poll every oob_enabled core each INTERVAL microseconds, applying the
+ * branch ratio policy, until branch_monitor_exit() clears run_loop.
+ * Roughly once per PRINT_LOOP_COUNT polls the active cores are printed;
+ * g_branches is cast for printf as uint64_t need not be unsigned long.
+ */
+void
+run_branch_monitor(void)
+{
+	struct core_info *ci = get_core_info();
+	int print = 0;
+	int reads = 0;
+
+	while (run_loop) {
+		int printed = 0;
+		int j;
+		float ratio;
+
+		usleep(INTERVAL);
+		print++;
+		for (j = 0; j < ci->core_count; j++) {
+			if (!ci->cd[j].oob_enabled)
+				continue;
+			ratio = apply_policy(j);
+			if ((print > PRINT_LOOP_COUNT) && (g_active)) {
+				printf("  %d: %.4f {%llu} {%d}", j, ratio,
+					(unsigned long long)g_branches, reads);
+				printed = 1;
+				reads = 0;
+			} else {
+				reads++;
+			}
+		}
+		if (print > PRINT_LOOP_COUNT) {
+			if (printed)
+				printf("\n");
+			print = 0;
+		}
+	}
+}
-- 
2.17.1

  parent reply	other threads:[~2018-07-13 14:23 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-07  7:36 [dpdk-dev] [PATCH v1 0/6] examples/vm_power: 100% Busy Polling David Hunt
2018-06-07  7:37 ` [dpdk-dev] [PATCH v1 1/6] examples/vm_power: add check for port count David Hunt
2018-06-21 13:24   ` [dpdk-dev] [PATCH v2 0/8] examples/vm_power: 100% Busy Polling David Hunt
2018-06-21 13:24     ` [dpdk-dev] [PATCH v2 1/8] examples/vm_power: add check for port count David Hunt
2018-06-26  9:23       ` [dpdk-dev] [0/9] examples/vm_power: 100% Busy Polling David Hunt
2018-06-26  9:23         ` [dpdk-dev] [PATCH v3 1/9] examples/vm_power: add check for port count David Hunt
2018-07-13 14:22           ` [dpdk-dev] [PATCH v4 0/9] examples/vm_power: 100% Busy Polling David Hunt
2018-07-13 14:22             ` [dpdk-dev] [PATCH v4 1/9] examples/vm_power: add check for port count David Hunt
2018-07-13 14:22             ` [dpdk-dev] [PATCH v4 2/9] examples/vm_power: add core list parameter David Hunt
2018-07-13 14:22             ` David Hunt [this message]
2018-07-13 14:22             ` [dpdk-dev] [PATCH v4 4/9] examples/vm_power: allow greater than 64 cores David Hunt
2018-07-13 14:22             ` [dpdk-dev] [PATCH v4 5/9] examples/vm_power: add thread for oob core monitor David Hunt
2018-07-13 14:22             ` [dpdk-dev] [PATCH v4 6/9] examples/vm_power: add port-list to command line David Hunt
2018-07-13 14:23             ` [dpdk-dev] [PATCH v4 7/9] examples/vm_power: add branch ratio policy type David Hunt
2018-07-13 14:23             ` [dpdk-dev] [PATCH v4 8/9] examples/vm_power: add cli args to guest app David Hunt
2018-07-13 14:23             ` [dpdk-dev] [PATCH v4 9/9] examples/vm_power: make branch ratio configurable David Hunt
2018-07-20 22:06             ` [dpdk-dev] [PATCH v4 0/9] examples/vm_power: 100% Busy Polling Thomas Monjalon
2018-06-26  9:23         ` [dpdk-dev] [PATCH v3 2/9] examples/vm_power: add core list parameter David Hunt
2018-06-26  9:23         ` [dpdk-dev] [PATCH v3 3/9] examples/vm_power: add oob monitoring functions David Hunt
2018-07-12 19:13           ` Thomas Monjalon
2018-07-12 22:18             ` Stephen Hemminger
2018-07-13  8:24             ` Hunt, David
2018-06-26  9:23         ` [dpdk-dev] [PATCH v3 4/9] examples/vm_power: allow greater than 64 cores David Hunt
2018-06-26  9:23         ` [dpdk-dev] [PATCH v3 5/9] examples/vm_power: add thread for oob core monitor David Hunt
2018-06-26  9:23         ` [dpdk-dev] [PATCH v3 6/9] examples/vm_power: add port-list to command line David Hunt
2018-06-26  9:23         ` [dpdk-dev] [PATCH v3 7/9] examples/vm_power: add branch ratio policy type David Hunt
2018-06-26  9:23         ` [dpdk-dev] [PATCH v3 8/9] examples/vm_power: add cli args to guest app David Hunt
2018-06-26  9:23         ` [dpdk-dev] [PATCH v3 9/9] examples/vm_power: make branch ratio configurable David Hunt
2018-07-12 19:09         ` [dpdk-dev] [0/9] examples/vm_power: 100% Busy Polling Thomas Monjalon
2018-07-13  8:31           ` Hunt, David
2018-07-13  8:33             ` Thomas Monjalon
2018-07-13  8:43               ` Hunt, David
2018-07-18 15:23                 ` Thomas Monjalon
2018-06-21 13:24     ` [dpdk-dev] [PATCH v2 2/8] examples/vm_power: add core list parameter David Hunt
2018-06-21 13:24     ` [dpdk-dev] [PATCH v2 3/8] examples/vm_power: add oob monitoring functions David Hunt
2018-06-21 13:24     ` [dpdk-dev] [PATCH v2 4/8] examples/vm_power: allow greater than 64 cores David Hunt
2018-06-21 13:24     ` [dpdk-dev] [PATCH v2 5/8] examples/vm_power: add thread for oob core monitor David Hunt
2018-06-21 13:24     ` [dpdk-dev] [PATCH v2 6/8] examples/vm_power: add port-list to command line David Hunt
2018-06-21 13:24     ` [dpdk-dev] [PATCH v2 7/8] examples/vm_power: add branch ratio policy type David Hunt
2018-06-21 13:24     ` [dpdk-dev] [PATCH v2 8/8] examples/vm_power: add cli args to guest app David Hunt
2018-06-21 14:28     ` [dpdk-dev] [PATCH v2 0/8] examples/vm_power: 100% Busy Polling Radu Nicolau
2018-06-07  7:37 ` [dpdk-dev] [PATCH v1 2/6] examples/vm_power: add core list parameter David Hunt
2018-06-07  7:37 ` [dpdk-dev] [PATCH v1 3/6] examples/vm_power: add oob monitoring functions David Hunt
2018-06-07  7:37 ` [dpdk-dev] [PATCH v1 4/6] examples/vm_power: allow greater than 64 cores David Hunt
2018-06-07  7:37 ` [dpdk-dev] [PATCH v1 5/6] examples/vm_power: add thread for oob core monitor David Hunt
2018-06-07  7:37 ` [dpdk-dev] [PATCH v1 6/6] examples/vm_power: add port-list to command line David Hunt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180713142302.34576-4-david.hunt@intel.com \
    --to=david.hunt@intel.com \
    --cc=dev@dpdk.org \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).