DPDK patches and discussions
 help / color / mirror / Atom feed
From: Sivaprasad Tummala <sivaprasad.tummala@amd.com>
To: <jerinjacobk@gmail.com>, <harry.van.haaren@intel.com>,
	<anatoly.burakov@intel.com>
Cc: <dev@dpdk.org>, <ferruh.yigit@amd.com>, <david.hunt@intel.com>
Subject: [PATCH v1 5/6] power: add eventdev support for power management
Date: Mon, 16 Oct 2023 13:57:14 -0700	[thread overview]
Message-ID: <20231016205715.970999-5-sivaprasad.tummala@amd.com> (raw)
In-Reply-To: <20231016205715.970999-1-sivaprasad.tummala@amd.com>

Add eventdev support to enable power saving when no events
are arriving. It is based on counting the number of empty
polls and, when the number reaches a certain threshold, entering
an architecture-defined optimized power state that waits until
either a TSC timestamp expires or events arrive.

This API mandates a core-to-single-port mapping (i.e. one core polling
multiple ports of an event device is not supported). This should be ok
as the general use case will have one CPU core using one port to
enqueue/dequeue events from an eventdev.

This design is using Eventdev PMD Dequeue callbacks.

1. MWAITX/MONITORX:

   When a certain threshold of empty polls is reached, the core will go
   into a power optimized sleep while waiting for the monitor address
   reported by the event port to be written to.

2. Pause instruction

   This method uses the pause instruction to avoid busy polling.

Signed-off-by: Sivaprasad Tummala <sivaprasad.tummala@amd.com>
---
 lib/power/meson.build          |   2 +-
 lib/power/rte_power_pmd_mgmt.c | 226 +++++++++++++++++++++++++++++++++
 lib/power/rte_power_pmd_mgmt.h |  55 ++++++++
 lib/power/version.map          |   4 +
 4 files changed, 286 insertions(+), 1 deletion(-)

diff --git a/lib/power/meson.build b/lib/power/meson.build
index 056d0043d8..86e178bbb4 100644
--- a/lib/power/meson.build
+++ b/lib/power/meson.build
@@ -32,4 +32,4 @@ headers = files(
 if cc.has_argument('-Wno-cast-qual')
     cflags += '-Wno-cast-qual'
 endif
-deps += ['timer', 'ethdev']
+deps += ['timer', 'ethdev', 'eventdev']
diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index 38f8384085..df3ac2d221 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -9,8 +9,10 @@
 #include <rte_cpuflags.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
+#include <rte_eventdev.h>
 #include <rte_power_intrinsics.h>
 
+#include <eventdev_pmd.h>
 #include "rte_power_pmd_mgmt.h"
 #include "power_common.h"
 
@@ -53,6 +55,7 @@ struct queue_list_entry {
 	uint64_t n_empty_polls;
 	uint64_t n_sleeps;
 	const struct rte_eth_rxtx_callback *cb;
+	const struct rte_event_dequeue_callback *evt_cb;
 };
 
 struct pmd_core_cfg {
@@ -414,6 +417,64 @@ cfg_queues_stopped(struct pmd_core_cfg *queue_cfg)
 	return 1;
 }
 
+/*
+ * Dequeue callback used for the monitor power management scheme.
+ *
+ * Counts consecutive empty dequeues in the list entry passed through arg.
+ * Once that count exceeds emptypoll_max, the event port is asked for a
+ * monitoring condition and, if one is returned, rte_power_monitor() is
+ * called on it. A non-empty dequeue resets the count.
+ *
+ * Always returns nb_events unchanged.
+ */
+static uint16_t
+evt_clb_umwait(uint8_t dev_id, uint8_t port_id, struct rte_event *ev __rte_unused,
+		uint16_t nb_events, void *arg)
+{
+	struct queue_list_entry *entry = arg;
+	struct rte_power_monitor_cond cond;
+
+	/* events were dequeued: restart the empty poll count */
+	if (likely(nb_events != 0)) {
+		entry->n_empty_polls = 0;
+		return nb_events;
+	}
+
+	/* only one port is handled here, so there is no multi-port logic */
+	entry->n_empty_polls++;
+	if (likely(entry->n_empty_polls <= emptypoll_max))
+		return nb_events;
+
+	/* threshold exceeded: wait on the port's monitoring condition */
+	if (rte_event_port_get_monitor_addr(dev_id, port_id, &cond) == 0)
+		rte_power_monitor(&cond, UINT64_MAX);
+
+	return nb_events;
+}
+
+/*
+ * Dequeue callback used for the pause power management scheme.
+ *
+ * A non-empty dequeue resets the bookkeeping for this entry. On an empty
+ * dequeue, and only when both queue_can_sleep() and lcore_can_sleep() agree,
+ * rte_pause() is executed global_data.pause_per_us * pause duration times.
+ *
+ * NOTE(review): global_data.pause_per_us is populated outside this block;
+ * confirm it is non-zero on every platform this callback can run on,
+ * otherwise the loop below does nothing.
+ *
+ * Always returns nb_events unchanged.
+ */
+static uint16_t
+evt_clb_pause(uint8_t dev_id __rte_unused, uint8_t port_id __rte_unused,
+		struct rte_event *ev __rte_unused,
+		uint16_t nb_events, void *arg)
+{
+	struct pmd_core_cfg *core_cfg = &lcore_cfgs[rte_lcore_id()];
+	struct queue_list_entry *entry = arg;
+	const uint32_t duration = rte_power_pmd_mgmt_get_pause_duration();
+	uint64_t n;
+
+	/* common case: events arrived, nothing to wait for */
+	if (likely(nb_events != 0)) {
+		queue_reset(core_cfg, entry);
+		return nb_events;
+	}
+
+	/* both the port entry and the lcore as a whole must be ready to sleep */
+	if (!queue_can_sleep(core_cfg, entry) || !lcore_can_sleep(core_cfg))
+		return nb_events;
+
+	for (n = 0; n < global_data.pause_per_us * duration; n++)
+		rte_pause();
+
+	return nb_events;
+}
+
 static int
 check_scale(unsigned int lcore)
 {
@@ -481,6 +542,171 @@ get_monitor_callback(void)
 		clb_multiwait : clb_umwait;
 }
 
+/*
+ * Check whether an event port can be managed with the monitor scheme.
+ *
+ * @param cfg
+ *   Power management state of the lcore that will poll the port.
+ * @param qdata
+ *   Device/port pair to check. The caller stores the event device id in
+ *   portid and the event port id in qid.
+ * @return
+ *   0 if the monitor scheme can be used, -ENOTSUP otherwise.
+ */
+static int
+check_evt_monitor(struct pmd_core_cfg *cfg, const union queue *qdata)
+{
+	struct rte_power_monitor_cond dummy;
+
+	/* check if rte_power_monitor is supported */
+	if (!global_data.intrinsics_support.power_monitor) {
+		RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not supported\n");
+		return -ENOTSUP;
+	}
+
+	/*
+	 * the eventdev monitor callback waits on a single port only, so an
+	 * lcore that already has an entry registered cannot take another one
+	 */
+	if (cfg->n_queues > 0) {
+		RTE_LOG(DEBUG, POWER, "Monitoring multiple event ports is not supported\n");
+		return -ENOTSUP;
+	}
+
+	/*
+	 * check if the device supports the necessary PMD API; only -ENOTSUP
+	 * is treated as a failure, any other return value is accepted
+	 */
+	if (rte_event_port_get_monitor_addr((uint8_t)qdata->portid, (uint8_t)qdata->qid,
+				&dummy) == -ENOTSUP) {
+		RTE_LOG(DEBUG, POWER, "event port does not support rte_event_port_get_monitor_addr\n");
+		return -ENOTSUP;
+	}
+
+	/* we're done */
+	return 0;
+}
+
+/*
+ * Enable power management for one event port polled from one lcore.
+ *
+ * Validates the arguments, selects the dequeue callback matching the
+ * requested scheme, records the device/port pair in the lcore's list and
+ * installs the callback on the event port.
+ *
+ * Returns 0 on success. Returns -EINVAL for an invalid device, lcore or
+ * port id, for an unsupported mode, for a mode that differs from the one
+ * already active on the lcore, or when the dequeue callback could not be
+ * installed. Errors from check_evt_monitor() and queue_list_add() are
+ * returned unchanged.
+ */
+int
+rte_power_eventdev_pmgmt_port_enable(unsigned int lcore_id, uint8_t dev_id,
+		uint8_t port_id, enum rte_power_pmd_mgmt_type mode)
+{
+	const union queue qdata = {.portid = dev_id, .qid = port_id};
+	struct pmd_core_cfg *lcore_cfg;
+	struct queue_list_entry *queue_cfg;
+	struct rte_event_dev_info info;
+	rte_dequeue_callback_fn clb;
+	int ret;
+
+	RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+
+	if (lcore_id >= RTE_MAX_LCORE)
+		return -EINVAL;
+
+	if (rte_event_dev_info_get(dev_id, &info) < 0)
+		return -EINVAL;
+
+	/* check if port id is valid */
+	if (port_id >= info.max_event_ports)
+		return -EINVAL;
+
+	lcore_cfg = &lcore_cfgs[lcore_id];
+
+	/* if callback was already enabled, check current callback type */
+	if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED &&
+		lcore_cfg->cb_mode != mode)
+		return -EINVAL;
+
+	/* we need this in various places */
+	rte_cpu_get_intrinsics_support(&global_data.intrinsics_support);
+
+	switch (mode) {
+	case RTE_POWER_MGMT_TYPE_MONITOR:
+		/* check if we can add a new port */
+		ret = check_evt_monitor(lcore_cfg, &qdata);
+		if (ret < 0)
+			return ret;
+
+		clb = evt_clb_umwait;
+		break;
+	case RTE_POWER_MGMT_TYPE_PAUSE:
+		/* figure out various time-to-tsc conversions */
+		if (global_data.tsc_per_us == 0)
+			calc_tsc();
+
+		clb = evt_clb_pause;
+		break;
+	default:
+		RTE_LOG(DEBUG, POWER, "Invalid power management type\n");
+		return -EINVAL;
+	}
+
+	/* add this port to the list */
+	ret = queue_list_add(lcore_cfg, &qdata);
+	if (ret < 0) {
+		RTE_LOG(DEBUG, POWER, "Failed to add queue to list: %s\n",
+				strerror(-ret));
+		return ret;
+	}
+	/* new entry is always added last */
+	queue_cfg = TAILQ_LAST(&lcore_cfg->head, queue_list_head);
+
+	/* when enabling first entry, ensure sleep target is not 0 */
+	if (lcore_cfg->n_queues == 1 && lcore_cfg->sleep_target == 0)
+		lcore_cfg->sleep_target = 1;
+
+	/* initialize data before enabling the callback */
+	if (lcore_cfg->n_queues == 1) {
+		lcore_cfg->cb_mode = mode;
+		lcore_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
+	}
+	queue_cfg->evt_cb = rte_event_add_dequeue_callback(dev_id, port_id,
+						clb, queue_cfg);
+	if (queue_cfg->evt_cb == NULL) {
+		/*
+		 * NOTE(review): assumes a NULL return means the callback was
+		 * not installed, as with rte_eth_add_rx_callback() - confirm
+		 * against the eventdev callback API. The cause is not known
+		 * here, so a generic error code is reported.
+		 */
+		RTE_LOG(DEBUG, POWER, "Failed to add event dequeue callback\n");
+
+		/* undo the list insertion so no entry is left without a callback */
+		queue_cfg = queue_list_take(lcore_cfg, &qdata);
+		free(queue_cfg);
+		if (lcore_cfg->n_queues == 0)
+			lcore_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED;
+
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Disable power management for one event port polled from one lcore.
+ *
+ * Takes the device/port pair out of the lcore's list, marks the lcore as
+ * disabled when that was its last entry, removes the dequeue callback from
+ * the event port and releases the callback and the list entry.
+ *
+ * Returns 0 on success, -EINVAL for an invalid device or lcore id or when
+ * power management is not enabled on the lcore, and -ENOENT when the
+ * device/port pair is not in the lcore's list.
+ *
+ * NOTE(review): the result of rte_event_remove_dequeue_callback() is not
+ * checked before the callback memory is released - confirm removal cannot
+ * fail at this point.
+ */
+int
+rte_power_eventdev_pmgmt_port_disable(unsigned int lcore_id,
+		uint8_t dev_id, uint8_t port_id)
+{
+	const union queue key = {.portid = dev_id, .qid = port_id};
+	struct pmd_core_cfg *core_cfg;
+	struct queue_list_entry *entry;
+
+	RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+
+	if (lcore_id >= RTE_MAX_LCORE)
+		return -EINVAL;
+
+	/* the port id is not validated: an invalid one is never in the list */
+	core_cfg = &lcore_cfgs[lcore_id];
+	if (core_cfg->pwr_mgmt_state != PMD_MGMT_ENABLED)
+		return -EINVAL;
+
+	/*
+	 * Nothing here protects against a concurrent dequeue; the caller is
+	 * relied upon to have stopped the affected ports before calling.
+	 */
+	entry = queue_list_take(core_cfg, &key);
+	if (entry == NULL)
+		return -ENOENT;
+
+	/* last entry gone: the lcore no longer has power management enabled */
+	if (core_cfg->n_queues == 0)
+		core_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED;
+
+	/* every known scheme installs a dequeue callback that must be removed */
+	if (core_cfg->cb_mode == RTE_POWER_MGMT_TYPE_MONITOR ||
+			core_cfg->cb_mode == RTE_POWER_MGMT_TYPE_SCALE ||
+			core_cfg->cb_mode == RTE_POWER_MGMT_TYPE_PAUSE)
+		rte_event_remove_dequeue_callback(dev_id, port_id,
+			entry->evt_cb);
+
+	/*
+	 * With processing stopped by the caller, the callback is treated as
+	 * no longer in use and is released; the cast drops const on purpose.
+	 */
+	rte_free((void *)entry->evt_cb);
+	free(entry);
+
+	return 0;
+}
+
+
 int
 rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
 		uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
diff --git a/lib/power/rte_power_pmd_mgmt.h b/lib/power/rte_power_pmd_mgmt.h
index 0f1a2eb22e..e1966b9777 100644
--- a/lib/power/rte_power_pmd_mgmt.h
+++ b/lib/power/rte_power_pmd_mgmt.h
@@ -87,6 +87,61 @@ int
 rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
 		uint16_t port_id, uint16_t queue_id);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice.
+ *
+ * Enable power management on a specified Event device port and lcore.
+ *
+ * @note This function is not thread-safe.
+ *
+ * @warning This function must be called when the event device is stopped and
+ * no enqueue/dequeue is in progress!
+ *
+ * @param lcore_id
+ *   The lcore the event port will be polled from.
+ * @param dev_id
+ *   The identifier of the Event device.
+ * @param port_id
+ *   Event port identifier of the Event device.
+ * @param mode
+ *   The power management scheme to use for the specified event port.
+ * @return
+ *   0 on success
+ *   <0 on error
+ */
+__rte_experimental
+int
+rte_power_eventdev_pmgmt_port_enable(unsigned int lcore_id,
+		uint8_t dev_id, uint8_t port_id,
+		enum rte_power_pmd_mgmt_type mode);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice.
+ *
+ * Disable power management on a specified Event device port and lcore.
+ *
+ * @note This function is not thread-safe.
+ *
+ * @warning This function must be called when the event device is stopped and
+ *   no enqueue/dequeue is in progress!
+ *
+ * @param lcore_id
+ *   The lcore the event port is polled from.
+ * @param dev_id
+ *   The identifier of the Event device.
+ * @param port_id
+ *   Event port identifier of the Event device.
+ * @return
+ *   0 on success
+ *   <0 on error
+ */
+__rte_experimental
+int
+rte_power_eventdev_pmgmt_port_disable(unsigned int lcore_id,
+		uint8_t dev_id, uint8_t port_id);
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change, or be removed, without prior notice.
diff --git a/lib/power/version.map b/lib/power/version.map
index b8b54f768e..4ab762e072 100644
--- a/lib/power/version.map
+++ b/lib/power/version.map
@@ -52,4 +52,8 @@ EXPERIMENTAL {
 	rte_power_uncore_get_num_freqs;
 	rte_power_uncore_get_num_pkgs;
 	rte_power_uncore_init;
+
+	# added in 23.11
+	rte_power_eventdev_pmgmt_port_enable;
+	rte_power_eventdev_pmgmt_port_disable;
 };
-- 
2.34.1


  parent reply	other threads:[~2023-10-16 20:58 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-19  9:54 [RFC PATCH 1/5] eventdev: add power monitoring API on event port Sivaprasad Tummala
2023-04-19  9:54 ` [RFC PATCH 2/5] event/sw: support power monitor Sivaprasad Tummala
2023-04-19  9:54 ` [RFC PATCH 3/5] eventdev: support optional dequeue callbacks Sivaprasad Tummala
2023-04-24 16:06   ` Ferruh Yigit
2023-05-17 14:22   ` Burakov, Anatoly
2023-04-19  9:54 ` [RFC PATCH 4/5] power: add eventdev support for power management Sivaprasad Tummala
2023-05-17 14:43   ` Burakov, Anatoly
2023-05-24 12:34     ` Tummala, Sivaprasad
2023-04-19  9:54 ` [RFC PATCH 5/5] examples/eventdev_p: add eventdev " Sivaprasad Tummala
2023-04-19 10:15 ` [RFC PATCH 1/5] eventdev: add power monitoring API on event port Jerin Jacob
2023-04-24 16:06   ` Ferruh Yigit
2023-04-25  4:09     ` Jerin Jacob
2023-05-02 11:19       ` Ferruh Yigit
2023-05-03  7:58         ` Jerin Jacob
2023-05-03  8:13           ` Ferruh Yigit
2023-05-03  8:26             ` Jerin Jacob
2023-05-03 15:11               ` Tummala, Sivaprasad
2023-04-25  6:19     ` Mattias Rönnblom
2023-05-02 10:43       ` Ferruh Yigit
2023-05-17 14:48 ` Burakov, Anatoly
2023-10-16 20:57 ` [PATCH v1 1/6] " Sivaprasad Tummala
2023-10-16 20:57   ` [PATCH v1 2/6] event/sw: support power monitor Sivaprasad Tummala
2023-10-16 23:41     ` Tyler Retzlaff
2023-10-16 20:57   ` [PATCH v1 3/6] eventdev: support optional dequeue callbacks Sivaprasad Tummala
2023-10-16 23:47     ` Tyler Retzlaff
2023-10-16 20:57   ` [PATCH v1 4/6] event/sw: " Sivaprasad Tummala
2023-10-16 20:57   ` Sivaprasad Tummala [this message]
2023-10-16 23:51     ` [PATCH v1 5/6] power: add eventdev support for power management Tyler Retzlaff
2023-10-17  3:03       ` Tummala, Sivaprasad
2023-10-17  3:22     ` Jerin Jacob
2023-10-18  7:08       ` Tummala, Sivaprasad
2023-10-18  7:13         ` Jerin Jacob
2023-10-16 20:57   ` [PATCH v1 6/6] examples/eventdev_p: add eventdev " Sivaprasad Tummala

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231016205715.970999-5-sivaprasad.tummala@amd.com \
    --to=sivaprasad.tummala@amd.com \
    --cc=anatoly.burakov@intel.com \
    --cc=david.hunt@intel.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@amd.com \
    --cc=harry.van.haaren@intel.com \
    --cc=jerinjacobk@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).