* [RFC PATCH 1/4] eal: add lcore info in telemetry
2022-11-23 10:19 [RFC PATCH 0/4] lcore telemetry improvements Robin Jarry
@ 2022-11-23 10:19 ` Robin Jarry
2022-11-23 10:19 ` [RFC PATCH 2/4] eal: allow applications to report their cpu utilization Robin Jarry
` (3 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Robin Jarry @ 2022-11-23 10:19 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Jerin Jacob, Kevin Laatz, Konstantin Ananyev,
Mattias Rönnblom, Morten Brørup, Robin Jarry
Report the same information as rte_lcore_dump() in the telemetry
API under /eal/lcore/list and /eal/lcore/info,ID.
Example:
--> /eal/lcore/info,3
{
  "/eal/lcore/info": {
    "lcore_id": 3,
    "socket": 0,
    "role": "RTE",
    "cpuset": "3"
  }
}
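For comparison, the list endpoint returns a plain array of lcore ids. Assuming
lcores 0, 2 and 3 are in use, the reply would look roughly like this
(illustrative output only, same format as above):
--> /eal/lcore/list
{
  "/eal/lcore/list": [0, 2, 3]
}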
Signed-off-by: Robin Jarry <rjarry@redhat.com>
---
lib/eal/common/eal_common_lcore.c | 90 +++++++++++++++++++++++++++++++
1 file changed, 90 insertions(+)
diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c
index 06c594b0224f..31e3965dc5ad 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -10,6 +10,7 @@
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_log.h>
+#include <rte_telemetry.h>
#include "eal_private.h"
#include "eal_thread.h"
@@ -456,3 +457,92 @@ rte_lcore_dump(FILE *f)
{
rte_lcore_iterate(lcore_dump_cb, f);
}
+
+static int
+lcore_telemetry_id_cb(unsigned int lcore_id, void *arg)
+{
+ struct rte_tel_data *d = arg;
+ return rte_tel_data_add_array_int(d, lcore_id);
+}
+
+static int
+handle_lcore_list(const char *cmd __rte_unused,
+ const char *params __rte_unused,
+ struct rte_tel_data *d)
+{
+ int ret = rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
+ if (ret)
+ return ret;
+ return rte_lcore_iterate(lcore_telemetry_id_cb, d);
+}
+
+struct lcore_telemetry_info {
+ unsigned int lcore_id;
+ struct rte_tel_data *d;
+};
+
+static int
+lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
+{
+ struct lcore_telemetry_info *info = arg;
+ struct rte_config *cfg = rte_eal_get_configuration();
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN + 3];
+ const char *role;
+
+ if (info->lcore_id != lcore_id)
+ return 0;
+
+ switch (cfg->lcore_role[lcore_id]) {
+ case ROLE_RTE:
+ role = "RTE";
+ break;
+ case ROLE_SERVICE:
+ role = "SERVICE";
+ break;
+ case ROLE_NON_EAL:
+ role = "NON_EAL";
+ break;
+ default:
+ role = "UNKNOWN";
+ break;
+ }
+ if (eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset, sizeof(cpuset) - 3)) {
+ cpuset[sizeof(cpuset) - 4] = '.';
+ cpuset[sizeof(cpuset) - 3] = '.';
+ cpuset[sizeof(cpuset) - 2] = '.';
+ cpuset[sizeof(cpuset) - 1] = '\0';
+ }
+ rte_tel_data_start_dict(info->d);
+ rte_tel_data_add_dict_int(info->d, "lcore_id", lcore_id);
+ rte_tel_data_add_dict_int(info->d, "socket", rte_lcore_to_socket_id(lcore_id));
+ rte_tel_data_add_dict_string(info->d, "role", role);
+ rte_tel_data_add_dict_string(info->d, "cpuset", cpuset);
+
+ return 0;
+}
+
+static int
+handle_lcore_info(const char *cmd __rte_unused, const char *params, struct rte_tel_data *d)
+{
+ struct lcore_telemetry_info info = { .d = d };
+ char *endptr = NULL;
+ if (params == NULL || strlen(params) == 0)
+ return -EINVAL;
+ errno = 0;
+ info.lcore_id = strtoul(params, &endptr, 10);
+ if (errno)
+ return -errno;
+ if (endptr == params)
+ return -EINVAL;
+ return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
+}
+
+RTE_INIT(lcore_telemetry)
+{
+ rte_telemetry_register_cmd(
+ "/eal/lcore/list", handle_lcore_list,
+ "List of lcore ids. Takes no parameters");
+ rte_telemetry_register_cmd(
+ "/eal/lcore/info", handle_lcore_info,
+ "Returns lcore info. Parameters: int lcore_id");
+}
--
2.38.1
* [RFC PATCH 2/4] eal: allow applications to report their cpu utilization
2022-11-23 10:19 [RFC PATCH 0/4] lcore telemetry improvements Robin Jarry
2022-11-23 10:19 ` [RFC PATCH 1/4] eal: add lcore info in telemetry Robin Jarry
@ 2022-11-23 10:19 ` Robin Jarry
2022-11-23 11:52 ` Morten Brørup
2022-11-23 16:38 ` Stephen Hemminger
2022-11-23 10:19 ` [RFC PATCH 3/4] testpmd: add show lcores command Robin Jarry
` (2 subsequent siblings)
4 siblings, 2 replies; 11+ messages in thread
From: Robin Jarry @ 2022-11-23 10:19 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Jerin Jacob, Kevin Laatz, Konstantin Ananyev,
Mattias Rönnblom, Morten Brørup, Robin Jarry
Allow applications to register a callback that will be invoked in
rte_lcore_dump() and when requesting lcore info in the telemetry API.
The callback is expected to return a number between 0 and 100
representing the percentage of busy cycles spent over a fixed period of
time. The period of time is configured when registering the callback.
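As an illustration of how an application might hook into this, the sketch below
keeps hypothetical per-lcore cycle counters and registers a callback with the new
API. Only the rte_lcore_busy_percent_cb typedef and the
rte_lcore_register_busy_percent_cb() call come from this patch; everything else
(the names, the 10-second period, the accounting itself) is assumed for the
example:

#include <stdint.h>
#include <rte_lcore.h>

/* Hypothetical counters maintained by the application's poll loop and
 * snapshotted/reset by the application every reporting period. */
struct app_lcore_stats {
    uint64_t busy_cycles;  /* cycles spent in iterations that did real work */
    uint64_t total_cycles; /* all cycles spent in the poll loop */
};
static struct app_lcore_stats app_stats[RTE_MAX_LCORE];

/* Callback invoked from rte_lcore_dump() and the /eal/lcore/info handler. */
static int
app_busy_percent(unsigned int lcore_id)
{
    const struct app_lcore_stats *s = &app_stats[lcore_id];

    if (s->total_cycles == 0)
        return -1; /* no data available for this lcore */
    return (int)(100 * s->busy_cycles / s->total_cycles);
}

/* Called once after rte_eal_init(); 10 is the period in seconds over which
 * the application maintains the counters above. */
static void
app_register_busy_cb(void)
{
    rte_lcore_register_busy_percent_cb(app_busy_percent, 10);
}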
Cc: Bruce Richardson <bruce.richardson@intel.com>
Cc: Jerin Jacob <jerinj@marvell.com>
Cc: Kevin Laatz <kevin.laatz@intel.com>
Cc: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
Cc: Mattias Rönnblom <hofors@lysator.liu.se>
Cc: Morten Brørup <mb@smartsharesystems.com>
Signed-off-by: Robin Jarry <rjarry@redhat.com>
---
lib/eal/common/eal_common_lcore.c | 37 ++++++++++++++++++++++++++++---
lib/eal/include/rte_lcore.h | 30 +++++++++++++++++++++++++
lib/eal/version.map | 1 +
3 files changed, 65 insertions(+), 3 deletions(-)
diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c
index 31e3965dc5ad..9a85fd8854df 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -420,14 +420,36 @@ rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg)
return ret;
}
+static rte_lcore_busy_percent_cb lcore_busy_cb;
+static unsigned int lcore_busy_period;
+
+void
+rte_lcore_register_busy_percent_cb(rte_lcore_busy_percent_cb cb, unsigned int period)
+{
+ lcore_busy_cb = cb;
+ lcore_busy_period = period;
+}
+
+static int
+lcore_busy_percent(unsigned int lcore_id)
+{
+ int percent = -1;
+ if (lcore_busy_cb)
+ percent = lcore_busy_cb(lcore_id);
+ if (percent > 100)
+ percent = 100;
+ return percent;
+}
+
static int
lcore_dump_cb(unsigned int lcore_id, void *arg)
{
struct rte_config *cfg = rte_eal_get_configuration();
char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+ char busy_str[16];
const char *role;
FILE *f = arg;
- int ret;
+ int ret, busy;
switch (cfg->lcore_role[lcore_id]) {
case ROLE_RTE:
@@ -446,9 +468,16 @@ lcore_dump_cb(unsigned int lcore_id, void *arg)
ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset,
sizeof(cpuset));
- fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id,
+ busy = lcore_busy_percent(lcore_id);
+ if (busy < 0) {
+ snprintf(busy_str, sizeof(busy_str), "%s", "N/A");
+ } else {
+ snprintf(busy_str, sizeof(busy_str), "%d%% last %d sec",
+ busy, lcore_busy_period);
+ }
+ fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s, busy %s\n", lcore_id,
rte_lcore_to_socket_id(lcore_id), role, cpuset,
- ret == 0 ? "" : "...");
+ ret == 0 ? "" : "...", busy_str);
return 0;
}
@@ -517,6 +546,8 @@ lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
rte_tel_data_add_dict_int(info->d, "socket", rte_lcore_to_socket_id(lcore_id));
rte_tel_data_add_dict_string(info->d, "role", role);
rte_tel_data_add_dict_string(info->d, "cpuset", cpuset);
+ rte_tel_data_add_dict_int(info->d, "busy_percent", lcore_busy_percent(lcore_id));
+ rte_tel_data_add_dict_int(info->d, "busy_period", lcore_busy_period);
return 0;
}
diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h
index 6938c3fd7b81..b1223eaa12bf 100644
--- a/lib/eal/include/rte_lcore.h
+++ b/lib/eal/include/rte_lcore.h
@@ -328,6 +328,36 @@ typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg);
int
rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg);
+/**
+ * Callback to allow applications to report CPU utilization.
+ *
+ * @param lcore_id
+ * The lcore to consider.
+ * @return
+ * - A number between 0 and 100 representing the percentage of busy cycles
+ * over the last period for the given lcore_id.
+ * - -1 if the information is not available or if any error occurred.
+ */
+typedef int (*rte_lcore_busy_percent_cb)(unsigned int lcore_id);
+
+/**
+ * Register a callback from an application to be called in rte_lcore_dump()
+ * and the /eal/lcore/info telemetry endpoint handler.
+ *
+ * Applications are expected to return a number between 0 and 100 representing
+ * the percentage of busy cycles over the last period for the provided lcore_id.
+ * The implementation details for computing such a ratio are specific to each
+ * application.
+ *
+ * @param cb
+ * The callback function.
+ * @param period
+ * The period in seconds over which the percentage of busy cycles will be
+ * reported by the application.
+ */
+__rte_experimental
+void rte_lcore_register_busy_percent_cb(rte_lcore_busy_percent_cb cb, unsigned int period);
+
/**
* List all lcores.
*
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 7ad12a7dc985..138537ee5835 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -440,6 +440,7 @@ EXPERIMENTAL {
rte_thread_detach;
rte_thread_equal;
rte_thread_join;
+ rte_lcore_register_busy_percent_cb;
};
INTERNAL {
--
2.38.1
* RE: [RFC PATCH 2/4] eal: allow applications to report their cpu utilization
2022-11-23 10:19 ` [RFC PATCH 2/4] eal: allow applications to report their cpu utilization Robin Jarry
@ 2022-11-23 11:52 ` Morten Brørup
2022-11-23 13:29 ` Robin Jarry
2022-11-23 16:38 ` Stephen Hemminger
1 sibling, 1 reply; 11+ messages in thread
From: Morten Brørup @ 2022-11-23 11:52 UTC (permalink / raw)
To: Robin Jarry, dev
Cc: Bruce Richardson, Jerin Jacob, Kevin Laatz, Konstantin Ananyev,
Mattias Rönnblom
> From: Robin Jarry [mailto:rjarry@redhat.com]
> Sent: Wednesday, 23 November 2022 11.19
> To: dev@dpdk.org
> Cc: Bruce Richardson; Jerin Jacob; Kevin Laatz; Konstantin Ananyev;
> Mattias Rönnblom; Morten Brørup; Robin Jarry
> Subject: [RFC PATCH 2/4] eal: allow applications to report their cpu
> utilization
>
> Allow applications to register a callback that will be invoked in
> rte_lcore_dump() and when requesting lcore info in the telemetry API.
>
> The callback is expected to return a number between 0 and 100
> representing the percentage of busy cycles spent over a fixed period of
> time. The period of time is configured when registering the callback.
>
> Cc: Bruce Richardson <bruce.richardson@intel.com>
> Cc: Jerin Jacob <jerinj@marvell.com>
> Cc: Kevin Laatz <kevin.laatz@intel.com>
> Cc: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> Cc: Mattias Rönnblom <hofors@lysator.liu.se>
> Cc: Morten Brørup <mb@smartsharesystems.com>
> Signed-off-by: Robin Jarry <rjarry@redhat.com>
> ---
This patch simply provides a function for the application to register a constant X and a callback, which returns Y.
X happens to be a duration in seconds.
Y can be a number between 0 and 100, and happens to be the lcore busyness (to be calculated by the application).
So I agree that it contains no controversial calculations. :-)
However, if the lcore busyness is supposed to be used for power management or similar, it must have much higher resolution than one second.
Also, CPU Usage is often reported in multiple time intervals, e.g. /proc/loadavg provides 1, 5 and 10 minute load averages.
Perhaps a deeper issue is that the output could also be considered statistics, which are handled differently in different applications. E.g. the statistics module in the SmartShare StraightShaper application keeps histories at multiple time resolutions, e.g. 5 minutes in 1-second intervals, up to 1 year in 1-day intervals.
On the other hand, if the application must expose 1/5/10 minute statistics, it could register a callback with a 1-minute interval, and aggregate the numbers in its own statistics module.
Here's a completely different angle, considering how statistics are often collected and processed by SNMP-based tools:
This patch is based on a "gauge" (i.e. the busyness percentage) and an "interval" (i.e. the duration the gauge covers). I have to sample this gauge exactly every interval to collect data for a busyness chart. If the application's reporting interval is 1 second, I must sample the gauge every second, or statistical information will be lost.
Instead, I would prefer the callback to return two counters: units_passed (e.g. number of cycles since application start) and units_busy (e.g. number of busy cycles since application start).
I can sample these at any interval, and calculate the busyness of that interval as the difference: (units_busy - units_busy_before) / (units_passed - units_passed_before).
If needed, I can also sample them at multiple intervals, e.g. every 1, 5 and 10 minutes, and expose in the "loadavg".
I can also sample them every millisecond if I need to react quickly to a sudden increase/drop in busyness.
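To illustrate the idea, the consumer side of such an API could compute the
busyness from two snapshots of the proposed counters along these lines (a sketch
only; the struct and function names are made up for the example and exist
nowhere in DPDK):

#include <stdint.h>

/* Hypothetical snapshot of the two free-running counters proposed above. */
struct lcore_usage_sample {
    uint64_t units_passed; /* e.g. TSC cycles since application start */
    uint64_t units_busy;   /* e.g. busy TSC cycles since application start */
};

/* Busyness over the interval between two samples, in percent, or -1 if the
 * interval is empty. Works for whatever sampling interval the monitor picks. */
static int
busy_percent(const struct lcore_usage_sample *before,
             const struct lcore_usage_sample *now)
{
    uint64_t passed = now->units_passed - before->units_passed;
    uint64_t busy = now->units_busy - before->units_busy;

    if (passed == 0)
        return -1;
    return (int)(100 * busy / passed);
}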
* Re: [RFC PATCH 2/4] eal: allow applications to report their cpu utilization
2022-11-23 11:52 ` Morten Brørup
@ 2022-11-23 13:29 ` Robin Jarry
0 siblings, 0 replies; 11+ messages in thread
From: Robin Jarry @ 2022-11-23 13:29 UTC (permalink / raw)
To: Morten Brørup, dev
Cc: Bruce Richardson, Jerin Jacob, Kevin Laatz, Konstantin Ananyev,
Mattias Rönnblom
Hi Morten,
Morten Brørup, Nov 23, 2022 at 12:52:
> This patch is based on a "gauge" (i.e. the busyness percentage) and an
> "interval" (i.e. the duration the gauge covers). I have to sample this
> gauge exactly every interval to collect data for a busyness chart. If
> the application's reporting interval is 1 second, I must sample the
> gauge every second, or statistical information will be lost.
>
> Instead, I would prefer the callback to return two counters:
> units_passed (e.g. number of cycles since application start) and
> units_busy (e.g. number of busy cycles since application start).
>
> I can sample these at any interval, and calculate the busyness of that
> interval as the difference: (units_busy - units_busy_before)
> / (units_passed - units_passed_before).
>
> If needed, I can also sample them at multiple intervals, e.g. every 1,
> 5 and 10 minutes, and expose in the "loadavg".
>
> I can also sample them every millisecond if I need to react quickly to
> a sudden increase/drop in busyness.
Your proposal makes a lot of sense and it will even be easier for
applications to implement. I'll do that for v2.
Thanks for the feedback.
* Re: [RFC PATCH 2/4] eal: allow applications to report their cpu utilization
2022-11-23 10:19 ` [RFC PATCH 2/4] eal: allow applications to report their cpu utilization Robin Jarry
2022-11-23 11:52 ` Morten Brørup
@ 2022-11-23 16:38 ` Stephen Hemminger
1 sibling, 0 replies; 11+ messages in thread
From: Stephen Hemminger @ 2022-11-23 16:38 UTC (permalink / raw)
To: Robin Jarry
Cc: dev, Bruce Richardson, Jerin Jacob, Kevin Laatz,
Konstantin Ananyev, Mattias Rönnblom, Morten Brørup
On Wed, 23 Nov 2022 11:19:29 +0100
Robin Jarry <rjarry@redhat.com> wrote:
> +static rte_lcore_busy_percent_cb lcore_busy_cb;
> +static unsigned int lcore_busy_period;
> +
> +void
> +rte_lcore_register_busy_percent_cb(rte_lcore_busy_percent_cb cb, unsigned int period)
> +{
> + lcore_busy_cb = cb;
> + lcore_busy_period = period;
> +}
> +
> +static int
> +lcore_busy_percent(unsigned int lcore_id)
> +{
> + int percent = -1;
> + if (lcore_busy_cb)
> + percent = lcore_busy_cb(lcore_id);
> + if (percent > 100)
> + percent = 100;
> + return percent;
> +}
This is a case where floating point double precision might be
a better API.
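For reference, such an alternative could be as simple as changing the callback
signature, e.g. (purely illustrative, not part of the posted patch):

/* Hypothetical variant: report busyness as a ratio in [0.0, 1.0],
 * with a negative value meaning "not available". */
typedef double (*rte_lcore_busy_ratio_cb)(unsigned int lcore_id);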
* [RFC PATCH 3/4] testpmd: add show lcores command
2022-11-23 10:19 [RFC PATCH 0/4] lcore telemetry improvements Robin Jarry
2022-11-23 10:19 ` [RFC PATCH 1/4] eal: add lcore info in telemetry Robin Jarry
2022-11-23 10:19 ` [RFC PATCH 2/4] eal: allow applications to report their cpu utilization Robin Jarry
@ 2022-11-23 10:19 ` Robin Jarry
2022-11-24 0:54 ` fengchengwen
2022-11-23 10:19 ` [RFC PATCH 4/4] testpmd: report lcore usage Robin Jarry
2022-11-23 10:44 ` [RFC PATCH 0/4] lcore telemetry improvements Robin Jarry
4 siblings, 1 reply; 11+ messages in thread
From: Robin Jarry @ 2022-11-23 10:19 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Jerin Jacob, Kevin Laatz, Konstantin Ananyev,
Mattias Rönnblom, Morten Brørup, Robin Jarry
Add a simple command that calls rte_lcore_dump().
Signed-off-by: Robin Jarry <rjarry@redhat.com>
---
app/test-pmd/cmdline.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index b32dc8bfd445..d290938ffb4e 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -7151,6 +7151,36 @@ static cmdline_parse_inst_t cmd_showfwdall = {
},
};
+/* show lcores */
+struct lcores_result {
+ cmdline_fixed_string_t show;
+ cmdline_fixed_string_t lcores;
+};
+
+static cmdline_parse_token_string_t cmd_lcores_action =
+ TOKEN_STRING_INITIALIZER(struct lcores_result, show, "show");
+static cmdline_parse_token_string_t cmd_lcores_lcores =
+ TOKEN_STRING_INITIALIZER(struct lcores_result, lcores, "lcores");
+
+static void
+cmd_showlcores_parsed(__rte_unused void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ __rte_unused void *data)
+{
+ rte_lcore_dump(stdout);
+}
+
+static cmdline_parse_inst_t cmd_showlcores = {
+ .f = cmd_showlcores_parsed,
+ .data = NULL,
+ .help_str = "show lcores",
+ .tokens = {
+ (void *)&cmd_lcores_action,
+ (void *)&cmd_lcores_lcores,
+ NULL,
+ },
+};
+
/* *** READ A RING DESCRIPTOR OF A PORT RX/TX QUEUE *** */
struct cmd_read_rxd_txd_result {
cmdline_fixed_string_t read;
@@ -12637,6 +12667,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = {
(cmdline_parse_inst_t *)&cmd_showdevice,
(cmdline_parse_inst_t *)&cmd_showcfg,
(cmdline_parse_inst_t *)&cmd_showfwdall,
+ (cmdline_parse_inst_t *)&cmd_showlcores,
(cmdline_parse_inst_t *)&cmd_start,
(cmdline_parse_inst_t *)&cmd_start_tx_first,
(cmdline_parse_inst_t *)&cmd_start_tx_first_n,
--
2.38.1
* Re: [RFC PATCH 3/4] testpmd: add show lcores command
2022-11-23 10:19 ` [RFC PATCH 3/4] testpmd: add show lcores command Robin Jarry
@ 2022-11-24 0:54 ` fengchengwen
0 siblings, 0 replies; 11+ messages in thread
From: fengchengwen @ 2022-11-24 0:54 UTC (permalink / raw)
To: Robin Jarry, dev
Cc: Bruce Richardson, Jerin Jacob, Kevin Laatz, Konstantin Ananyev,
Mattias Rönnblom, Morten Brørup
Hi Robin,
On 2022/11/23 18:19, Robin Jarry wrote:
> Add a simple command that calls rte_lcore_dump().
>
> Signed-off-by: Robin Jarry <rjarry@redhat.com>
> ---
> app/test-pmd/cmdline.c | 31 +++++++++++++++++++++++++++++++
> 1 file changed, 31 insertions(+)
>
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index b32dc8bfd445..d290938ffb4e 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -7151,6 +7151,36 @@ static cmdline_parse_inst_t cmd_showfwdall = {
> },
> };
>
> +/* show lcores */
> +struct lcores_result {
> + cmdline_fixed_string_t show;
> + cmdline_fixed_string_t lcores;
> +};
> +
> +static cmdline_parse_token_string_t cmd_lcores_action =
> + TOKEN_STRING_INITIALIZER(struct lcores_result, show, "show");
> +static cmdline_parse_token_string_t cmd_lcores_lcores =
> + TOKEN_STRING_INITIALIZER(struct lcores_result, lcores, "lcores");
> +
> +static void
> +cmd_showlcores_parsed(__rte_unused void *parsed_result,
> + __rte_unused struct cmdline *cl,
> + __rte_unused void *data)
> +{
> + rte_lcore_dump(stdout);
> +}
> +
> +static cmdline_parse_inst_t cmd_showlcores = {
> + .f = cmd_showlcores_parsed,
> + .data = NULL,
> + .help_str = "show lcores",
> + .tokens = {
> + (void *)&cmd_lcores_action,
> + (void *)&cmd_lcores_lcores,
> + NULL,
> + },
> +};
There are many dump_xxx commands; I suggest naming this command dump_lcores:
testpmd> dump_
dump_physmem [Mul-choice STRING]: Dump status
dump_memzone [Mul-choice STRING]: Dump status
dump_socket_mem [Mul-choice STRING]: Dump status
dump_struct_sizes [Mul-choice STRING]: Dump status
dump_ring [Mul-choice STRING]: Dump status
dump_mempool [Mul-choice STRING]: Dump status
dump_devargs [Mul-choice STRING]: Dump status
dump_log_types [Mul-choice STRING]: Dump status
dump_ring [Mul-choice STRING]: dump_ring|dump_mempool <name>: Dump one ring/mempool
dump_mempool [Mul-choice STRING]: dump_ring|dump_mempool <name>: Dump one ring/mempool
> +
> /* *** READ A RING DESCRIPTOR OF A PORT RX/TX QUEUE *** */
> struct cmd_read_rxd_txd_result {
> cmdline_fixed_string_t read;
> @@ -12637,6 +12667,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = {
> (cmdline_parse_inst_t *)&cmd_showdevice,
> (cmdline_parse_inst_t *)&cmd_showcfg,
> (cmdline_parse_inst_t *)&cmd_showfwdall,
> + (cmdline_parse_inst_t *)&cmd_showlcores,
> (cmdline_parse_inst_t *)&cmd_start,
> (cmdline_parse_inst_t *)&cmd_start_tx_first,
> (cmdline_parse_inst_t *)&cmd_start_tx_first_n,
>
* [RFC PATCH 4/4] testpmd: report lcore usage
2022-11-23 10:19 [RFC PATCH 0/4] lcore telemetry improvements Robin Jarry
` (2 preceding siblings ...)
2022-11-23 10:19 ` [RFC PATCH 3/4] testpmd: add show lcores command Robin Jarry
@ 2022-11-23 10:19 ` Robin Jarry
2022-11-23 10:44 ` [RFC PATCH 0/4] lcore telemetry improvements Robin Jarry
4 siblings, 0 replies; 11+ messages in thread
From: Robin Jarry @ 2022-11-23 10:19 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Jerin Jacob, Kevin Laatz, Konstantin Ananyev,
Mattias Rönnblom, Morten Brørup, Robin Jarry
Reuse the --record-core-cycles option to account for busy cycles. One
turn of packet_fwd_t is considered "busy" if there was at least one
received or transmitted packet.
Add two new interval_cycles and busy_cycles fields to the fwd_stream
structure. The core_cycles field cannot be reset to zero without
breaking the current behaviour.
Update get_end_cycles() to accept an additional argument for the number
of processed packets.
Every 10s, copy the number of cycles (busy and total) into a moving
average buffer. The buffer holds 6 samples of 10s and is rotated when
full.
When --record-core-cycles is specified, register a callback with
rte_lcore_register_busy_percent_cb(). In the callback, access the
average buffer to compute the percentage of busy cycles.
Example:
testpmd> show lcores
lcore 3, socket 0, role RTE, cpuset 3, busy N/A
lcore 4, socket 0, role RTE, cpuset 4, busy 39% last 60 sec
Signed-off-by: Robin Jarry <rjarry@redhat.com>
---
app/test-pmd/5tswap.c | 5 ++-
app/test-pmd/csumonly.c | 6 +--
app/test-pmd/flowgen.c | 2 +-
app/test-pmd/icmpecho.c | 6 +--
app/test-pmd/iofwd.c | 5 ++-
app/test-pmd/macfwd.c | 5 ++-
app/test-pmd/macswap.c | 5 ++-
app/test-pmd/noisy_vnf.c | 4 ++
app/test-pmd/rxonly.c | 5 ++-
app/test-pmd/shared_rxq_fwd.c | 5 ++-
app/test-pmd/testpmd.c | 69 +++++++++++++++++++++++++++++++++--
app/test-pmd/testpmd.h | 25 +++++++++++--
app/test-pmd/txonly.c | 7 ++--
13 files changed, 120 insertions(+), 29 deletions(-)
diff --git a/app/test-pmd/5tswap.c b/app/test-pmd/5tswap.c
index f041a5e1d530..03225075716c 100644
--- a/app/test-pmd/5tswap.c
+++ b/app/test-pmd/5tswap.c
@@ -116,7 +116,7 @@ pkt_burst_5tuple_swap(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
txp = &ports[fs->tx_port];
@@ -182,7 +182,8 @@ pkt_burst_5tuple_swap(struct fwd_stream *fs)
rte_pktmbuf_free(pkts_burst[nb_tx]);
} while (++nb_tx < nb_rx);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 1c2459851522..03e141221a56 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -868,7 +868,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
rx_bad_ip_csum = 0;
@@ -1200,8 +1200,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
rte_pktmbuf_free(tx_pkts_burst[nb_tx]);
} while (++nb_tx < nb_rx);
}
-
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index fd6abc0f4124..7b2f0ffdf0f5 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -196,7 +196,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
RTE_PER_LCORE(_next_flow) = next_flow;
- get_end_cycles(fs, start_tsc);
+ get_end_cycles(fs, start_tsc, nb_tx);
}
static int
diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c
index 066f2a3ab79b..2fc9f96dc95f 100644
--- a/app/test-pmd/icmpecho.c
+++ b/app/test-pmd/icmpecho.c
@@ -303,7 +303,7 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
nb_replies = 0;
@@ -508,8 +508,8 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
} while (++nb_tx < nb_replies);
}
}
-
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/iofwd.c b/app/test-pmd/iofwd.c
index 8fafdec548ad..e5a2dbe20c69 100644
--- a/app/test-pmd/iofwd.c
+++ b/app/test-pmd/iofwd.c
@@ -59,7 +59,7 @@ pkt_burst_io_forward(struct fwd_stream *fs)
pkts_burst, nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
@@ -84,7 +84,8 @@ pkt_burst_io_forward(struct fwd_stream *fs)
} while (++nb_tx < nb_rx);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c
index beb220fbb462..9db623999970 100644
--- a/app/test-pmd/macfwd.c
+++ b/app/test-pmd/macfwd.c
@@ -65,7 +65,7 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
txp = &ports[fs->tx_port];
@@ -115,7 +115,8 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
} while (++nb_tx < nb_rx);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c
index 4f8deb338296..4db134ac1d91 100644
--- a/app/test-pmd/macswap.c
+++ b/app/test-pmd/macswap.c
@@ -66,7 +66,7 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
txp = &ports[fs->tx_port];
@@ -93,7 +93,8 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
rte_pktmbuf_free(pkts_burst[nb_tx]);
} while (++nb_tx < nb_rx);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c
index c65ec6f06a5c..290bdcda45f0 100644
--- a/app/test-pmd/noisy_vnf.c
+++ b/app/test-pmd/noisy_vnf.c
@@ -152,6 +152,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
uint64_t delta_ms;
bool needs_flush = false;
uint64_t now;
+ uint64_t start_tsc = 0;
+
+ get_start_cycles(&start_tsc);
nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
pkts_burst, nb_pkt_per_burst);
@@ -219,6 +222,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, sent);
ncf->prev_time = rte_get_timer_cycles();
}
+ get_end_cycles(fs, start_tsc, nb_rx + nb_tx);
}
#define NOISY_STRSIZE 256
diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index d528d4f34e60..519202339e16 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -58,13 +58,14 @@ pkt_burst_receive(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
for (i = 0; i < nb_rx; i++)
rte_pktmbuf_free(pkts_burst[i]);
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/shared_rxq_fwd.c b/app/test-pmd/shared_rxq_fwd.c
index 2e9047804b5b..395b73bfe52e 100644
--- a/app/test-pmd/shared_rxq_fwd.c
+++ b/app/test-pmd/shared_rxq_fwd.c
@@ -102,9 +102,10 @@ shared_rxq_fwd(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
forward_shared_rxq(fs, nb_rx, pkts_burst);
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 134d79a55547..450bc281fd69 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2248,20 +2248,26 @@ static void
run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
{
struct fwd_stream **fsm;
+ uint64_t tics_per_1sec;
+ uint64_t tics_current;
+ uint64_t tics;
streamid_t nb_fs;
streamid_t sm_id;
+ int interval, loop;
#ifdef RTE_LIB_BITRATESTATS
- uint64_t tics_per_1sec;
uint64_t tics_datum;
- uint64_t tics_current;
uint16_t i, cnt_ports;
cnt_ports = nb_ports;
tics_datum = rte_rdtsc();
- tics_per_1sec = rte_get_timer_hz();
#endif
+ tics_per_1sec = rte_get_timer_hz();
+ tics = rte_rdtsc();
fsm = &fwd_streams[fc->stream_idx];
nb_fs = fc->stream_nb;
+ fc->lcore_id = rte_lcore_id();
+ interval = 0;
+ loop = 0;
do {
for (sm_id = 0; sm_id < nb_fs; sm_id++)
if (!fsm[sm_id]->disabled)
@@ -2284,8 +2290,58 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
latencystats_lcore_id == rte_lcore_id())
rte_latencystats_update();
#endif
-
+ if (record_core_cycles && ++loop == 1024) {
+ loop = 0;
+ tics_current = rte_rdtsc();
+ if (tics_current - tics >= tics_per_1sec * LCORE_CYCLES_INTERVAL_LEN) {
+ for (sm_id = 0; sm_id < nb_fs; sm_id++) {
+ struct fwd_stream *fs = fsm[sm_id];
+ if (fs->disabled)
+ continue;
+ fc->cycles[interval].busy += fs->interval_cycles.busy;
+ fc->cycles[interval].total += fs->interval_cycles.total;
+ fs->interval_cycles.busy = 0;
+ fs->interval_cycles.total = 0;
+ }
+ interval += 1;
+ if (interval == LCORE_CYCLES_INTERVAL_COUNT) {
+ memmove(&fc->cycles[0], &fc->cycles[1],
+ (LCORE_CYCLES_INTERVAL_COUNT - 1)
+ * sizeof(fc->cycles[0]));
+ interval = 0;
+ }
+ fc->cycles[interval].busy = 0;
+ fc->cycles[interval].total = 0;
+ tics = tics_current;
+ }
+ }
} while (! fc->stopped);
+ memset(&fc->cycles, 0, sizeof(fc->cycles));
+}
+
+static int
+lcore_busy_percent_callback(unsigned int lcore_id)
+{
+ uint64_t busy_cycles, total_cycles;
+ struct fwd_lcore *fc;
+ int i, c;
+
+ for (c = 0; c < nb_lcores; c++) {
+ fc = fwd_lcores[c];
+ if (fc->lcore_id != lcore_id)
+ continue;
+ busy_cycles = total_cycles = 0;
+
+ for (i = 0; i < LCORE_CYCLES_INTERVAL_COUNT; i++) {
+ busy_cycles += fc->cycles[i].busy;
+ total_cycles += fc->cycles[i].total;
+ }
+ if (total_cycles == 0)
+ return -1;
+ return 100 * busy_cycles / total_cycles;
+ }
+
+ return -1;
}
static int
@@ -4522,6 +4578,11 @@ main(int argc, char** argv)
rte_stats_bitrate_reg(bitrate_data);
}
#endif
+
+ if (record_core_cycles)
+ rte_lcore_register_busy_percent_cb(lcore_busy_percent_callback,
+ LCORE_CYCLES_INTERVAL_LEN * LCORE_CYCLES_INTERVAL_COUNT);
+
#ifdef RTE_LIB_CMDLINE
if (init_cmdline() != 0)
rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 7d24d25970d2..684a06919986 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -145,6 +145,14 @@ extern const struct rss_type_info rss_type_table[];
*/
extern char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
+/**
+ * Used with --record-core-cycles.
+ */
+struct lcore_cycles {
+ uint64_t busy;
+ uint64_t total;
+};
+
/**
* The data structure associated with a forwarding stream between a receive
* port/queue and a transmit port/queue.
@@ -175,6 +183,7 @@ struct fwd_stream {
unsigned int gro_times; /**< GRO operation times */
#endif
uint64_t core_cycles; /**< used for RX and TX processing */
+ struct lcore_cycles interval_cycles;
struct pkt_burst_stats rx_burst_stats;
struct pkt_burst_stats tx_burst_stats;
struct fwd_lcore *lcore; /**< Lcore being scheduled. */
@@ -341,6 +350,9 @@ struct rte_port {
struct xstat_display_info xstats_info;
};
+#define LCORE_CYCLES_INTERVAL_COUNT 6
+#define LCORE_CYCLES_INTERVAL_LEN 10
+
/**
* The data structure associated with each forwarding logical core.
* The logical cores are internally numbered by a core index from 0 to
@@ -360,6 +372,8 @@ struct fwd_lcore {
streamid_t stream_nb; /**< number of streams in "fwd_streams" */
lcoreid_t cpuid_idx; /**< index of logical core in CPU id table */
volatile char stopped; /**< stop forwarding when set */
+ unsigned int lcore_id; /**< return value of rte_lcore_id() */
+ struct lcore_cycles cycles[LCORE_CYCLES_INTERVAL_COUNT]; /**< busy percent stats */
};
/*
@@ -836,10 +850,15 @@ get_start_cycles(uint64_t *start_tsc)
}
static inline void
-get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc)
+get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc, uint64_t nb_packets)
{
- if (record_core_cycles)
- fs->core_cycles += rte_rdtsc() - start_tsc;
+ if (record_core_cycles) {
+ uint64_t cycles = rte_rdtsc() - start_tsc;
+ fs->core_cycles += cycles;
+ fs->interval_cycles.total += cycles;
+ if (nb_packets > 0)
+ fs->interval_cycles.busy += cycles;
+ }
}
static inline void
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 021624952daa..ad37626ff63c 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -331,7 +331,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
struct rte_mbuf *pkt;
struct rte_mempool *mbp;
struct rte_ether_hdr eth_hdr;
- uint16_t nb_tx;
+ uint16_t nb_tx = 0;
uint16_t nb_pkt;
uint16_t vlan_tci, vlan_tci_outer;
uint32_t retry;
@@ -392,7 +392,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
}
if (nb_pkt == 0)
- return;
+ goto end;
nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
@@ -426,7 +426,8 @@ pkt_burst_transmit(struct fwd_stream *fs)
} while (++nb_tx < nb_pkt);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_tx);
}
static int
--
2.38.1
* Re: [RFC PATCH 0/4] lcore telemetry improvements
2022-11-23 10:19 [RFC PATCH 0/4] lcore telemetry improvements Robin Jarry
` (3 preceding siblings ...)
2022-11-23 10:19 ` [RFC PATCH 4/4] testpmd: report lcore usage Robin Jarry
@ 2022-11-23 10:44 ` Robin Jarry
4 siblings, 0 replies; 11+ messages in thread
From: Robin Jarry @ 2022-11-23 10:44 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Jerin Jacob, Kevin Laatz, Konstantin Ananyev,
Mattias Rönnblom, Morten Brørup
There was a hiccup with my smtp config. Sorry about the double send.
Please ignore this series and reply on the other one.
I have updated patchwork accordingly.
* [RFC PATCH 4/4] testpmd: report lcore usage
2022-11-23 10:26 Robin Jarry
@ 2022-11-23 10:26 ` Robin Jarry
0 siblings, 0 replies; 11+ messages in thread
From: Robin Jarry @ 2022-11-23 10:26 UTC (permalink / raw)
To: dev
Cc: Bruce Richardson, Jerin Jacob, Kevin Laatz, Konstantin Ananyev,
Mattias Rönnblom, Morten Brørup, Robin Jarry
Reuse the --record-core-cycles option to account for busy cycles. One
turn of packet_fwd_t is considered "busy" if there was at least one
received or transmitted packet.
Add two new interval_cycles and busy_cycles fields to the fwd_stream
structure. The core_cycles field cannot be reset to zero without
breaking the current behaviour.
Update get_end_cycles() to accept an additional argument for the number
of processed packets.
Every 10s, copy the number of cycles (busy and total) into a moving
average buffer. The buffer holds 6 samples of 10s and is rotated when
full.
When --record-core-cycles is specified, register a callback with
rte_lcore_register_busy_percent_cb(). In the callback, access the
average buffer to compute the percentage of busy cycles.
Example:
testpmd> show lcores
lcore 3, socket 0, role RTE, cpuset 3, busy N/A
lcore 4, socket 0, role RTE, cpuset 4, busy 39% last 60 sec
Signed-off-by: Robin Jarry <rjarry@redhat.com>
---
app/test-pmd/5tswap.c | 5 ++-
app/test-pmd/csumonly.c | 6 +--
app/test-pmd/flowgen.c | 2 +-
app/test-pmd/icmpecho.c | 6 +--
app/test-pmd/iofwd.c | 5 ++-
app/test-pmd/macfwd.c | 5 ++-
app/test-pmd/macswap.c | 5 ++-
app/test-pmd/noisy_vnf.c | 4 ++
app/test-pmd/rxonly.c | 5 ++-
app/test-pmd/shared_rxq_fwd.c | 5 ++-
app/test-pmd/testpmd.c | 69 +++++++++++++++++++++++++++++++++--
app/test-pmd/testpmd.h | 25 +++++++++++--
app/test-pmd/txonly.c | 7 ++--
13 files changed, 120 insertions(+), 29 deletions(-)
diff --git a/app/test-pmd/5tswap.c b/app/test-pmd/5tswap.c
index f041a5e1d530..03225075716c 100644
--- a/app/test-pmd/5tswap.c
+++ b/app/test-pmd/5tswap.c
@@ -116,7 +116,7 @@ pkt_burst_5tuple_swap(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
txp = &ports[fs->tx_port];
@@ -182,7 +182,8 @@ pkt_burst_5tuple_swap(struct fwd_stream *fs)
rte_pktmbuf_free(pkts_burst[nb_tx]);
} while (++nb_tx < nb_rx);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 1c2459851522..03e141221a56 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -868,7 +868,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
rx_bad_ip_csum = 0;
@@ -1200,8 +1200,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
rte_pktmbuf_free(tx_pkts_burst[nb_tx]);
} while (++nb_tx < nb_rx);
}
-
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index fd6abc0f4124..7b2f0ffdf0f5 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -196,7 +196,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
RTE_PER_LCORE(_next_flow) = next_flow;
- get_end_cycles(fs, start_tsc);
+ get_end_cycles(fs, start_tsc, nb_tx);
}
static int
diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c
index 066f2a3ab79b..2fc9f96dc95f 100644
--- a/app/test-pmd/icmpecho.c
+++ b/app/test-pmd/icmpecho.c
@@ -303,7 +303,7 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
nb_replies = 0;
@@ -508,8 +508,8 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
} while (++nb_tx < nb_replies);
}
}
-
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/iofwd.c b/app/test-pmd/iofwd.c
index 8fafdec548ad..e5a2dbe20c69 100644
--- a/app/test-pmd/iofwd.c
+++ b/app/test-pmd/iofwd.c
@@ -59,7 +59,7 @@ pkt_burst_io_forward(struct fwd_stream *fs)
pkts_burst, nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
@@ -84,7 +84,8 @@ pkt_burst_io_forward(struct fwd_stream *fs)
} while (++nb_tx < nb_rx);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c
index beb220fbb462..9db623999970 100644
--- a/app/test-pmd/macfwd.c
+++ b/app/test-pmd/macfwd.c
@@ -65,7 +65,7 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
txp = &ports[fs->tx_port];
@@ -115,7 +115,8 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
} while (++nb_tx < nb_rx);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c
index 4f8deb338296..4db134ac1d91 100644
--- a/app/test-pmd/macswap.c
+++ b/app/test-pmd/macswap.c
@@ -66,7 +66,7 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
txp = &ports[fs->tx_port];
@@ -93,7 +93,8 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
rte_pktmbuf_free(pkts_burst[nb_tx]);
} while (++nb_tx < nb_rx);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c
index c65ec6f06a5c..290bdcda45f0 100644
--- a/app/test-pmd/noisy_vnf.c
+++ b/app/test-pmd/noisy_vnf.c
@@ -152,6 +152,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
uint64_t delta_ms;
bool needs_flush = false;
uint64_t now;
+ uint64_t start_tsc = 0;
+
+ get_start_cycles(&start_tsc);
nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
pkts_burst, nb_pkt_per_burst);
@@ -219,6 +222,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, sent);
ncf->prev_time = rte_get_timer_cycles();
}
+ get_end_cycles(fs, start_tsc, nb_rx + nb_tx);
}
#define NOISY_STRSIZE 256
diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index d528d4f34e60..519202339e16 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -58,13 +58,14 @@ pkt_burst_receive(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
fs->rx_packets += nb_rx;
for (i = 0; i < nb_rx; i++)
rte_pktmbuf_free(pkts_burst[i]);
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/shared_rxq_fwd.c b/app/test-pmd/shared_rxq_fwd.c
index 2e9047804b5b..395b73bfe52e 100644
--- a/app/test-pmd/shared_rxq_fwd.c
+++ b/app/test-pmd/shared_rxq_fwd.c
@@ -102,9 +102,10 @@ shared_rxq_fwd(struct fwd_stream *fs)
nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
- return;
+ goto end;
forward_shared_rxq(fs, nb_rx, pkts_burst);
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_rx);
}
static void
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 134d79a55547..450bc281fd69 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2248,20 +2248,26 @@ static void
run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
{
struct fwd_stream **fsm;
+ uint64_t tics_per_1sec;
+ uint64_t tics_current;
+ uint64_t tics;
streamid_t nb_fs;
streamid_t sm_id;
+ int interval, loop;
#ifdef RTE_LIB_BITRATESTATS
- uint64_t tics_per_1sec;
uint64_t tics_datum;
- uint64_t tics_current;
uint16_t i, cnt_ports;
cnt_ports = nb_ports;
tics_datum = rte_rdtsc();
- tics_per_1sec = rte_get_timer_hz();
#endif
+ tics_per_1sec = rte_get_timer_hz();
+ tics = rte_rdtsc();
fsm = &fwd_streams[fc->stream_idx];
nb_fs = fc->stream_nb;
+ fc->lcore_id = rte_lcore_id();
+ interval = 0;
+ loop = 0;
do {
for (sm_id = 0; sm_id < nb_fs; sm_id++)
if (!fsm[sm_id]->disabled)
@@ -2284,8 +2290,58 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
latencystats_lcore_id == rte_lcore_id())
rte_latencystats_update();
#endif
-
+ if (record_core_cycles && ++loop == 1024) {
+ loop = 0;
+ tics_current = rte_rdtsc();
+ if (tics_current - tics >= tics_per_1sec * LCORE_CYCLES_INTERVAL_LEN) {
+ for (sm_id = 0; sm_id < nb_fs; sm_id++) {
+ struct fwd_stream *fs = fsm[sm_id];
+ if (fs->disabled)
+ continue;
+ fc->cycles[interval].busy += fs->interval_cycles.busy;
+ fc->cycles[interval].total += fs->interval_cycles.total;
+ fs->interval_cycles.busy = 0;
+ fs->interval_cycles.total = 0;
+ }
+ interval += 1;
+ if (interval == LCORE_CYCLES_INTERVAL_COUNT) {
+ memmove(&fc->cycles[0], &fc->cycles[1],
+ (LCORE_CYCLES_INTERVAL_COUNT - 1)
+ * sizeof(fc->cycles[0]));
+ interval = 0;
+ }
+ fc->cycles[interval].busy = 0;
+ fc->cycles[interval].total = 0;
+ tics = tics_current;
+ }
+ }
} while (! fc->stopped);
+ memset(&fc->cycles, 0, sizeof(fc->cycles));
+}
+
+static int
+lcore_busy_percent_callback(unsigned int lcore_id)
+{
+ uint64_t busy_cycles, total_cycles;
+ struct fwd_lcore *fc;
+ int i, c;
+
+ for (c = 0; c < nb_lcores; c++) {
+ fc = fwd_lcores[c];
+ if (fc->lcore_id != lcore_id)
+ continue;
+ busy_cycles = total_cycles = 0;
+
+ for (i = 0; i < LCORE_CYCLES_INTERVAL_COUNT; i++) {
+ busy_cycles += fc->cycles[i].busy;
+ total_cycles += fc->cycles[i].total;
+ }
+ if (total_cycles == 0)
+ return -1;
+ return 100 * busy_cycles / total_cycles;
+ }
+
+ return -1;
}
static int
@@ -4522,6 +4578,11 @@ main(int argc, char** argv)
rte_stats_bitrate_reg(bitrate_data);
}
#endif
+
+ if (record_core_cycles)
+ rte_lcore_register_busy_percent_cb(lcore_busy_percent_callback,
+ LCORE_CYCLES_INTERVAL_LEN * LCORE_CYCLES_INTERVAL_COUNT);
+
#ifdef RTE_LIB_CMDLINE
if (init_cmdline() != 0)
rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 7d24d25970d2..684a06919986 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -145,6 +145,14 @@ extern const struct rss_type_info rss_type_table[];
*/
extern char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
+/**
+ * Used with --record-core-cycles.
+ */
+struct lcore_cycles {
+ uint64_t busy;
+ uint64_t total;
+};
+
/**
* The data structure associated with a forwarding stream between a receive
* port/queue and a transmit port/queue.
@@ -175,6 +183,7 @@ struct fwd_stream {
unsigned int gro_times; /**< GRO operation times */
#endif
uint64_t core_cycles; /**< used for RX and TX processing */
+ struct lcore_cycles interval_cycles;
struct pkt_burst_stats rx_burst_stats;
struct pkt_burst_stats tx_burst_stats;
struct fwd_lcore *lcore; /**< Lcore being scheduled. */
@@ -341,6 +350,9 @@ struct rte_port {
struct xstat_display_info xstats_info;
};
+#define LCORE_CYCLES_INTERVAL_COUNT 6
+#define LCORE_CYCLES_INTERVAL_LEN 10
+
/**
* The data structure associated with each forwarding logical core.
* The logical cores are internally numbered by a core index from 0 to
@@ -360,6 +372,8 @@ struct fwd_lcore {
streamid_t stream_nb; /**< number of streams in "fwd_streams" */
lcoreid_t cpuid_idx; /**< index of logical core in CPU id table */
volatile char stopped; /**< stop forwarding when set */
+ unsigned int lcore_id; /**< return value of rte_lcore_id() */
+ struct lcore_cycles cycles[LCORE_CYCLES_INTERVAL_COUNT]; /**< busy percent stats */
};
/*
@@ -836,10 +850,15 @@ get_start_cycles(uint64_t *start_tsc)
}
static inline void
-get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc)
+get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc, uint64_t nb_packets)
{
- if (record_core_cycles)
- fs->core_cycles += rte_rdtsc() - start_tsc;
+ if (record_core_cycles) {
+ uint64_t cycles = rte_rdtsc() - start_tsc;
+ fs->core_cycles += cycles;
+ fs->interval_cycles.total += cycles;
+ if (nb_packets > 0)
+ fs->interval_cycles.busy += cycles;
+ }
}
static inline void
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 021624952daa..ad37626ff63c 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -331,7 +331,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
struct rte_mbuf *pkt;
struct rte_mempool *mbp;
struct rte_ether_hdr eth_hdr;
- uint16_t nb_tx;
+ uint16_t nb_tx = 0;
uint16_t nb_pkt;
uint16_t vlan_tci, vlan_tci_outer;
uint32_t retry;
@@ -392,7 +392,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
}
if (nb_pkt == 0)
- return;
+ goto end;
nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
@@ -426,7 +426,8 @@ pkt_burst_transmit(struct fwd_stream *fs)
} while (++nb_tx < nb_pkt);
}
- get_end_cycles(fs, start_tsc);
+end:
+ get_end_cycles(fs, start_tsc, nb_tx);
}
static int
--
2.38.1