From: Anthony Harivel <aharivel@redhat.com>
To: Aman Singh <aman.deep.singh@intel.com>,
Yuying Zhang <yuying.zhang@intel.com>
Cc: dev@dpdk.org, rjarry@redhat.com, david.marchand@redhat.com,
ktraynor@redhat.com, Anthony Harivel <aharivel@redhat.com>
Subject: [PATCH 2/2] app/testpmd: add testpmd based sleeping
Date: Thu, 16 Mar 2023 16:14:38 +0100 [thread overview]
Message-ID: <20230316151438.186241-2-aharivel@redhat.com> (raw)
In-Reply-To: <20230316151438.186241-1-aharivel@redhat.com>
Sleep for an incremental amount of time if the fwd engine has processed
at most half a burst of packets (i.e. 16 pkts with the default
setting) on a polling iteration of testpmd.
Upon detecting more than half a burst (i.e. > 16 pkts by default) on an
Rxq, reset the sleep time to zero (i.e. no sleep).
Sleep time will be increased on each iteration where the low load
conditions remain, up to the max sleep time, which is set by the user
with the "--max-sleep-us NUM" command line argument or, in interactive
mode, with "set max_sleep NUM".
The default max_sleep value is 0, which means that no sleeps will occur
and the default behaviour is unchanged from previously.
Testing has been performed on AMD EPYC 7702 server with --nb-cores 12.
The results were obtained via turbostat for each individual lcore:
max_sleep 0 ====== ======== ======== ========
idle 4Mpps 16Mpps Bursts
=============== ====== ======== ======== ========
C1-state % 0 0 0 0
C2-state % 0 0 0 0
% usage 100 100 100 100
Watt / core 1.14 1.18 1.19 1.14
=============== ====== ======== ======== ========
max_sleep 500 ====== ======== ======== ========
idle 4Mpps 16Mpps Bursts
=============== ====== ======== ======== ========
C1-state % 99 85 74 98.6
C2-state % 0 0 0 0
% usage 1 15 26 1
Watt / core 0.04 0.18 0.28 0.04
=============== ====== ======== ======== ========
max_sleep 1000 ====== ======== ======== ========
idle 4Mpps 16Mpps Bursts
=============== ====== ======== ======== ========
C1-state % 0 85 74 0.3
C2-state % 99 0 0 97.6
% usage 1 15 25 1
Watt / core 0.02 0.18 0.28 0.02
=============== ====== ======== ======== ========
In most cases, the power consumption of the cores is greatly reduced
while still achieving zero packet loss.
Latency tests have been performed for each of the tests above. The CPU
has a C1 latency of 1us and a C2 latency of 400us. In the worst-case
scenario, a Tx burst of thousands of packets every second, the following
latency in us (microseconds) has been observed:
=========== ==== ===== ======
max_sleep 0 500 1000
----------- ---- ----- ------
max latency 14 560 1260
min latency 5 5 6
Avg latency 7 305 617
=========== ==== ===== ======
link: https://www.github.com/torvalds/linux/tree/master/tools/power/x86/turbostat
Signed-off-by: Anthony Harivel <aharivel@redhat.com>
---
app/test-pmd/cmdline.c | 78 +++++++++++++++++++++
app/test-pmd/parameters.c | 14 ++++
app/test-pmd/testpmd.c | 24 ++++++-
app/test-pmd/testpmd.h | 2 +
doc/guides/testpmd_app_ug/run_app.rst | 19 +++++
doc/guides/testpmd_app_ug/testpmd_funcs.rst | 13 ++++
6 files changed, 149 insertions(+), 1 deletion(-)
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 02c72d06b768..b89aae1c3414 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -5522,6 +5522,82 @@ static cmdline_parse_inst_t cmd_set_link_check = {
},
};
+/* *** SET MAX SLEEP *** */
+struct cmd_max_sleep_result {
+ cmdline_fixed_string_t set;
+ cmdline_fixed_string_t max_sleep;
+ uint64_t value;
+};
+
+static void
+cmd_max_sleep_set_parsed(void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ __rte_unused void *data)
+{
+ struct cmd_max_sleep_result *res = parsed_result;
+ if ((!strcmp(res->set, "set"))
+ && (!strcmp(res->max_sleep, "max_sleep")))
+ max_sleep_us = res->value;
+}
+
+static cmdline_parse_token_string_t cmd_max_sleep_set =
+ TOKEN_STRING_INITIALIZER(struct cmd_max_sleep_result,
+ set, "set");
+static cmdline_parse_token_string_t cmd_max_sleep =
+ TOKEN_STRING_INITIALIZER(struct cmd_max_sleep_result,
+ max_sleep, "max_sleep");
+static cmdline_parse_token_num_t cmd_max_sleep_value =
+ TOKEN_NUM_INITIALIZER(struct cmd_max_sleep_result,
+ value, RTE_UINT64);
+
+static cmdline_parse_inst_t cmd_set_max_sleep = {
+ .f = cmd_max_sleep_set_parsed,
+ .data = NULL,
+ .help_str = "set max_sleep <value>: Set the maximum sleep in micro seconds.",
+ .tokens = {
+ (void *)&cmd_max_sleep_set,
+ (void *)&cmd_max_sleep,
+ (void *)&cmd_max_sleep_value,
+ NULL,
+ },
+};
+
+/* *** SHOW MAX SLEEP *** */
+struct cmd_max_sleep_show_result {
+ cmdline_fixed_string_t show;
+ cmdline_fixed_string_t max_sleep;
+ uint64_t value;
+};
+
+static void
+cmd_max_sleep_show_parsed(void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ __rte_unused void *data)
+{
+ struct cmd_max_sleep_show_result *res = parsed_result;
+ if ((!strcmp(res->show, "show"))
+ && (!strcmp(res->max_sleep, "max_sleep")))
+ printf("max_sleep %"PRIu64" us\n", max_sleep_us);
+}
+
+static cmdline_parse_token_string_t cmd_show_max_sleep_show =
+ TOKEN_STRING_INITIALIZER(struct cmd_max_sleep_show_result,
+ show, "show");
+static cmdline_parse_token_string_t cmd_max_sleep_show =
+ TOKEN_STRING_INITIALIZER(struct cmd_max_sleep_show_result,
+ max_sleep, "max_sleep");
+
+static cmdline_parse_inst_t cmd_show_max_sleep = {
+ .f = cmd_max_sleep_show_parsed,
+ .data = NULL,
+ .help_str = "show max_sleep: Display the maximun sleep in micro seconds.",
+ .tokens = {
+ (void *)&cmd_show_max_sleep_show,
+ (void *)&cmd_max_sleep_show,
+ NULL,
+ },
+};
+
/* *** SET FORWARDING MODE *** */
struct cmd_set_fwd_mode_result {
cmdline_fixed_string_t set;
@@ -12770,6 +12846,8 @@ static cmdline_parse_ctx_t builtin_ctx[] = {
(cmdline_parse_inst_t *)&cmd_set_allmulti_mode_all,
(cmdline_parse_inst_t *)&cmd_set_flush_rx,
(cmdline_parse_inst_t *)&cmd_set_link_check,
+ (cmdline_parse_inst_t *)&cmd_set_max_sleep,
+ (cmdline_parse_inst_t *)&cmd_show_max_sleep,
(cmdline_parse_inst_t *)&cmd_vlan_offload,
(cmdline_parse_inst_t *)&cmd_vlan_tpid,
(cmdline_parse_inst_t *)&cmd_rx_vlan_filter_all,
diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index 3b37809baf00..6cb4ce9edb0a 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -201,6 +201,9 @@ usage(char* progname)
printf(" --rx-mq-mode=0xX: hexadecimal bitmask of RX mq mode can be "
"enabled\n");
printf(" --record-core-cycles: enable measurement of CPU cycles.\n");
+ printf(" --max-sleep-us: maximum sleep time that will be requested in\n"
+ " microseconds per iteration of packet processing which has received zero\n"
+ " or a small amount of packets from the Rx queues it is polling.\n");
printf(" --record-burst-stats: enable display of RX and TX bursts.\n");
printf(" --hairpin-mode=0xXX: bitmask set the hairpin port mode.\n"
" 0x10 - explicit Tx rule, 0x02 - hairpin ports paired\n"
@@ -707,6 +710,7 @@ launch_args_parse(int argc, char** argv)
{ "no-iova-contig", 0, 0, 0 },
{ "rx-mq-mode", 1, 0, 0 },
{ "record-core-cycles", 0, 0, 0 },
+ { "max-sleep-us", 1, 0, 0 },
{ "record-burst-stats", 0, 0, 0 },
{ PARAM_NUM_PROCS, 1, 0, 0 },
{ PARAM_PROC_ID, 1, 0, 0 },
@@ -1459,6 +1463,16 @@ launch_args_parse(int argc, char** argv)
}
if (!strcmp(lgopts[opt_idx].name, "record-core-cycles"))
record_core_cycles = 1;
+ if (!strcmp(lgopts[opt_idx].name, "max-sleep-us")) {
+ char *end = NULL;
+ errno = 0;
+ unsigned long sleep = strtoul(optarg, &end, 10);
+
+ if (errno != 0 || *optarg == '\0' || *end != '\0' || sleep == 0)
+ rte_exit(EXIT_FAILURE, "max-sleep-us must be > 0\n");
+
+ max_sleep_us = sleep;
+ }
if (!strcmp(lgopts[opt_idx].name, "record-burst-stats"))
record_burst_stats = 1;
if (!strcmp(lgopts[opt_idx].name, PARAM_NUM_PROCS))
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index e25c082387f5..ccd9be175c59 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -524,6 +524,11 @@ uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
*/
enum rte_eth_rx_mq_mode rx_mq_mode = RTE_ETH_MQ_RX_VMDQ_DCB_RSS;
+/*
+ * Max sleep time requested in microseconds per iteration
+ */
+uint64_t max_sleep_us;
+
/*
* Used to set forced link speed
*/
@@ -2254,6 +2259,8 @@ static void
run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
{
struct fwd_stream **fsm;
+ uint64_t sleep_us = 0;
+ uint64_t sleep_cycles;
uint64_t prev_tsc;
streamid_t nb_fs;
streamid_t sm_id;
@@ -2283,6 +2290,8 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
pkts = (*pkt_fwd)(fs);
if (record_core_cycles && pkts > 0)
fs->busy_cycles += rte_rdtsc() - start_fs_tsc;
+ if (pkts > nb_pkt_per_burst / 2)
+ sleep_us = 0;
}
#ifdef RTE_LIB_BITRATESTATS
if (bitrate_enabled != 0 &&
@@ -2302,10 +2311,23 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
latencystats_lcore_id == rte_lcore_id())
rte_latencystats_update();
#endif
+ sleep_cycles = 0;
+ if (max_sleep_us) {
+ /* Check if a sleep should happen on this iteration. */
+ if (sleep_us > 0) {
+ uint64_t tsc = rte_rdtsc();
+
+ rte_delay_us_sleep(sleep_us);
+ sleep_cycles = rte_rdtsc() - tsc;
+ }
+ if (sleep_us < max_sleep_us)
+ /* Increase sleep time for next iteration. */
+ sleep_us += 1;
+ }
if (record_core_cycles) {
uint64_t tsc = rte_rdtsc();
- fc->total_cycles += tsc - prev_tsc;
+ fc->total_cycles += tsc - prev_tsc - sleep_cycles;
prev_tsc = tsc;
}
} while (! fc->stopped);
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index b399a0dc7f84..63758831d6c9 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -779,6 +779,8 @@ extern struct mplsoudp_decap_conf mplsoudp_decap_conf;
extern enum rte_eth_rx_mq_mode rx_mq_mode;
+extern uint64_t max_sleep_us;
+
extern struct rte_flow_action_conntrack conntrack_context;
extern int proc_id;
diff --git a/doc/guides/testpmd_app_ug/run_app.rst b/doc/guides/testpmd_app_ug/run_app.rst
index 57b23241cf36..c5afe44e680f 100644
--- a/doc/guides/testpmd_app_ug/run_app.rst
+++ b/doc/guides/testpmd_app_ug/run_app.rst
@@ -555,6 +555,25 @@ The command line options are:
The default value is 0. Hairpin will use single port mode and implicit Tx flow mode.
+* ``--max-sleep-us=N``
+
+ Set the maximum sleep in micro seconds. The default value is 0.
+
+ When ``max-sleep-us`` is set, the lcores running the packet forwarding may stop active polling and
+ go to sleep for an incrementing amount of time. Each time the forwarding engine processes less
+ than half a burst of packets, the sleep time will be incremented by 1 micro second, up to the
+ maximum value set by the user.
+
+ At any point, if the forwarding engine returns more than half a burst of packets, the sleep time
+ will be reset to 0.
+
+ Sleeping in the packet processing path yields back control to the kernel scheduler. The actual
+ sleep/wakeup times are not guaranteed and may differ significantly depending on system
+ configuration, allowed C-states and scheduler timer resolution (on Linux, this is controlled by
+ ``prctl(PR_SET_TIMERSLACK, nanoseconds)`` and it defaults to 10 micro seconds).
+
+ In interactive mode, the maximum sleep time can be set with ``set max_sleep N`` and displayed
+ with ``show max_sleep``.
Testpmd Multi-Process Command-line Options
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index 8f2384785930..53a67f02b702 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -637,6 +637,12 @@ Show fec mode of a port::
testpmd> show port (port_id) fec_mode
+show max_sleep
+~~~~~~~~~~~~~~
+
+Show max_sleep in micro seconds::
+
+ testpmd> show max_sleep
Configuration Functions
-----------------------
@@ -1802,6 +1808,13 @@ Set fec mode for a specific port::
testpmd> set port (port_id) fec_mode auto|off|rs|baser
+Set max_sleep
+~~~~~~~~~~~~~
+
+Set max_sleep in micro seconds::
+
+ testpmd> set max_sleep <us>
+
Config Sample actions list
~~~~~~~~~~~~~~~~~~~~~~~~~~
--
2.39.2
next prev parent reply other threads:[~2023-03-16 15:14 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-16 15:14 [PATCH 1/2] app/testpmd: fwd engines report processed packets Anthony Harivel
2023-03-16 15:14 ` Anthony Harivel [this message]
2023-03-16 16:17 ` [PATCH 2/2] app/testpmd: add testpmd based sleeping Stephen Hemminger
2023-03-16 17:05 ` Ferruh Yigit
2023-03-17 11:09 ` Anthony Harivel
2023-03-17 16:22 ` Stephen Hemminger
2023-03-21 11:45 ` Ferruh Yigit
2023-03-17 19:25 ` Robin Jarry
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230316151438.186241-2-aharivel@redhat.com \
--to=aharivel@redhat.com \
--cc=aman.deep.singh@intel.com \
--cc=david.marchand@redhat.com \
--cc=dev@dpdk.org \
--cc=ktraynor@redhat.com \
--cc=rjarry@redhat.com \
--cc=yuying.zhang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).