From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id 3C0D131FC for ; Tue, 19 Jun 2018 12:30:01 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 19 Jun 2018 03:30:00 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.51,242,1526367600"; d="scan'208";a="238701627" Received: from dhunt5-mobl2.ger.corp.intel.com (HELO [10.237.221.44]) ([10.237.221.44]) by fmsmga006.fm.intel.com with ESMTP; 19 Jun 2018 03:29:58 -0700 To: Liang Ma Cc: dev@dpdk.org, radu.nicolau@intel.com References: <1528451833-3617-1-git-send-email-liang.j.ma@intel.com> <1528451833-3617-2-git-send-email-liang.j.ma@intel.com> From: "Hunt, David" Message-ID: Date: Tue, 19 Jun 2018 11:31:30 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Thunderbird/52.8.0 MIME-Version: 1.0 In-Reply-To: <1528451833-3617-2-git-send-email-liang.j.ma@intel.com> Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-Language: en-US Subject: Re: [dpdk-dev] [PATCH v1 2/2] examples/l3fwd-power: simple app update to support new API X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 19 Jun 2018 10:30:02 -0000 Hi Liang, On 8/6/2018 10:57 AM, Liang Ma wrote: > Add the support for new traffic pattern aware power control > power management API. > > Example: > ./l3fwd-power -l xxx -n 4 -w 0000:xx:00.0 -w 0000:xx:00.1 -- -p 0x3 > -P --config="(0,0,xx),(1,0,xx)" --empty-poll Suggest expanding out each of the above options and explaining what each one does. Maybe also explain what traffic pattern aware power control is i.e. 
there are stats gathered every poll that count how many packets were received. Then every 100 ms these stats are analysed and a decision is taken to scale up/down the core. Maybe also mention there's a sliding window to reduce excessive hysteresis. > Signed-off-by: Liang Ma > --- > examples/l3fwd-power/main.c | 229 ++++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 211 insertions(+), 18 deletions(-) > > diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c > index 596d645..22a0e4e 100644 > --- a/examples/l3fwd-power/main.c > +++ b/examples/l3fwd-power/main.c > @@ -43,6 +43,7 @@ > #include > #include > #include > +#include > > #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 > > @@ -129,6 +130,9 @@ static uint32_t enabled_port_mask = 0; > static int promiscuous_on = 0; > /* NUMA is enabled by default. */ > static int numa_on = 1; > +/* emptypoll is disabled by default. */ > +static bool empty_poll_on; > +volatile bool empty_poll_stop; > static int parse_ptype; /**< Parse packet type using rx callback, and */ > /**< disabled by default */ > > @@ -336,6 +340,10 @@ static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count); > static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \ > unsigned int lcore_id, uint16_t port_id, uint16_t queue_id); > > +static int is_done(void) > +{ > + return empty_poll_stop; > +} > /* exit signal handler */ > static void > signal_exit_now(int sigtype) > @@ -344,7 +352,15 @@ signal_exit_now(int sigtype) > unsigned int portid; > int ret; > > + RTE_SET_USED(lcore_id); > + RTE_SET_USED(portid); > + RTE_SET_USED(ret); > + > if (sigtype == SIGINT) { > + if (empty_poll_on) > + empty_poll_stop = true; > + > + > for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { > if (rte_lcore_is_enabled(lcore_id) == 0) > continue; > @@ -353,20 +369,23 @@ signal_exit_now(int sigtype) > ret = rte_power_exit(lcore_id); > if (ret) > rte_exit(EXIT_FAILURE, "Power management " > - "library de-initialization 
failed on " > - "core%u\n", lcore_id); > + "library de-initialization failed on " > + "core%u\n", lcore_id); > } > > - RTE_ETH_FOREACH_DEV(portid) { > - if ((enabled_port_mask & (1 << portid)) == 0) > - continue; > + if (!empty_poll_on) { > + RTE_ETH_FOREACH_DEV(portid) { > + if ((enabled_port_mask & (1 << portid)) == 0) > + continue; > > - rte_eth_dev_stop(portid); > - rte_eth_dev_close(portid); > + rte_eth_dev_stop(portid); > + rte_eth_dev_close(portid); > + } > } > } > > - rte_exit(EXIT_SUCCESS, "User forced exit\n"); > + if (!empty_poll_on) > + rte_exit(EXIT_SUCCESS, "User forced exit\n"); > } > > /* Freqency scale down timer callback */ > @@ -831,6 +850,108 @@ static int event_register(struct lcore_conf *qconf) > > return 0; > } > +/* main processing loop */ > +static int > +main_empty_poll_loop(__attribute__((unused)) void *dummy) > +{ > + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; > + unsigned int lcore_id; > + uint64_t prev_tsc, diff_tsc, cur_tsc; > + int i, j, nb_rx; > + uint8_t queueid; > + uint16_t portid; > + struct lcore_conf *qconf; > + struct lcore_rx_queue *rx_queue; > + > + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; > + > + prev_tsc = 0; > + > + lcore_id = rte_lcore_id(); > + qconf = &lcore_conf[lcore_id]; > + > + if (qconf->n_rx_queue == 0) { > + RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id); > + return 0; > + } > + > + RTE_LOG(INFO, L3FWD_POWER, "entering main empty_poll loop on lcore %u\n", lcore_id); > + > + for (i = 0; i < qconf->n_rx_queue; i++) { > + portid = qconf->rx_queue_list[i].port_id; > + queueid = qconf->rx_queue_list[i].queue_id; > + RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u " > + "rxqueueid=%hhu\n", lcore_id, portid, queueid); > + } > + > + while (!is_done()) { > + stats[lcore_id].nb_iteration_looped++; > + > + cur_tsc = rte_rdtsc(); > + /* > + * TX burst queue drain > + */ > + diff_tsc = cur_tsc - prev_tsc; > + if (unlikely(diff_tsc > drain_tsc)) 
{ > + for (i = 0; i < qconf->n_tx_port; ++i) { > + portid = qconf->tx_port_id[i]; > + rte_eth_tx_buffer_flush(portid, > + qconf->tx_queue_id[portid], > + qconf->tx_buffer[portid]); > + } > + prev_tsc = cur_tsc; > + } > + > + /* > + * Read packet from RX queues > + */ > + for (i = 0; i < qconf->n_rx_queue; ++i) { > + rx_queue = &(qconf->rx_queue_list[i]); > + rx_queue->idle_hint = 0; > + portid = rx_queue->port_id; > + queueid = rx_queue->queue_id; > + > + nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, > + MAX_PKT_BURST); > + > + stats[lcore_id].nb_rx_processed += nb_rx; > + > + if (nb_rx == 0) { > + > + rte_empty_poll_stat_update(lcore_id); > + > + continue; > + } else { > + rte_poll_stat_update(lcore_id, nb_rx); > + } > + > + > + /* Prefetch first packets */ > + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { > + rte_prefetch0(rte_pktmbuf_mtod( > + pkts_burst[j], void *)); > + } > + > + /* Prefetch and forward already prefetched packets */ > + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { > + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ > + j + PREFETCH_OFFSET], void *)); > + l3fwd_simple_forward(pkts_burst[j], portid, > + qconf); > + } > + > + /* Forward remaining prefetched packets */ > + for (; j < nb_rx; j++) { > + l3fwd_simple_forward(pkts_burst[j], portid, > + qconf); > + } > + > + } > + > + } > + > + return 0; > +} > I see there's a new main loop added - main_empty_poll_loop. I think this is OK for the moment, but we might think about separating this out into another file in the future. There may be other power saving mechanisms that we'd want to implement in this way as well, rather than having a different l3-fwd app for each one. OK for the moment, IMO. 
> /* main processing loop */ > static int > @@ -1128,7 +1249,8 @@ print_usage(const char *prgname) > " --no-numa: optional, disable numa awareness\n" > " --enable-jumbo: enable jumbo frame" > " which max packet len is PKTLEN in decimal (64-9600)\n" > - " --parse-ptype: parse packet type by software\n", > + " --parse-ptype: parse packet type by software\n" > + " --empty=poll: enable empty poll detection\n", typo, should be empty-poll > prgname); > } > > @@ -1231,10 +1353,12 @@ parse_args(int argc, char **argv) > int opt, ret; > char **argvopt; > int option_index; > + uint32_t limit; > char *prgname = argv[0]; > static struct option lgopts[] = { > {"config", 1, 0, 0}, > {"no-numa", 0, 0, 0}, > + {"empty-poll", 0, 0, 0}, > {"enable-jumbo", 0, 0, 0}, > {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, > {NULL, 0, 0, 0} > @@ -1259,7 +1383,18 @@ parse_args(int argc, char **argv) > printf("Promiscuous mode selected\n"); > promiscuous_on = 1; > break; > - > + case 'l': > + limit = parse_portmask(optarg); > + rte_empty_poll_set_freq(LOW, limit); > + break; > + case 'm': > + limit = parse_portmask(optarg); > + rte_empty_poll_set_freq(MED, limit); > + break; > + case 'h': > + limit = parse_portmask(optarg); > + rte_empty_poll_set_freq(HGH, limit); > + break; > /* long options */ > case 0: > if (!strncmp(lgopts[option_index].name, "config", 6)) { > @@ -1278,6 +1413,12 @@ parse_args(int argc, char **argv) > } > > if (!strncmp(lgopts[option_index].name, > + "empty-poll", 10)) { > + printf("empty-poll is enabled\n"); > + empty_poll_on = true; > + } > + > + if (!strncmp(lgopts[option_index].name, > "enable-jumbo", 12)) { > struct option lenopts = > {"max-pkt-len", required_argument, \ > @@ -1609,6 +1750,41 @@ static int check_ptype(uint16_t portid) > > } > > +static int > +launch_timer(unsigned int lcore_id) > +{ > + int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms; > + > + RTE_SET_USED(lcore_id); > + > + > + if (rte_get_master_lcore() != lcore_id) { > + rte_panic("timer on lcore:%d which 
is not master core:%d\n", > + lcore_id, > + rte_get_master_lcore()); > + } > + > + RTE_LOG(INFO, POWER, "Bring up the Timer\n"); > + > + rte_empty_poll_setup_timer(); > + > + cycles_10ms = rte_get_timer_hz() / 100; > + > + while (!is_done()) { > + cur_tsc = rte_rdtsc(); > + diff_tsc = cur_tsc - prev_tsc; > + if (diff_tsc > cycles_10ms) { > + rte_timer_manage(); > + prev_tsc = cur_tsc; > + cycles_10ms = rte_get_timer_hz() / 100; > + } > + } > + > + RTE_LOG(INFO, POWER, "Timer_subsystem is done\n"); > + > + return 0; > +} > + > int > main(int argc, char **argv) > { > @@ -1780,14 +1956,15 @@ main(int argc, char **argv) > "Library initialization failed on core %u\n", lcore_id); > > /* init timer structures for each enabled lcore */ > - rte_timer_init(&power_timers[lcore_id]); > - hz = rte_get_timer_hz(); > - rte_timer_reset(&power_timers[lcore_id], > - hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id, > - power_timer_cb, NULL); > - > + if (empty_poll_on == false) { > + rte_timer_init(&power_timers[lcore_id]); > + hz = rte_get_timer_hz(); > + rte_timer_reset(&power_timers[lcore_id], > + hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id, > + power_timer_cb, NULL); > + } > qconf = &lcore_conf[lcore_id]; > - printf("\nInitializing rx queues on lcore %u ... 
", lcore_id ); > + printf("\nInitializing rx queues on lcore %u ...\n", lcore_id); > fflush(stdout); > /* init RX queues */ > for(queue = 0; queue < qconf->n_rx_queue; ++queue) { > @@ -1856,12 +2033,28 @@ main(int argc, char **argv) > > check_all_ports_link_status(enabled_port_mask); > > + if (empty_poll_on == true) > + rte_empty_poll_stat_init(); > + > + > /* launch per-lcore init on every lcore */ > - rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); > + if (empty_poll_on == false) { > + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); > + } else { > + empty_poll_stop = false; > + rte_eal_mp_remote_launch(main_empty_poll_loop, NULL, SKIP_MASTER); > + } > + > + if (empty_poll_on == true) > + launch_timer(rte_lcore_id()); > + > RTE_LCORE_FOREACH_SLAVE(lcore_id) { > if (rte_eal_wait_lcore(lcore_id) < 0) > return -1; > } > > + if (empty_poll_on) > + rte_empty_poll_stat_free(); > + > return 0; > }