From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id 445491F5 for ; Thu, 29 Jan 2015 05:07:24 +0100 (CET) Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga101.jf.intel.com with ESMTP; 28 Jan 2015 20:07:23 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.09,484,1418112000"; d="scan'208";a="644286651" Received: from kmsmsx153.gar.corp.intel.com ([172.21.73.88]) by orsmga001.jf.intel.com with ESMTP; 28 Jan 2015 20:07:22 -0800 Received: from shsmsx102.ccr.corp.intel.com (10.239.4.154) by KMSMSX153.gar.corp.intel.com (172.21.73.88) with Microsoft SMTP Server (TLS) id 14.3.195.1; Thu, 29 Jan 2015 12:07:20 +0800 Received: from shsmsx104.ccr.corp.intel.com ([169.254.5.231]) by shsmsx102.ccr.corp.intel.com ([169.254.2.124]) with mapi id 14.03.0195.001; Thu, 29 Jan 2015 12:07:18 +0800 From: "Zhou, Danny" To: "Liang, Cunming" , "dev@dpdk.org" Thread-Topic: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch Thread-Index: AQHQOuApfWQre+OafE+V0G/1ZGMJ05zV2QqAgACjFPA= Date: Thu, 29 Jan 2015 04:07:18 +0000 Message-ID: References: <1422438631-7853-1-git-send-email-danny.zhou@intel.com> <1422438631-7853-6-git-send-email-danny.zhou@intel.com> In-Reply-To: Accept-Language: zh-CN, en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [10.239.127.40] Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Subject: Re: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 29 Jan 2015 04:07:25 -0000 > -----Original Message----- > From: Liang, Cunming > Sent: Thursday, January 29, 2015 
2:34 AM > To: Zhou, Danny; dev@dpdk.org > Subject: RE: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx in= terrupt and polling/interrupt mode switch >=20 >=20 >=20 > > -----Original Message----- > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Danny Zhou > > Sent: Wednesday, January 28, 2015 2:51 AM > > To: dev@dpdk.org > > Subject: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx inte= rrupt > > and polling/interrupt mode switch > > > > Signed-off-by: Danny Zhou > > --- > > examples/l3fwd-power/main.c | 170 > > +++++++++++++++++++++++++++++++++----------- > > 1 file changed, 129 insertions(+), 41 deletions(-) > > > > diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c > > index f6b55b9..e6e4f55 100644 > > --- a/examples/l3fwd-power/main.c > > +++ b/examples/l3fwd-power/main.c > > @@ -75,12 +75,13 @@ > > #include > > #include > > #include > > +#include > > > > #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 > > > > #define MAX_PKT_BURST 32 > > > > -#define MIN_ZERO_POLL_COUNT 5 > > +#define MIN_ZERO_POLL_COUNT 10 > > > > /* around 100ms at 2 Ghz */ > > #define TIMER_RESOLUTION_CYCLES 200000000ULL > > @@ -188,6 +189,9 @@ struct lcore_rx_queue { > > #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS > > #define MAX_RX_QUEUE_PER_PORT 128 > > > > +#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 > > + > > + > > #define MAX_LCORE_PARAMS 1024 > > struct lcore_params { > > uint8_t port_id; > > @@ -214,7 +218,7 @@ static uint16_t nb_lcore_params =3D > > sizeof(lcore_params_array_default) / > > > > static struct rte_eth_conf port_conf =3D { > > .rxmode =3D { > > - .mq_mode =3D ETH_MQ_RX_RSS, > > + .mq_mode =3D ETH_MQ_RX_RSS, > > .max_rx_pkt_len =3D ETHER_MAX_LEN, > > .split_hdr_size =3D 0, > > .header_split =3D 0, /**< Header Split disabled */ > > @@ -226,11 +230,14 @@ static struct rte_eth_conf port_conf =3D { > > .rx_adv_conf =3D { > > .rss_conf =3D { > > .rss_key =3D NULL, > > - .rss_hf =3D ETH_RSS_IP, > > + .rss_hf =3D ETH_RSS_UDP, 
> > }, > > }, > > .txmode = { > > - .mq_mode = ETH_DCB_NONE, > > + .mq_mode = ETH_MQ_TX_NONE, > > + }, > > + .intr_conf = { > > + .rxq = 1, /**< rxq interrupt feature enabled */ > > }, > > }; > > > > @@ -402,19 +409,22 @@ power_timer_cb(__attribute__((unused)) struct > > rte_timer *tim, > > /* accumulate total execution time in us when callback is invoked */ > > sleep_time_ratio = (float)(stats[lcore_id].sleep_time) / > > (float)SCALING_PERIOD; > > - > > /** > > * check whether need to scale down frequency a step if it sleep a lot. > > */ > > - if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) > > - rte_power_freq_down(lcore_id); > > + if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) { > > + if (rte_power_freq_down) > > + rte_power_freq_down(lcore_id); > > + } > > else if ( (unsigned)(stats[lcore_id].nb_rx_processed / > > - stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) > > + stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) { > > /** > > * scale down a step if average packet per iteration less > > * than expectation. 
> > */ > > - rte_power_freq_down(lcore_id); > > + if (rte_power_freq_down) > > + rte_power_freq_down(lcore_id); > > + } > > > > /** > > * initialize another timer according to current frequency to ensure > > @@ -707,22 +717,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t > > portid, > > > > } > > > > -#define SLEEP_GEAR1_THRESHOLD 100 > > -#define SLEEP_GEAR2_THRESHOLD 1000 > > +#define MINIMUM_SLEEP_TIME 1 > > +#define SUSPEND_THRESHOLD 300 > > > > static inline uint32_t > > power_idle_heuristic(uint32_t zero_rx_packet_count) > > { > > - /* If zero count is less than 100, use it as the sleep time in us */ > > - if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD) > > - return zero_rx_packet_count; > > - /* If zero count is less than 1000, sleep time should be 100 us */ > > - else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) && > > - (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD)) > > - return SLEEP_GEAR1_THRESHOLD; > > - /* If zero count is greater than 1000, sleep time should be 1000 us */ > > - else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD) > > - return SLEEP_GEAR2_THRESHOLD; > > + /* If zero count is less than 100, sleep 1us */ > > + if (zero_rx_packet_count < SUSPEND_THRESHOLD) > > + return MINIMUM_SLEEP_TIME; > > + /* If zero count is less than 1000, sleep 100 us which is the minimum > > latency > > + switching from C3/C6 to C0 > > + */ > > + else > > + return SUSPEND_THRESHOLD; > > > > return 0; > > } > > @@ -762,6 +770,35 @@ power_freq_scaleup_heuristic(unsigned lcore_id, > > return FREQ_CURRENT; > > } > > > > +/** > > + * force polling thread sleep until one-shot rx interrupt triggers > > + * @param port_id > > + * Port id. > > + * @param queue_id > > + * Rx queue id. 
> > + * @return > > + * 0 on success > > + */ > > +static int > > +sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id) > > +{ > > + /* Enable one-shot rx interrupt */ > > + rte_eth_dev_rx_queue_intr_enable(port_id, queue_id); > > + > > + RTE_LOG(INFO, L3FWD_POWER, > > + "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n", > > + rte_lcore_id(), port_id, queue_id); > > + rte_eal_wait_rx_intr(port_id, queue_id); > > + RTE_LOG(INFO, L3FWD_POWER, > > + "lcore %u is waked up from rx interrupt on port%d,rxq%d\n", > > + rte_lcore_id(), port_id, queue_id); > > + > > + /* Disable one-shot rx interrupt */ > > + rte_eth_dev_rx_queue_intr_disable(port_id, queue_id); > > + > > + return 0; > > +} > > + > > /* main processing loop */ > > static int > > main_loop(__attribute__((unused)) void *dummy) > > @@ -775,7 +812,6 @@ main_loop(__attribute__((unused)) void *dummy) > > struct lcore_conf *qconf; > > struct lcore_rx_queue *rx_queue; > > enum freq_scale_hint_t lcore_scaleup_hint; > > - > > uint32_t lcore_rx_idle_count = 0; > > uint32_t lcore_idle_hint = 0; > > > > @@ -835,6 +871,8 @@ main_loop(__attribute__((unused)) void *dummy) > > prev_tsc_power = cur_tsc_power; > > } > > > > + > > +start_rx: > > /* > > * Read packet from RX queues > > */ > > @@ -848,6 +886,7 @@ main_loop(__attribute__((unused)) void *dummy) > > > > nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, > > MAX_PKT_BURST); > > + > > stats[lcore_id].nb_rx_processed += nb_rx; > > if (unlikely(nb_rx == 0)) { > > /** > > @@ -910,10 +949,13 @@ main_loop(__attribute__((unused)) void *dummy) > > rx_queue->freq_up_hint; > > } > > > > - if (lcore_scaleup_hint == FREQ_HIGHEST) > > - rte_power_freq_max(lcore_id); > > - else if (lcore_scaleup_hint == FREQ_HIGHER) > > - rte_power_freq_up(lcore_id); > > + if (lcore_scaleup_hint == FREQ_HIGHEST) { > > + if (rte_power_freq_max) > > + rte_power_freq_max(lcore_id); > > + } else if (lcore_scaleup_hint == FREQ_HIGHER) { > > + if 
(rte_power_freq_up) > > + rte_power_freq_up(lcore_id); > > + } > > } else { > > /** > > * All Rx queues empty in recent consecutive polls, > > @@ -928,21 +970,55 @@ main_loop(__attribute__((unused)) void *dummy) > > lcore_idle_hint = rx_queue->idle_hint; > > } > > > > - if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD) > > + if (lcore_idle_hint < SUSPEND_THRESHOLD) > > /** > > - * execute "pause" instruction to avoid context > > - * switch for short sleep. > > - */ > > + * execute "pause" instruction to avoid context > > + * switch which generally take hundres of microsecond > > + * for short sleep. > > + */ > > rte_delay_us(lcore_idle_hint); > > - else > > - /* long sleep force runing thread to suspend */ > > - usleep(lcore_idle_hint); > > - > > + else { > > + /* suspend untill rx interrupt trigges */ > > + sleep_until_rx_interrupt( > > + qconf->rx_queue_list[0].port_id, > > + qconf->rx_queue_list[0].queue_id); > > + /* start receiving packets immediately */ > > + goto start_rx; > > + } > > stats[lcore_id].sleep_time += lcore_idle_hint; > > } > > } > > } > > > > +/** > > + * It will be called as the callback for specified port after a LSI interrupt > > + * has been fully handled. This callback needs to be implemented carefully as > > + * it will be called in the interrupt host thread which is different from the > > + * application main thread. > > + * > > + * @param port_id > > + * Port id. > > + * @param type > > + * event type. > > + * @param param > > + * Pointer to(address of) the parameters. > > + * > > + * @return > > + * void. > > + */ > > + > > +/* > > +static void > > +rx_interrupt_event_callback(uint8_t port_id, enum rte_eth_event_type type, > > void *param) > > +{ > > + uint64_t rx_queues = *((uint64_t *)param); > > + > > + port_id = port_id + 1; > > + if(type == RTE_ETH_EVENT_INTR_RX) > > + port_id = rx_queues; > [LCM] What's this bunch of things for? [Danny Zhou] This is debug-related code; it will be removed in the V2 patch. 
>=20 > > +} > > +*/ > > + > > static int > > check_lcore_params(void) > > { > > @@ -1270,7 +1346,7 @@ setup_hash(int socketid) > > char s[64]; > > > > /* create ipv4 hash */ > > - snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); > > + rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); > > ipv4_l3fwd_hash_params.name =3D s; > > ipv4_l3fwd_hash_params.socket_id =3D socketid; > > ipv4_l3fwd_lookup_struct[socketid] =3D > > @@ -1280,7 +1356,7 @@ setup_hash(int socketid) > > "socket %d\n", socketid); > > > > /* create ipv6 hash */ > > - snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); > > + rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); > > ipv6_l3fwd_hash_params.name =3D s; > > ipv6_l3fwd_hash_params.socket_id =3D socketid; > > ipv6_l3fwd_lookup_struct[socketid] =3D > > @@ -1476,6 +1552,7 @@ main(int argc, char **argv) > > unsigned lcore_id; > > uint64_t hz; > > uint32_t n_tx_queue, nb_lcores; > > + uint32_t dev_rxq_num, dev_txq_num; > > uint8_t portid, nb_rx_queue, queue, socketid; > > > > /* catch SIGINT and restore cpufreq governor to ondemand */ > > @@ -1525,10 +1602,18 @@ main(int argc, char **argv) > > printf("Initializing port %d ... ", portid ); > > fflush(stdout); > > > > + rte_eth_dev_info_get(portid, &dev_info); > > + dev_rxq_num =3D dev_info.max_rx_queues; > > + dev_txq_num =3D dev_info.max_tx_queues; > > + > > nb_rx_queue =3D get_port_n_rx_queues(portid); > > + if (nb_rx_queue > dev_rxq_num) > > + rte_exit(EXIT_FAILURE, "Cannot configure not existed rxq: " > > + "port=3D%d\n", portid); > > + > > n_tx_queue =3D nb_lcores; > > - if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) > > - n_tx_queue =3D MAX_TX_QUEUE_PER_PORT; > > + if (n_tx_queue > dev_txq_num) > > + n_tx_queue =3D dev_txq_num; > > printf("Creating queues: nb_rxq=3D%d nb_txq=3D%u... 
", > > nb_rx_queue, (unsigned)n_tx_queue ); > > ret =3D rte_eth_dev_configure(portid, nb_rx_queue, > > @@ -1552,6 +1637,9 @@ main(int argc, char **argv) > > if (rte_lcore_is_enabled(lcore_id) =3D=3D 0) > > continue; > > > > + if (queueid >=3D dev_txq_num) > > + continue; > > + > > if (numa_on) > > socketid =3D \ > > (uint8_t)rte_lcore_to_socket_id(lcore_id); > > @@ -1586,8 +1674,9 @@ main(int argc, char **argv) > > /* init power management library */ > > ret =3D rte_power_init(lcore_id); > > if (ret) > > - rte_exit(EXIT_FAILURE, "Power management library " > > - "initialization failed on core%u\n", lcore_id); > > + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER, > > + "Power management library initialization " > > + "failed on core%u", lcore_id); > > > > /* init timer structures for each enabled lcore */ > > rte_timer_init(&power_timers[lcore_id]); > > @@ -1635,7 +1724,6 @@ main(int argc, char **argv) > > if (ret < 0) > > rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=3D%d, " > > "port=3D%d\n", ret, portid); > > - > > /* > > * If enabled, put device in promiscuous mode. > > * This allows IO forwarding mode to forward packets > > -- > > 1.8.1.4