From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id 89E501F5 for ; Wed, 28 Jan 2015 19:34:29 +0100 (CET) Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by orsmga103.jf.intel.com with ESMTP; 28 Jan 2015 10:30:07 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.09,482,1418112000"; d="scan'208";a="658184481" Received: from kmsmsx153.gar.corp.intel.com ([172.21.73.88]) by fmsmga001.fm.intel.com with ESMTP; 28 Jan 2015 10:34:26 -0800 Received: from kmsmsx154.gar.corp.intel.com (172.21.73.14) by KMSMSX153.gar.corp.intel.com (172.21.73.88) with Microsoft SMTP Server (TLS) id 14.3.195.1; Thu, 29 Jan 2015 02:34:25 +0800 Received: from shsmsx152.ccr.corp.intel.com (10.239.6.52) by KMSMSX154.gar.corp.intel.com (172.21.73.14) with Microsoft SMTP Server (TLS) id 14.3.195.1; Thu, 29 Jan 2015 02:34:24 +0800 Received: from shsmsx102.ccr.corp.intel.com ([169.254.2.238]) by SHSMSX152.ccr.corp.intel.com ([169.254.6.129]) with mapi id 14.03.0195.001; Thu, 29 Jan 2015 02:34:23 +0800 From: "Liang, Cunming" To: "Zhou, Danny" , "dev@dpdk.org" Thread-Topic: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch Thread-Index: AQHQOuApfWQre+OafE+V0G/1ZGMJ05zV2QqA Date: Wed, 28 Jan 2015 18:34:23 +0000 Message-ID: References: <1422438631-7853-1-git-send-email-danny.zhou@intel.com> <1422438631-7853-6-git-send-email-danny.zhou@intel.com> In-Reply-To: <1422438631-7853-6-git-send-email-danny.zhou@intel.com> Accept-Language: zh-CN, en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [10.239.127.40] Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Subject: Re: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and 
discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 28 Jan 2015 18:34:30 -0000 > -----Original Message----- > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Danny Zhou > Sent: Wednesday, January 28, 2015 2:51 AM > To: dev@dpdk.org > Subject: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interr= upt > and polling/interrupt mode switch >=20 > Signed-off-by: Danny Zhou > --- > examples/l3fwd-power/main.c | 170 > +++++++++++++++++++++++++++++++++----------- > 1 file changed, 129 insertions(+), 41 deletions(-) >=20 > diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c > index f6b55b9..e6e4f55 100644 > --- a/examples/l3fwd-power/main.c > +++ b/examples/l3fwd-power/main.c > @@ -75,12 +75,13 @@ > #include > #include > #include > +#include >=20 > #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 >=20 > #define MAX_PKT_BURST 32 >=20 > -#define MIN_ZERO_POLL_COUNT 5 > +#define MIN_ZERO_POLL_COUNT 10 >=20 > /* around 100ms at 2 Ghz */ > #define TIMER_RESOLUTION_CYCLES 200000000ULL > @@ -188,6 +189,9 @@ struct lcore_rx_queue { > #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS > #define MAX_RX_QUEUE_PER_PORT 128 >=20 > +#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 > + > + > #define MAX_LCORE_PARAMS 1024 > struct lcore_params { > uint8_t port_id; > @@ -214,7 +218,7 @@ static uint16_t nb_lcore_params =3D > sizeof(lcore_params_array_default) / >=20 > static struct rte_eth_conf port_conf =3D { > .rxmode =3D { > - .mq_mode =3D ETH_MQ_RX_RSS, > + .mq_mode =3D ETH_MQ_RX_RSS, > .max_rx_pkt_len =3D ETHER_MAX_LEN, > .split_hdr_size =3D 0, > .header_split =3D 0, /**< Header Split disabled */ > @@ -226,11 +230,14 @@ static struct rte_eth_conf port_conf =3D { > .rx_adv_conf =3D { > .rss_conf =3D { > .rss_key =3D NULL, > - .rss_hf =3D ETH_RSS_IP, > + .rss_hf =3D ETH_RSS_UDP, > }, > }, > .txmode =3D { > - .mq_mode =3D ETH_DCB_NONE, > + .mq_mode =3D ETH_MQ_TX_NONE, > + }, > + .intr_conf =3D 
{ > + .rxq =3D 1, /**< rxq interrupt feature enabled */ > }, > }; >=20 > @@ -402,19 +409,22 @@ power_timer_cb(__attribute__((unused)) struct > rte_timer *tim, > /* accumulate total execution time in us when callback is invoked */ > sleep_time_ratio =3D (float)(stats[lcore_id].sleep_time) / > (float)SCALING_PERIOD; > - > /** > * check whether need to scale down frequency a step if it sleep a lot. > */ > - if (sleep_time_ratio >=3D SCALING_DOWN_TIME_RATIO_THRESHOLD) > - rte_power_freq_down(lcore_id); > + if (sleep_time_ratio >=3D SCALING_DOWN_TIME_RATIO_THRESHOLD) { > + if (rte_power_freq_down) > + rte_power_freq_down(lcore_id); > + } > else if ( (unsigned)(stats[lcore_id].nb_rx_processed / > - stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) > + stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) { > /** > * scale down a step if average packet per iteration less > * than expectation. > */ > - rte_power_freq_down(lcore_id); > + if (rte_power_freq_down) > + rte_power_freq_down(lcore_id); > + } >=20 > /** > * initialize another timer according to current frequency to ensure > @@ -707,22 +717,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t > portid, >=20 > } >=20 > -#define SLEEP_GEAR1_THRESHOLD 100 > -#define SLEEP_GEAR2_THRESHOLD 1000 > +#define MINIMUM_SLEEP_TIME 1 > +#define SUSPEND_THRESHOLD 300 >=20 > static inline uint32_t > power_idle_heuristic(uint32_t zero_rx_packet_count) > { > - /* If zero count is less than 100, use it as the sleep time in us */ > - if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD) > - return zero_rx_packet_count; > - /* If zero count is less than 1000, sleep time should be 100 us */ > - else if ((zero_rx_packet_count >=3D SLEEP_GEAR1_THRESHOLD) && > - (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD)) > - return SLEEP_GEAR1_THRESHOLD; > - /* If zero count is greater than 1000, sleep time should be 1000 us */ > - else if (zero_rx_packet_count >=3D SLEEP_GEAR2_THRESHOLD) > - return SLEEP_GEAR2_THRESHOLD; > + /* If zero count is 
less than 100, sleep 1us */ > + if (zero_rx_packet_count < SUSPEND_THRESHOLD) > + return MINIMUM_SLEEP_TIME; > + /* If zero count is less than 1000, sleep 100 us which is the minimum > latency > + switching from C3/C6 to C0 > + */ > + else > + return SUSPEND_THRESHOLD; >=20 > return 0; > } > @@ -762,6 +770,35 @@ power_freq_scaleup_heuristic(unsigned lcore_id, > return FREQ_CURRENT; > } >=20 > +/** > + * force polling thread sleep until one-shot rx interrupt triggers > + * @param port_id > + * Port id. > + * @param queue_id > + * Rx queue id. > + * @return > + * 0 on success > + */ > +static int > +sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id) > +{ > + /* Enable one-shot rx interrupt */ > + rte_eth_dev_rx_queue_intr_enable(port_id, queue_id); > + > + RTE_LOG(INFO, L3FWD_POWER, > + "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n", > + rte_lcore_id(), port_id, queue_id); > + rte_eal_wait_rx_intr(port_id, queue_id); > + RTE_LOG(INFO, L3FWD_POWER, > + "lcore %u is waked up from rx interrupt on port%d,rxq%d\n", > + rte_lcore_id(), port_id, queue_id); > + > + /* Disable one-shot rx interrupt */ > + rte_eth_dev_rx_queue_intr_disable(port_id, queue_id); > + > + return 0; > +} > + > /* main processing loop */ > static int > main_loop(__attribute__((unused)) void *dummy) > @@ -775,7 +812,6 @@ main_loop(__attribute__((unused)) void *dummy) > struct lcore_conf *qconf; > struct lcore_rx_queue *rx_queue; > enum freq_scale_hint_t lcore_scaleup_hint; > - > uint32_t lcore_rx_idle_count =3D 0; > uint32_t lcore_idle_hint =3D 0; >=20 > @@ -835,6 +871,8 @@ main_loop(__attribute__((unused)) void *dummy) > prev_tsc_power =3D cur_tsc_power; > } >=20 > + > +start_rx: > /* > * Read packet from RX queues > */ > @@ -848,6 +886,7 @@ main_loop(__attribute__((unused)) void *dummy) >=20 > nb_rx =3D rte_eth_rx_burst(portid, queueid, pkts_burst, > MAX_PKT_BURST); > + > stats[lcore_id].nb_rx_processed +=3D nb_rx; > if (unlikely(nb_rx =3D=3D 0)) { > /** > @@ -910,10 +949,13 
@@ main_loop(__attribute__((unused)) void *dummy) > rx_queue->freq_up_hint; > } >=20 > - if (lcore_scaleup_hint =3D=3D FREQ_HIGHEST) > - rte_power_freq_max(lcore_id); > - else if (lcore_scaleup_hint =3D=3D FREQ_HIGHER) > - rte_power_freq_up(lcore_id); > + if (lcore_scaleup_hint =3D=3D FREQ_HIGHEST) { > + if (rte_power_freq_max) > + rte_power_freq_max(lcore_id); > + } else if (lcore_scaleup_hint =3D=3D FREQ_HIGHER) { > + if (rte_power_freq_up) > + rte_power_freq_up(lcore_id); > + } > } else { > /** > * All Rx queues empty in recent consecutive polls, > @@ -928,21 +970,55 @@ main_loop(__attribute__((unused)) void *dummy) > lcore_idle_hint =3D rx_queue->idle_hint; > } >=20 > - if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD) > + if (lcore_idle_hint < SUSPEND_THRESHOLD) > /** > - * execute "pause" instruction to avoid context > - * switch for short sleep. > - */ > + * execute "pause" instruction to avoid context > + * switch which generally take hundres of microsecond > + * for short sleep. > + */ > rte_delay_us(lcore_idle_hint); > - else > - /* long sleep force runing thread to suspend */ > - usleep(lcore_idle_hint); > - > + else { > + /* suspend untill rx interrupt trigges */ > + sleep_until_rx_interrupt( > + qconf->rx_queue_list[0].port_id, > + qconf->rx_queue_list[0].queue_id); > + /* start receiving packets immediately */ > + goto start_rx; > + } > stats[lcore_id].sleep_time +=3D lcore_idle_hint; > } > } > } >=20 > +/** > + * It will be called as the callback for specified port after a LSI inte= rrupt > + * has been fully handled. This callback needs to be implemented careful= ly as > + * it will be called in the interrupt host thread which is different fro= m the > + * application main thread. > + * > + * @param port_id > + * Port id. > + * @param type > + * event type. > + * @param param > + * Pointer to(address of) the parameters. > + * > + * @return > + * void. 
> + */ > + > +/* > +static void > +rx_interrupt_event_callback(uint8_t port_id, enum rte_eth_event_type typ= e, > void *param) > +{ > + uint64_t rx_queues =3D *((uint64_t *)param); > + > + port_id =3D port_id + 1; > + if(type =3D=3D RTE_ETH_EVENT_INTR_RX) > + port_id =3D rx_queues; [LCM] What is this bunch of things for? > +} > +*/ > + > static int > check_lcore_params(void) > { > @@ -1270,7 +1346,7 @@ setup_hash(int socketid) > char s[64]; >=20 > /* create ipv4 hash */ > - snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); > + rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); > ipv4_l3fwd_hash_params.name =3D s; > ipv4_l3fwd_hash_params.socket_id =3D socketid; > ipv4_l3fwd_lookup_struct[socketid] =3D > @@ -1280,7 +1356,7 @@ setup_hash(int socketid) > "socket %d\n", socketid); >=20 > /* create ipv6 hash */ > - snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); > + rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); > ipv6_l3fwd_hash_params.name =3D s; > ipv6_l3fwd_hash_params.socket_id =3D socketid; > ipv6_l3fwd_lookup_struct[socketid] =3D > @@ -1476,6 +1552,7 @@ main(int argc, char **argv) > unsigned lcore_id; > uint64_t hz; > uint32_t n_tx_queue, nb_lcores; > + uint32_t dev_rxq_num, dev_txq_num; > uint8_t portid, nb_rx_queue, queue, socketid; >=20 > /* catch SIGINT and restore cpufreq governor to ondemand */ > @@ -1525,10 +1602,18 @@ main(int argc, char **argv) > printf("Initializing port %d ... 
", portid ); > fflush(stdout); >=20 > + rte_eth_dev_info_get(portid, &dev_info); > + dev_rxq_num =3D dev_info.max_rx_queues; > + dev_txq_num =3D dev_info.max_tx_queues; > + > nb_rx_queue =3D get_port_n_rx_queues(portid); > + if (nb_rx_queue > dev_rxq_num) > + rte_exit(EXIT_FAILURE, "Cannot configure not existed rxq: " > + "port=3D%d\n", portid); > + > n_tx_queue =3D nb_lcores; > - if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) > - n_tx_queue =3D MAX_TX_QUEUE_PER_PORT; > + if (n_tx_queue > dev_txq_num) > + n_tx_queue =3D dev_txq_num; > printf("Creating queues: nb_rxq=3D%d nb_txq=3D%u... ", > nb_rx_queue, (unsigned)n_tx_queue ); > ret =3D rte_eth_dev_configure(portid, nb_rx_queue, > @@ -1552,6 +1637,9 @@ main(int argc, char **argv) > if (rte_lcore_is_enabled(lcore_id) =3D=3D 0) > continue; >=20 > + if (queueid >=3D dev_txq_num) > + continue; > + > if (numa_on) > socketid =3D \ > (uint8_t)rte_lcore_to_socket_id(lcore_id); > @@ -1586,8 +1674,9 @@ main(int argc, char **argv) > /* init power management library */ > ret =3D rte_power_init(lcore_id); > if (ret) > - rte_exit(EXIT_FAILURE, "Power management library " > - "initialization failed on core%u\n", lcore_id); > + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER, > + "Power management library initialization " > + "failed on core%u", lcore_id); >=20 > /* init timer structures for each enabled lcore */ > rte_timer_init(&power_timers[lcore_id]); > @@ -1635,7 +1724,6 @@ main(int argc, char **argv) > if (ret < 0) > rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=3D%d, " > "port=3D%d\n", ret, portid); > - > /* > * If enabled, put device in promiscuous mode. > * This allows IO forwarding mode to forward packets > -- > 1.8.1.4