From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by dpdk.org (Postfix) with ESMTP id 0A7005A99 for ; Wed, 28 Jan 2015 10:51:18 +0100 (CET) Received: from orsmga002.jf.intel.com ([10.7.209.21]) by fmsmga101.fm.intel.com with ESMTP; 28 Jan 2015 01:50:53 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.09,480,1418112000"; d="scan'208";a="677245391" Received: from shvmail01.sh.intel.com ([10.239.29.42]) by orsmga002.jf.intel.com with ESMTP; 28 Jan 2015 01:50:53 -0800 Received: from shecgisg004.sh.intel.com (shecgisg004.sh.intel.com [10.239.29.89]) by shvmail01.sh.intel.com with ESMTP id t0S9ooIQ011817; Wed, 28 Jan 2015 17:50:50 +0800 Received: from shecgisg004.sh.intel.com (localhost [127.0.0.1]) by shecgisg004.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP id t0S9okrQ007923; Wed, 28 Jan 2015 17:50:48 +0800 Received: (from dyzhou@localhost) by shecgisg004.sh.intel.com (8.13.6/8.13.6/Submit) id t0S9okwr007919; Wed, 28 Jan 2015 17:50:46 +0800 From: Danny Zhou To: dev@dpdk.org Date: Wed, 28 Jan 2015 17:50:31 +0800 Message-Id: <1422438631-7853-6-git-send-email-danny.zhou@intel.com> X-Mailer: git-send-email 1.7.4.1 In-Reply-To: <1422438631-7853-1-git-send-email-danny.zhou@intel.com> References: <1422438631-7853-1-git-send-email-danny.zhou@intel.com> Subject: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 28 Jan 2015 09:51:19 -0000 Signed-off-by: Danny Zhou --- examples/l3fwd-power/main.c | 170 +++++++++++++++++++++++++++++++++----------- 1 file changed, 129 insertions(+), 41 deletions(-) diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c index f6b55b9..e6e4f55 100644 --- a/examples/l3fwd-power/main.c +++ b/examples/l3fwd-power/main.c @@ -75,12 +75,13 @@ #include #include #include +#include #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 #define MAX_PKT_BURST 32 -#define MIN_ZERO_POLL_COUNT 5 +#define MIN_ZERO_POLL_COUNT 10 /* around 100ms at 2 Ghz */ #define TIMER_RESOLUTION_CYCLES 200000000ULL @@ -188,6 +189,9 @@ struct lcore_rx_queue { #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS #define MAX_RX_QUEUE_PER_PORT 128 +#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 + + #define MAX_LCORE_PARAMS 1024 struct lcore_params { uint8_t port_id; @@ -214,7 +218,7 @@ static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / static struct rte_eth_conf port_conf = { .rxmode = { - .mq_mode = ETH_MQ_RX_RSS, + .mq_mode = ETH_MQ_RX_RSS, .max_rx_pkt_len = ETHER_MAX_LEN, .split_hdr_size = 0, .header_split = 0, /**< Header Split disabled */ @@ -226,11 +230,14 @@ static struct rte_eth_conf port_conf = { .rx_adv_conf = { .rss_conf = { .rss_key = NULL, - .rss_hf = ETH_RSS_IP, + .rss_hf = ETH_RSS_UDP, }, }, .txmode = { - .mq_mode = ETH_DCB_NONE, + .mq_mode = ETH_MQ_TX_NONE, + }, + .intr_conf = { + .rxq = 1, /**< rxq interrupt feature enabled */ }, }; @@ -402,19 +409,22 @@ power_timer_cb(__attribute__((unused)) struct rte_timer *tim, /* accumulate total execution time in us when callback is invoked */ sleep_time_ratio = (float)(stats[lcore_id].sleep_time) / (float)SCALING_PERIOD; - /** * check whether need to scale down frequency a step if it sleep a lot. */ - if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) - rte_power_freq_down(lcore_id); + if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) { + if (rte_power_freq_down) + rte_power_freq_down(lcore_id); + } else if ( (unsigned)(stats[lcore_id].nb_rx_processed / - stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) + stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) { /** * scale down a step if average packet per iteration less * than expectation. */ - rte_power_freq_down(lcore_id); + if (rte_power_freq_down) + rte_power_freq_down(lcore_id); + } /** * initialize another timer according to current frequency to ensure @@ -707,22 +717,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, } -#define SLEEP_GEAR1_THRESHOLD 100 -#define SLEEP_GEAR2_THRESHOLD 1000 +#define MINIMUM_SLEEP_TIME 1 +#define SUSPEND_THRESHOLD 300 static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count) { - /* If zero count is less than 100, use it as the sleep time in us */ - if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD) - return zero_rx_packet_count; - /* If zero count is less than 1000, sleep time should be 100 us */ - else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) && - (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD)) - return SLEEP_GEAR1_THRESHOLD; - /* If zero count is greater than 1000, sleep time should be 1000 us */ - else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD) - return SLEEP_GEAR2_THRESHOLD; + /* If zero count is less than 100, sleep 1us */ + if (zero_rx_packet_count < SUSPEND_THRESHOLD) + return MINIMUM_SLEEP_TIME; + /* If zero count is less than 1000, sleep 100 us which is the minimum latency + switching from C3/C6 to C0 + */ + else + return SUSPEND_THRESHOLD; return 0; } @@ -762,6 +770,35 @@ power_freq_scaleup_heuristic(unsigned lcore_id, return FREQ_CURRENT; } +/** + * force polling thread sleep until one-shot rx interrupt triggers + * @param port_id + * Port id. + * @param queue_id + * Rx queue id. + * @return + * 0 on success + */ +static int +sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id) +{ + /* Enable one-shot rx interrupt */ + rte_eth_dev_rx_queue_intr_enable(port_id, queue_id); + + RTE_LOG(INFO, L3FWD_POWER, + "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n", + rte_lcore_id(), port_id, queue_id); + rte_eal_wait_rx_intr(port_id, queue_id); + RTE_LOG(INFO, L3FWD_POWER, + "lcore %u is waked up from rx interrupt on port%d,rxq%d\n", + rte_lcore_id(), port_id, queue_id); + + /* Disable one-shot rx interrupt */ + rte_eth_dev_rx_queue_intr_disable(port_id, queue_id); + + return 0; +} + /* main processing loop */ static int main_loop(__attribute__((unused)) void *dummy) @@ -775,7 +812,6 @@ main_loop(__attribute__((unused)) void *dummy) struct lcore_conf *qconf; struct lcore_rx_queue *rx_queue; enum freq_scale_hint_t lcore_scaleup_hint; - uint32_t lcore_rx_idle_count = 0; uint32_t lcore_idle_hint = 0; @@ -835,6 +871,8 @@ main_loop(__attribute__((unused)) void *dummy) prev_tsc_power = cur_tsc_power; } + +start_rx: /* * Read packet from RX queues */ @@ -848,6 +886,7 @@ main_loop(__attribute__((unused)) void *dummy) nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, MAX_PKT_BURST); + stats[lcore_id].nb_rx_processed += nb_rx; if (unlikely(nb_rx == 0)) { /** @@ -910,10 +949,13 @@ main_loop(__attribute__((unused)) void *dummy) rx_queue->freq_up_hint; } - if (lcore_scaleup_hint == FREQ_HIGHEST) - rte_power_freq_max(lcore_id); - else if (lcore_scaleup_hint == FREQ_HIGHER) - rte_power_freq_up(lcore_id); + if (lcore_scaleup_hint == FREQ_HIGHEST) { + if (rte_power_freq_max) + rte_power_freq_max(lcore_id); + } else if (lcore_scaleup_hint == FREQ_HIGHER) { + if (rte_power_freq_up) + rte_power_freq_up(lcore_id); + } } else { /** * All Rx queues empty in recent consecutive polls, @@ -928,21 +970,55 @@ main_loop(__attribute__((unused)) void *dummy) lcore_idle_hint = rx_queue->idle_hint; } - if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD) + if (lcore_idle_hint < SUSPEND_THRESHOLD) /** - * execute "pause" instruction to avoid context - * switch for short sleep. - */ + * execute "pause" instruction to avoid context + * switch which generally take hundres of microsecond + * for short sleep. + */ rte_delay_us(lcore_idle_hint); - else - /* long sleep force runing thread to suspend */ - usleep(lcore_idle_hint); - + else { + /* suspend untill rx interrupt trigges */ + sleep_until_rx_interrupt( + qconf->rx_queue_list[0].port_id, + qconf->rx_queue_list[0].queue_id); + /* start receiving packets immediately */ + goto start_rx; + } stats[lcore_id].sleep_time += lcore_idle_hint; } } } +/** + * It will be called as the callback for specified port after a LSI interrupt + * has been fully handled. This callback needs to be implemented carefully as + * it will be called in the interrupt host thread which is different from the + * application main thread. + * + * @param port_id + * Port id. + * @param type + * event type. + * @param param + * Pointer to(address of) the parameters. + * + * @return + * void. + */ + +/* +static void +rx_interrupt_event_callback(uint8_t port_id, enum rte_eth_event_type type, void *param) +{ + uint64_t rx_queues = *((uint64_t *)param); + + port_id = port_id + 1; + if(type == RTE_ETH_EVENT_INTR_RX) + port_id = rx_queues; +} +*/ + static int check_lcore_params(void) { @@ -1270,7 +1346,7 @@ setup_hash(int socketid) char s[64]; /* create ipv4 hash */ - snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); + rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); ipv4_l3fwd_hash_params.name = s; ipv4_l3fwd_hash_params.socket_id = socketid; ipv4_l3fwd_lookup_struct[socketid] = @@ -1280,7 +1356,7 @@ setup_hash(int socketid) "socket %d\n", socketid); /* create ipv6 hash */ - snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); + rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); ipv6_l3fwd_hash_params.name = s; ipv6_l3fwd_hash_params.socket_id = socketid; ipv6_l3fwd_lookup_struct[socketid] = @@ -1476,6 +1552,7 @@ main(int argc, char **argv) unsigned lcore_id; uint64_t hz; uint32_t n_tx_queue, nb_lcores; + uint32_t dev_rxq_num, dev_txq_num; uint8_t portid, nb_rx_queue, queue, socketid; /* catch SIGINT and restore cpufreq governor to ondemand */ @@ -1525,10 +1602,18 @@ main(int argc, char **argv) printf("Initializing port %d ... ", portid ); fflush(stdout); + rte_eth_dev_info_get(portid, &dev_info); + dev_rxq_num = dev_info.max_rx_queues; + dev_txq_num = dev_info.max_tx_queues; + nb_rx_queue = get_port_n_rx_queues(portid); + if (nb_rx_queue > dev_rxq_num) + rte_exit(EXIT_FAILURE, "Cannot configure not existed rxq: " + "port=%d\n", portid); + n_tx_queue = nb_lcores; - if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) - n_tx_queue = MAX_TX_QUEUE_PER_PORT; + if (n_tx_queue > dev_txq_num) + n_tx_queue = dev_txq_num; printf("Creating queues: nb_rxq=%d nb_txq=%u... ", nb_rx_queue, (unsigned)n_tx_queue ); ret = rte_eth_dev_configure(portid, nb_rx_queue, @@ -1552,6 +1637,9 @@ main(int argc, char **argv) if (rte_lcore_is_enabled(lcore_id) == 0) continue; + if (queueid >= dev_txq_num) + continue; + if (numa_on) socketid = \ (uint8_t)rte_lcore_to_socket_id(lcore_id); @@ -1586,8 +1674,9 @@ main(int argc, char **argv) /* init power management library */ ret = rte_power_init(lcore_id); if (ret) - rte_exit(EXIT_FAILURE, "Power management library " - "initialization failed on core%u\n", lcore_id); + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER, + "Power management library initialization " + "failed on core%u", lcore_id); /* init timer structures for each enabled lcore */ rte_timer_init(&power_timers[lcore_id]); @@ -1635,7 +1724,6 @@ main(int argc, char **argv) if (ret < 0) rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " "port=%d\n", ret, portid); - /* * If enabled, put device in promiscuous mode. * This allows IO forwarding mode to forward packets -- 1.8.1.4