From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by dpdk.space (Postfix) with ESMTP id 7606EA05D3 for ; Wed, 27 Mar 2019 14:58:38 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 3568B5920; Wed, 27 Mar 2019 14:58:38 +0100 (CET) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id A86A458C6 for ; Wed, 27 Mar 2019 14:58:35 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 27 Mar 2019 06:58:34 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,277,1549958400"; d="scan'208";a="286329157" Received: from aburakov-mobl1.ger.corp.intel.com (HELO [10.237.220.103]) ([10.237.220.103]) by orsmga004.jf.intel.com with ESMTP; 27 Mar 2019 06:58:33 -0700 To: David Hunt , dev@dpdk.org Cc: liang.j.ma@intel.com References: <20190222114551.30692-1-david.hunt@intel.com> From: "Burakov, Anatoly" Message-ID: <19701bdf-65cb-9b25-d3ad-6c3171c70105@intel.com> Date: Wed, 27 Mar 2019 13:58:32 +0000 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Thunderbird/60.6.0 MIME-Version: 1.0 In-Reply-To: <20190222114551.30692-1-david.hunt@intel.com> Content-Type: text/plain; charset="UTF-8"; format="flowed" Content-Language: en-US Content-Transfer-Encoding: 7bit Subject: Re: [dpdk-dev] [PATCH v1] examples/distributor: detect high frequency cores X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Message-ID: <20190327135832.9z4hpfEKCHKM00pq8l1QvN_0ifsUeMUXXdYskn13EyQ@z> On 22-Feb-19 11:45 AM, David Hunt wrote: > The distributor application is bottlenecked by the distributor core, > so if we can 
give more frequency to this core, then the overall > performance of the application may increase. > > This patch uses the rte_power_get_capabilities() API to query the cores > provided in the core mask, and if any high frequency cores are found > (e.g. Turbo Boost is enabled), we will pin the distributor workload to > that core. > > Signed-off-by: Liang Ma > Signed-off-by: David Hunt > --- > examples/distributor/main.c | 185 ++++++++++++++++++++++++------- > examples/distributor/meson.build | 2 +- > 2 files changed, 149 insertions(+), 38 deletions(-) > > diff --git a/examples/distributor/main.c b/examples/distributor/main.c > index 03a05e3d9..0541c50b0 100644 > --- a/examples/distributor/main.c > +++ b/examples/distributor/main.c > @@ -16,6 +16,7 @@ > #include > #include > #include > +#include > > #define RX_RING_SIZE 1024 > #define TX_RING_SIZE 1024 > @@ -281,6 +282,7 @@ lcore_rx(struct lcore_params *p) > if (++port == nb_ports) > port = 0; > } > + rte_power_exit(rte_lcore_id()); why is this being added? it doesn't seem relevant to either the commit message or the feature. if this was missing before, please add it in a separate patch. same applies to all other instances where rte_power_exit() is added. also, your app seems to support power and non-power operation. what happens when rte_power_exit is called on an lcore that's not been initialized (i.e. the fallback to non-power mode)? does this code (and other rte_power_exit() instances) only get called when in power mode? 
> /* set worker & tx threads quit flag */ > printf("\nCore %u exiting rx task.\n", rte_lcore_id()); > quit_signal = 1; > @@ -364,6 +366,8 @@ lcore_distributor(struct lcore_params *p) > printf("\nCore %u exiting distributor task.\n", rte_lcore_id()); > quit_signal_work = 1; > > + rte_power_exit(rte_lcore_id()); > + > rte_distributor_flush(d); > /* Unblock any returns so workers can exit */ > rte_distributor_clear_returns(d); > @@ -435,6 +439,7 @@ lcore_tx(struct rte_ring *in_r) > } > } > } > + rte_power_exit(rte_lcore_id()); > printf("\nCore %u exiting tx task.\n", rte_lcore_id()); > return 0; > } > @@ -575,9 +580,32 @@ lcore_worker(struct lcore_params *p) > if (num > 0) > app_stats.worker_bursts[p->worker_id][num-1]++; > } > + rte_power_exit(rte_lcore_id()); > return 0; > } > > +static int > +init_power_library(void) > +{ > + int ret = 0, lcore_id; > + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { RTE_LCORE_FOREACH? > + if (rte_lcore_is_enabled(lcore_id)) { > + /* init power management library */ > + ret = rte_power_init(lcore_id); > + if (ret) > + RTE_LOG(ERR, POWER, > + "Library initialization failed on core %u\n", > + lcore_id); > + /* > + * Return on first failure, we'll fall back > + * to non-power operation > + */ > + return ret; You'll probably want to fix indentation here, it's misleading. > + } > + } > + return ret; > +} > + > /* display usage */ > static void > print_usage(const char *prgname) <...> > + * Here we'll pre-assign lcore ids to the rx, tx and > + * distributor workloads if there's higher frequency > + * on those cores e.g. if Turbo Boost is enabled. > + * It's also worth mentioning that it will assign cores in a > + * specific order, so that if there's less than three > + * available, the higher frequency cores will go to the > + * distributor first, then rx, then tx. 
> + */ > + RTE_LCORE_FOREACH_SLAVE(lcore_id) { > + > + rte_power_get_capabilities(lcore_id, &lcore_cap); > + > + if (lcore_cap.turbo == 1) { > + priority_num++; > + switch (priority_num) { > + case 1: > + distr_core_id = lcore_id; > + printf("Distributor on priority core %d\n", This says "priority", other instances say "preferred". Which is it? :) > + lcore_id); > + break; > + case 2: > + rx_core_id = lcore_id; > + printf("Rx on preferred core %d\n", > + lcore_id); > + break; > + case 3: > + tx_core_id = lcore_id; > + printf("Tx on preferred core %d\n", > + lcore_id); > + break; > + default: > + break; > + } > + } > + } > + } > + > + /* > + * If there's any of the key workloads left without an lcore_id Double space after "there's". > + * after the higer frequency core assignment above, pre-assign > + * them here. > + */ > RTE_LCORE_FOREACH_SLAVE(lcore_id) { > - if (worker_id == rte_lcore_count() - 3) { > - printf("Starting distributor on lcore_id %d\n", > - lcore_id); > - /* distributor core */ > - struct lcore_params *p = > - rte_malloc(NULL, sizeof(*p), 0); > - if (!p) > - rte_panic("malloc failure\n"); > - *p = (struct lcore_params){worker_id, d, > - rx_dist_ring, dist_tx_ring, mbuf_pool}; > - rte_eal_remote_launch( > - (lcore_function_t *)lcore_distributor, > - p, lcore_id); > - } else if (worker_id == rte_lcore_count() - 4) { > - printf("Starting tx on worker_id %d, lcore_id %d\n", > - worker_id, lcore_id); > - /* tx core */ > - rte_eal_remote_launch((lcore_function_t *)lcore_tx, > - dist_tx_ring, lcore_id); > - } else if (worker_id == rte_lcore_count() - 2) { > - printf("Starting rx on worker_id %d, lcore_id %d\n", > - worker_id, lcore_id); > - /* rx core */ > - struct lcore_params *p = > - rte_malloc(NULL, sizeof(*p), 0); > - if (!p) > - rte_panic("malloc failure\n"); > - *p = (struct lcore_params){worker_id, d, rx_dist_ring, > - dist_tx_ring, mbuf_pool}; > - rte_eal_remote_launch((lcore_function_t *)lcore_rx, > - p, lcore_id); > + > + if (distr_core_id == 
0) { 0 is a valid core id. You would probably want to use -1 here. > + distr_core_id = lcore_id; > + printf("Distributor on core %d\n", lcore_id); > + } > + if ((rx_core_id == 0) && > + (lcore_id != distr_core_id)) { You could just check if (lcore_id == distr_core_id || lcore_id == rx_core_id || lcore_id == tx_core_id) and skip the iteration entirely, rather than checking at every step. > + rx_core_id = lcore_id; > + printf("Rx on core %d\n", lcore_id); > + } > + if ((tx_core_id == 0) && > + (lcore_id != distr_core_id) && > + (lcore_id != rx_core_id)) { > + tx_core_id = lcore_id; > + printf("Tx on core %d\n", lcore_id); > + } > + counter++; > + } > + > + printf(" tx id %d, dist id %d, rx id %d\n", > + tx_core_id, > + distr_core_id, > + rx_core_id); > + > + /* > + * Kick off all the worker threads first, avoiding the pre-assigned > + * lcore_ids for tx, rx and distributor workloads. > + */ > + RTE_LCORE_FOREACH_SLAVE(lcore_id) { > + > + if ((lcore_id == distr_core_id) || > + (lcore_id == rx_core_id) || > + (lcore_id == tx_core_id)) { > + > } else { This is a very unorthodox way of skipping an iteration :) -- Thanks, Anatoly