From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id CE333A0A03; Tue, 19 Jan 2021 04:39:57 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id E5550140E03; Tue, 19 Jan 2021 04:39:50 +0100 (CET) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id 00E1B140DFB for ; Tue, 19 Jan 2021 04:39:47 +0100 (CET) Received: from Internal Mail-Server by MTLPINE1 (envelope-from dongzhou@nvidia.com) with SMTP; 19 Jan 2021 05:39:44 +0200 Received: from nvidia.com (gen-l-vrt-320.mtl.labs.mlnx [10.237.113.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 10J3dgxb001138; Tue, 19 Jan 2021 05:39:44 +0200 From: Dong Zhou To: wisamm@nvidia.com, akozyrev@nvidia.com, suanmingm@nvidia.com, thomas@monjalon.net Cc: dev@dpdk.org, rasland@nvidia.com Date: Tue, 19 Jan 2021 05:39:39 +0200 Message-Id: <1611027580-332220-2-git-send-email-dongzhou@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1611027580-332220-1-git-send-email-dongzhou@nvidia.com> References: <1610542256-285053-1-git-send-email-dongzhou@nvidia.com> <1611027580-332220-1-git-send-email-dongzhou@nvidia.com> Subject: [dpdk-dev] [PATCH v2 1/2] app/flow-perf: split calculation and output used cpu time X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The app calculates and outputs the used CPU time for the rte flow insertion rate. This is also needed for some new insertion items, such as meter. It's better to split this calculation and output logic into a single function, so that all new insertion items can use it. 
Signed-off-by: Dong Zhou Reviewed-by: Wisam Jaddo Reviewed-by: Alexander Kozyrev --- app/test-flow-perf/main.c | 201 +++++++++++++++++++++++++++------------------- 1 file changed, 119 insertions(+), 82 deletions(-) diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c index 3a0e4c1..d10fa3f 100644 --- a/app/test-flow-perf/main.c +++ b/app/test-flow-perf/main.c @@ -93,11 +93,16 @@ struct lcore_info { static struct lcore_info lcore_infos[RTE_MAX_LCORE]; +struct used_cpu_time { + double insertion[MAX_PORTS][RTE_MAX_LCORE]; + double deletion[MAX_PORTS][RTE_MAX_LCORE]; +}; + struct multi_cores_pool { uint32_t cores_count; uint32_t rules_count; - double cpu_time_used_insertion[MAX_PORTS][RTE_MAX_LCORE]; - double cpu_time_used_deletion[MAX_PORTS][RTE_MAX_LCORE]; + struct used_cpu_time create_meter; + struct used_cpu_time create_flow; int64_t last_alloc[RTE_MAX_LCORE]; int64_t current_alloc[RTE_MAX_LCORE]; } __rte_cache_aligned; @@ -927,7 +932,7 @@ struct multi_cores_pool { printf(":: Port %d :: Core %d :: The time for deleting %d rules is %f seconds\n", port_id, core_id, rules_count_per_core, cpu_time_used); - mc_pool.cpu_time_used_deletion[port_id][core_id] = cpu_time_used; + mc_pool.create_flow.deletion[port_id][core_id] = cpu_time_used; } static struct rte_flow ** @@ -1034,7 +1039,7 @@ struct multi_cores_pool { printf(":: Port %d :: Core %d :: The time for creating %d in rules %f seconds\n", port_id, core_id, rules_count_per_core, cpu_time_used); - mc_pool.cpu_time_used_insertion[port_id][core_id] = cpu_time_used; + mc_pool.create_flow.insertion[port_id][core_id] = cpu_time_used; return flows_list; } @@ -1070,10 +1075,11 @@ struct multi_cores_pool { } } -static int -run_rte_flow_handler_cores(void *data __rte_unused) +static void +dump_used_cpu_time(const char *item, + uint16_t port, struct used_cpu_time *used_time) { - uint16_t port; + uint32_t i; /* Latency: total count of rte rules divided * over max time used by thread between all * threads time. 
@@ -1088,8 +1094,111 @@ struct multi_cores_pool { double deletion_throughput_time; double insertion_latency, insertion_throughput; double deletion_latency, deletion_throughput; + + /* Save first insertion/deletion rates from first thread. + * Start comparing with all threads, if any thread used + * time more than current saved, replace it. + * + * Thus in the end we will have the max time used for + * insertion/deletion by one thread. + * + * As for memory consumption, save the min of all threads + * of last alloc, and save the max for all threads for + * current alloc. + */ + + insertion_latency_time = used_time->insertion[port][0]; + deletion_latency_time = used_time->deletion[port][0]; + insertion_throughput_time = used_time->insertion[port][0]; + deletion_throughput_time = used_time->deletion[port][0]; + + i = mc_pool.cores_count; + while (i-- > 1) { + insertion_throughput_time += used_time->insertion[port][i]; + deletion_throughput_time += used_time->deletion[port][i]; + if (insertion_latency_time < used_time->insertion[port][i]) + insertion_latency_time = used_time->insertion[port][i]; + if (deletion_latency_time < used_time->deletion[port][i]) + deletion_latency_time = used_time->deletion[port][i]; + } + + insertion_latency = ((double) (mc_pool.rules_count + / insertion_latency_time) / 1000); + deletion_latency = ((double) (mc_pool.rules_count + / deletion_latency_time) / 1000); + + insertion_throughput_time /= mc_pool.cores_count; + deletion_throughput_time /= mc_pool.cores_count; + insertion_throughput = ((double) (mc_pool.rules_count + / insertion_throughput_time) / 1000); + deletion_throughput = ((double) (mc_pool.rules_count + / deletion_throughput_time) / 1000); + + /* Latency stats */ + printf("\n%s\n:: [Latency | Insertion] All Cores :: Port %d :: ", + item, port); + printf("Total flows insertion rate -> %f K Rules/Sec\n", + insertion_latency); + printf(":: [Latency | Insertion] All Cores :: Port %d :: ", port); + printf("The time for creating %d 
rules is %f seconds\n", + mc_pool.rules_count, insertion_latency_time); + + /* Throughput stats */ + printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port); + printf("Total flows insertion rate -> %f K Rules/Sec\n", + insertion_throughput); + printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port); + printf("The average time for creating %d rules is %f seconds\n", + mc_pool.rules_count, insertion_throughput_time); + + if (delete_flag) { + /* Latency stats */ + printf(":: [Latency | Deletion] All Cores :: Port %d :: Total " + "deletion rate -> %f K Rules/Sec\n", + port, deletion_latency); + printf(":: [Latency | Deletion] All Cores :: Port %d :: ", + port); + printf("The time for deleting %d rules is %f seconds\n", + mc_pool.rules_count, deletion_latency_time); + + /* Throughput stats */ + printf(":: [Throughput | Deletion] All Cores :: Port %d :: Total " + "deletion rate -> %f K Rules/Sec\n", + port, deletion_throughput); + printf(":: [Throughput | Deletion] All Cores :: Port %d :: ", + port); + printf("The average time for deleting %d rules is %f seconds\n", + mc_pool.rules_count, deletion_throughput_time); + } +} + +static void +dump_used_mem(uint16_t port) +{ + uint32_t i; int64_t last_alloc, current_alloc; int flow_size_in_bytes; + + last_alloc = mc_pool.last_alloc[0]; + current_alloc = mc_pool.current_alloc[0]; + + i = mc_pool.cores_count; + while (i-- > 1) { + if (last_alloc > mc_pool.last_alloc[i]) + last_alloc = mc_pool.last_alloc[i]; + if (current_alloc < mc_pool.current_alloc[i]) + current_alloc = mc_pool.current_alloc[i]; + } + + flow_size_in_bytes = (current_alloc - last_alloc) / mc_pool.rules_count; + printf("\n:: Port %d :: rte_flow size in DPDK layer: %d Bytes\n", + port, flow_size_in_bytes); +} + +static int +run_rte_flow_handler_cores(void *data __rte_unused) +{ + uint16_t port; int lcore_counter = 0; int lcore_id = rte_lcore_id(); int i; @@ -1120,83 +1229,11 @@ struct multi_cores_pool { /* Make sure all cores finished 
insertion/deletion process. */ rte_eal_mp_wait_lcore(); - /* Save first insertion/deletion rates from first thread. - * Start comparing with all threads, if any thread used - * time more than current saved, replace it. - * - * Thus in the end we will have the max time used for - * insertion/deletion by one thread. - * - * As for memory consumption, save the min of all threads - * of last alloc, and save the max for all threads for - * current alloc. - */ RTE_ETH_FOREACH_DEV(port) { - last_alloc = mc_pool.last_alloc[0]; - current_alloc = mc_pool.current_alloc[0]; - - insertion_latency_time = mc_pool.cpu_time_used_insertion[port][0]; - deletion_latency_time = mc_pool.cpu_time_used_deletion[port][0]; - insertion_throughput_time = mc_pool.cpu_time_used_insertion[port][0]; - deletion_throughput_time = mc_pool.cpu_time_used_deletion[port][0]; - i = mc_pool.cores_count; - while (i-- > 1) { - insertion_throughput_time += mc_pool.cpu_time_used_insertion[port][i]; - deletion_throughput_time += mc_pool.cpu_time_used_deletion[port][i]; - if (insertion_latency_time < mc_pool.cpu_time_used_insertion[port][i]) - insertion_latency_time = mc_pool.cpu_time_used_insertion[port][i]; - if (deletion_latency_time < mc_pool.cpu_time_used_deletion[port][i]) - deletion_latency_time = mc_pool.cpu_time_used_deletion[port][i]; - if (last_alloc > mc_pool.last_alloc[i]) - last_alloc = mc_pool.last_alloc[i]; - if (current_alloc < mc_pool.current_alloc[i]) - current_alloc = mc_pool.current_alloc[i]; - } - - flow_size_in_bytes = (current_alloc - last_alloc) / mc_pool.rules_count; - - insertion_latency = ((double) (mc_pool.rules_count / insertion_latency_time) / 1000); - deletion_latency = ((double) (mc_pool.rules_count / deletion_latency_time) / 1000); - insertion_throughput_time /= mc_pool.cores_count; - deletion_throughput_time /= mc_pool.cores_count; - insertion_throughput = ((double) (mc_pool.rules_count / insertion_throughput_time) / 1000); - deletion_throughput = ((double) 
(mc_pool.rules_count / deletion_throughput_time) / 1000); - - /* Latency stats */ - printf("\n:: [Latency | Insertion] All Cores :: Port %d :: ", port); - printf("Total flows insertion rate -> %f K Rules/Sec\n", - insertion_latency); - printf(":: [Latency | Insertion] All Cores :: Port %d :: ", port); - printf("The time for creating %d rules is %f seconds\n", - mc_pool.rules_count, insertion_latency_time); - - /* Throughput stats */ - printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port); - printf("Total flows insertion rate -> %f K Rules/Sec\n", - insertion_throughput); - printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port); - printf("The average time for creating %d rules is %f seconds\n", - mc_pool.rules_count, insertion_throughput_time); - - if (delete_flag) { - /* Latency stats */ - printf(":: [Latency | Deletion] All Cores :: Port %d :: Total flows " - "deletion rate -> %f K Rules/Sec\n", - port, deletion_latency); - printf(":: [Latency | Deletion] All Cores :: Port %d :: ", port); - printf("The time for deleting %d rules is %f seconds\n", - mc_pool.rules_count, deletion_latency_time); - - /* Throughput stats */ - printf(":: [Throughput | Deletion] All Cores :: Port %d :: Total flows " - "deletion rate -> %f K Rules/Sec\n", port, deletion_throughput); - printf(":: [Throughput | Deletion] All Cores :: Port %d :: ", port); - printf("The average time for deleting %d rules is %f seconds\n", - mc_pool.rules_count, deletion_throughput_time); - } - printf("\n:: Port %d :: rte_flow size in DPDK layer: %d Bytes\n", - port, flow_size_in_bytes); + dump_used_cpu_time("Flows:", + port, &mc_pool.create_flow); + dump_used_mem(port); } return 0; -- 1.8.3.1