From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.lysator.liu.se (mail.lysator.liu.se [130.236.254.3]) by dpdk.org (Postfix) with ESMTP id 04DD41B3B5 for ; Wed, 28 Nov 2018 18:09:04 +0100 (CET) Received: from mail.lysator.liu.se (localhost [127.0.0.1]) by mail.lysator.liu.se (Postfix) with ESMTP id 4AFBE4001A for ; Wed, 28 Nov 2018 18:09:04 +0100 (CET) Received: by mail.lysator.liu.se (Postfix, from userid 1004) id 388C140006; Wed, 28 Nov 2018 18:09:04 +0100 (CET) X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on bernadotte.lysator.liu.se X-Spam-Level: X-Spam-Status: No, score=-0.9 required=5.0 tests=ALL_TRUSTED,AWL autolearn=disabled version=3.4.1 X-Spam-Score: -0.9 Received: from [192.168.1.59] (host-90-232-89-187.mobileonline.telia.com [90.232.89.187]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.lysator.liu.se (Postfix) with ESMTPSA id 3100340005; Wed, 28 Nov 2018 18:09:03 +0100 (CET) From: =?UTF-8?Q?Mattias_R=c3=b6nnblom?= To: Venky Venkatesh , "dev@dpdk.org" References: <27A03E76-DED0-435F-B02F-24A7A7B1BCC9@contoso.com> <779258cb-490f-0111-94ce-bc87d1502ed0@lysator.liu.se> <0AD526BD-FC54-4128-829D-6D5EE8BEAFC6@paloaltonetworks.com> <7E26E1F9-4148-4F6C-9BC1-B79A419B2A97@paloaltonetworks.com> <4af7e731-6243-ce80-cc78-4d6c0ebd7135@ericsson.com> Message-ID: <53ff77d0-060f-7191-0711-c6b413777735@ericsson.com> Date: Wed, 28 Nov 2018 18:09:02 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.2.1 MIME-Version: 1.0 In-Reply-To: <4af7e731-6243-ce80-cc78-4d6c0ebd7135@ericsson.com> Content-Type: text/plain; charset=utf-8; format=flowed Content-Language: en-US Content-Transfer-Encoding: 8bit X-Virus-Scanned: ClamAV using ClamSMTP Subject: Re: [dpdk-dev] Application used for DSW event_dev performance testing X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , 
List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 28 Nov 2018 17:09:05 -0000 On 2018-11-28 17:55, Mattias Rönnblom wrote: > Attached is a small DSW throughput test program, that I thought might > help you to find the issue. Looks like DPDK's mailman didn't like my attachment. --
/*
 * dswtp - A simple DSW eventdev scheduler throughput demo program.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2018 Ericsson AB
 * Mattias Rönnblom
 */

/*
 * NOTE(review): every #include below has lost its header name in this copy,
 * presumably stripped together with the angle brackets when the mail was
 * archived. The header names must be restored before this can compile.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Identifier of the single event device used throughout the program. */
#define EVENT_DEV_ID (0)
#define NUM_IN_FLIGHT_EVENTS (4096)
/* Passed as nb_events_limit when the event device is configured. */
#define EVENTDEV_MAX_EVENTS (NUM_IN_FLIGHT_EVENTS * 2)
/* Not referenced in the surviving text - presumably used in the lost part. */
#define EVENTDEV_PORT_NEW_THRESHOLD (NUM_IN_FLIGHT_EVENTS)
/* New events get a flow id in the range [0, NUM_FLOWS). */
#define NUM_FLOWS (1024)
/* Worker loop iterations between updates of the shared finished counter. */
#define ITER_PER_SYNC (32)
/* Maximum events per dequeue call; also the configured port dequeue depth. */
#define DEQUEUE_BURST_SIZE (32)
/* Maximum events per enqueue call; also the configured port enqueue depth. */
#define ENQUEUE_BURST_SIZE (32)

/*
 * Per-worker parameters and shared state. worker_start() takes a pointer to
 * one of these as its argument.
 */
struct worker_ctx {
	/* Event device the worker enqueues to and dequeues from. */
	uint8_t event_dev_id;
	/* Event port used by this worker. */
	uint8_t event_port_id;
	/* Number of new events this worker injects before it only forwards. */
	uint32_t events_to_produce;
	/* Number of stages; an event leaving the last one wraps to queue 0. */
	uint16_t num_stages;
	/* Timer cycles to busy-wait for each event at each stage. */
	uint32_t stage_work;
	/* Worker exits once the shared finished count reaches this value. */
	int64_t num_events;
	/* Counter of finished events, updated atomically by the workers. */
	rte_atomic64_t *events_finished;
} __rte_cache_aligned;

/*
 * Print the program name followed by a space and a newline.
 *
 * NOTE(review): the format string carries no argument synopsis; it may have
 * been lost in archiving like the #include header names - confirm.
 */
static void usage(const char *name)
{
	printf("%s \n", name);
}

/*
 * Fold the locally counted finished events into the shared total.
 *
 * When *finished_since_sync is non-zero it is added atomically to
 * *total_events_finished, the local count is reset to zero and the new total
 * is returned. Otherwise the shared total is only read and returned.
 */
static int64_t sync_event_count(rte_atomic64_t *total_events_finished, uint32_t *finished_since_sync)
{
	if (*finished_since_sync > 0) {
		int64_t total;
		total = rte_atomic64_add_return(total_events_finished, *finished_since_sync);
		*finished_since_sync = 0;
		return total;
	} else
		return rte_atomic64_read(total_events_finished);
}

/*
 * Busy-wait until the timer cycle counter has advanced by work cycles,
 * calling rte_pause() in the loop. Returns at once when work is zero.
 */
static void cycle_consume(uint64_t work)
{
	uint64_t deadline;

	if (likely(work == 0))
		return;

	deadline = rte_get_timer_cycles() + work;
	while (rte_get_timer_cycles() < deadline)
		rte_pause();
}

/*
 * Worker loop; arg points to this worker's struct worker_ctx.
 *
 * Each iteration:
 *   1. while fewer than events_to_produce events have been injected, try to
 *      enqueue one new atomic event on queue 0 with a random flow id;
 *   2. dequeue up to DEQUEUE_BURST_SIZE events;
 *   3. for each event, consume stage_work cycles and mark it for forwarding
 *      to the next queue, or to queue 0 once it has passed the last stage
 *      (which also counts it as finished);
 *   4. enqueue the burst again, retrying until every event is accepted;
 *   5. on every ITER_PER_SYNC-th iteration publish the finished count and
 *      leave the loop when the shared total has reached num_events.
 *
 * Always returns 0.
 */
static int worker_start(void *arg)
{
	struct worker_ctx *ctx = arg;
	uint8_t dev_id = ctx->event_dev_id;
	uint8_t port_id = ctx->event_port_id;
	uint32_t num_produced = 0;
	uint32_t finished_since_sync = 0;
	uint16_t iter_since_sync = 0;

	for (;;) {
		uint16_t dequeued;
		uint16_t i;
		uint16_t enqueued = 0;

		/* At most one new event is injected per loop iteration. */
		if (unlikely(num_produced < ctx->events_to_produce)) {
			struct rte_event ev = {
				.op = RTE_EVENT_OP_NEW,
				.queue_id = 0,
				.sched_type = RTE_SCHED_TYPE_ATOMIC,
				.flow_id = rte_rand() % NUM_FLOWS
			};
			/* A rejected enqueue is simply tried again next iteration. */
			if (rte_event_enqueue_new_burst(dev_id, port_id, &ev, 1) == 1)
				num_produced++;
		}

		struct rte_event evs[DEQUEUE_BURST_SIZE];

		/* Last argument is the dequeue timeout; presumably 0 means no wait - confirm. */
		dequeued = rte_event_dequeue_burst(dev_id, port_id, evs, DEQUEUE_BURST_SIZE, 0);

		for (i = 0; i < dequeued; i++) {
			struct rte_event *ev = &evs[i];
			/* The queue id doubles as the stage number. */
			uint16_t this_stage = ev->queue_id;
			uint16_t next_stage_num = this_stage + 1;

			cycle_consume(ctx->stage_work);

			ev->op = RTE_EVENT_OP_FORWARD;

			if (next_stage_num == ctx->num_stages) {
				/* Last stage done: count it and recirculate to queue 0. */
				finished_since_sync++;
				ev->queue_id = 0;
			} else
				ev->queue_id = next_stage_num;
		}

		/*
		 * Retry until the whole burst has been accepted. With an empty
		 * burst this still makes one enqueue call with a size of zero.
		 */
		do {
			uint16_t left = dequeued - enqueued;
			uint16_t burst_size = RTE_MIN(left, ENQUEUE_BURST_SIZE);
			enqueued += rte_event_enqueue_burst(dev_id, port_id, evs+enqueued, burst_size);
		} while (unlikely(enqueued != dequeued));

		iter_since_sync++;
		/* The shared counter is only touched on every ITER_PER_SYNC-th iteration. */
		if (unlikely(iter_since_sync == ITER_PER_SYNC)) {
			int64_t total = sync_event_count(ctx->events_finished, &finished_since_sync);
			if (total >= ctx->num_events)
				break;
			iter_since_sync = 0;
		}
	}

	return 0;
}

/*
 * Configure event device EVENT_DEV_ID with one queue per stage and one port
 * per worker, and store the device id in every worker context. Panics when
 * the device configuration call returns non-zero.
 *
 * NOTE(review): this function is cut off in this copy, see the note at the
 * point where the text breaks.
 */
static void setup_event_dev(uint16_t num_stages, struct worker_ctx *worker_ctxs, unsigned num_workers)
{
	unsigned i;
	struct rte_event_dev_info dev_info;

	for (i=0; i < num_workers; i++)
		worker_ctxs[i].event_dev_id = EVENT_DEV_ID;

	/* The device info supplies the flow count used in the configuration. */
	rte_event_dev_info_get(EVENT_DEV_ID, &dev_info);

	struct rte_event_dev_config config = {
		.nb_event_queues = num_stages,
		.nb_event_ports = num_workers,
		.nb_events_limit = EVENTDEV_MAX_EVENTS,
		.nb_event_queue_flows = dev_info.max_event_queue_flows,
		.nb_event_port_dequeue_depth = DEQUEUE_BURST_SIZE,
		.nb_event_port_enqueue_depth = ENQUEUE_BURST_SIZE
	};

	int rc = rte_event_dev_configure(EVENT_DEV_ID, &config);
	if (rc)
		rte_panic("Failed to configure the event dev\n");

	struct rte_event_queue_conf queue_config = {
		.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
	};

	for (i=0; i
	/*
	 * NOTE(review): source text is missing at this point. The loop header
	 * above is truncated and the remainder of setup_event_dev() is gone,
	 * presumably removed when the mail was archived. The statements that
	 * follow use stage_work, num_events, num_stages, latency,
	 * ideal_latency and tsc_to_s(), none of which are declared in the
	 * surviving text; presumably they are the tail of main() - confirm
	 * against the original attachment.
	 */
	0) printf("Ideal latency: %.2f s\n", tsc_to_s(ideal_latency));
	printf("Actual latency: %.2f s\n", tsc_to_s(latency));

	if (stage_work > 0)
		printf("Ideal scheduling rate: %.2f M events/s\n", (num_events*num_stages)/tsc_to_s(ideal_latency)/1e6);
	printf("Actual scheduling rate: %.2f M events/s\n", (num_events*num_stages)/tsc_to_s(latency)/1e6);

	if (stage_work > 0) {
		/* Extra cycles beyond the ideal latency, averaged per event and stage. */
		uint64_t per_stage_oh = (latency - ideal_latency) / (num_events * num_stages);
		/* NOTE(review): per_stage_oh is uint64_t but is printed with PRId64. */
		printf("Scheduling overhead: %"PRId64" TSC cycles/stage\n", per_stage_oh);
	}

	rte_event_dev_stop(EVENT_DEV_ID);

	rte_exit(0, NULL);
}