From: "Mattias Rönnblom" <mattias.ronnblom@ericsson.com>
To: Venky Venkatesh <vvenkatesh@paloaltonetworks.com>,
"dev@dpdk.org" <dev@dpdk.org>
Subject: Re: [dpdk-dev] Application used for DSW event_dev performance testing
Date: Wed, 28 Nov 2018 18:09:02 +0100 [thread overview]
Message-ID: <53ff77d0-060f-7191-0711-c6b413777735@ericsson.com> (raw)
In-Reply-To: <4af7e731-6243-ce80-cc78-4d6c0ebd7135@ericsson.com>
On 2018-11-28 17:55, Mattias Rönnblom wrote:
> Attached is a small DSW throughput test program that I thought might
> help you to find the issue.
Looks like DPDK's mailman didn't like my attachment.
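In case it is of use: the program takes its parameters after the EAL
arguments, as per the usage() string. A hypothetical invocation (the
core list below is just an example, and I'm assuming the DSW eventdev
is instantiated as a vdev in the usual way) could be

./dswtp --vdev=event_dsw0 -l 0-3 -- 3 500 100

i.e. four worker lcores, three pipeline stages, 500 TSC cycles of
per-stage work, and 100 million events.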
--
/*
* dswtp - A simple DSW eventdev scheduler throughput demo program.
*
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright(c) 2018 Ericsson AB
* Mattias Rönnblom <mattias.ronnblom@ericsson.com>
*/
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_eventdev.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_pause.h>
#include <rte_random.h>
#define EVENT_DEV_ID (0)
#define NUM_IN_FLIGHT_EVENTS (4096)
#define EVENTDEV_MAX_EVENTS (NUM_IN_FLIGHT_EVENTS * 2)
#define EVENTDEV_PORT_NEW_THRESHOLD (NUM_IN_FLIGHT_EVENTS)
#define NUM_FLOWS (1024)
#define ITER_PER_SYNC (32)
#define DEQUEUE_BURST_SIZE (32)
#define ENQUEUE_BURST_SIZE (32)
struct worker_ctx
{
uint8_t event_dev_id;
uint8_t event_port_id;
uint32_t events_to_produce;
uint16_t num_stages;
uint32_t stage_work;
int64_t num_events;
rte_atomic64_t *events_finished;
} __rte_cache_aligned;
static void
usage(const char *name)
{
printf("%s <num-stages> <stage-proc-cycles> <num-events[M]>\n", name);
}
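/*
 * Add the locally accumulated number of finished events to the shared
 * counter and return the new total. Batching the updates this way
 * keeps contention on the shared cache line low.
 */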
static int64_t
sync_event_count(rte_atomic64_t *total_events_finished,
uint32_t *finished_since_sync)
{
if (*finished_since_sync > 0) {
int64_t total;
total = rte_atomic64_add_return(total_events_finished,
*finished_since_sync);
*finished_since_sync = 0;
return total;
} else
return rte_atomic64_read(total_events_finished);
}
static void
cycle_consume(uint64_t work)
{
uint64_t deadline;
if (likely(work == 0))
return;
deadline = rte_get_timer_cycles() + work;
while (rte_get_timer_cycles() < deadline)
rte_pause();
}
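/*
 * Worker main loop. Each worker first injects its share of the
 * NUM_IN_FLIGHT_EVENTS events (one NEW event per iteration) and then
 * keeps dequeuing, spending stage_work TSC cycles per event before
 * forwarding it to the next stage's queue. An event that has passed
 * the last stage is counted as finished and fed back to queue 0, so
 * the number of in-flight events stays constant. Every ITER_PER_SYNC
 * iterations the local finished count is synced to the shared counter,
 * and the loop terminates once num_events events have completed.
 */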
static int
worker_start(void *arg)
{
struct worker_ctx *ctx = arg;
uint8_t dev_id = ctx->event_dev_id;
uint8_t port_id = ctx->event_port_id;
uint32_t num_produced = 0;
uint32_t finished_since_sync = 0;
uint16_t iter_since_sync = 0;
for (;;) {
uint16_t dequeued;
uint16_t i;
uint16_t enqueued = 0;
if (unlikely(num_produced < ctx->events_to_produce)) {
struct rte_event ev = {
.op = RTE_EVENT_OP_NEW,
.queue_id = 0,
.sched_type = RTE_SCHED_TYPE_ATOMIC,
.flow_id = rte_rand() % NUM_FLOWS
};
if (rte_event_enqueue_new_burst(dev_id, port_id,
&ev, 1) == 1)
num_produced++;
}
struct rte_event evs[DEQUEUE_BURST_SIZE];
dequeued = rte_event_dequeue_burst(dev_id, port_id, evs,
DEQUEUE_BURST_SIZE, 0);
for (i = 0; i < dequeued; i++) {
struct rte_event *ev = &evs[i];
uint16_t this_stage = ev->queue_id;
uint16_t next_stage_num = this_stage + 1;
cycle_consume(ctx->stage_work);
ev->op = RTE_EVENT_OP_FORWARD;
if (next_stage_num == ctx->num_stages) {
finished_since_sync++;
ev->queue_id = 0;
} else
ev->queue_id = next_stage_num;
}
do {
uint16_t left = dequeued - enqueued;
uint16_t burst_size =
RTE_MIN(left, ENQUEUE_BURST_SIZE);
enqueued +=
rte_event_enqueue_burst(dev_id, port_id,
evs+enqueued,
burst_size);
} while (unlikely(enqueued != dequeued));
iter_since_sync++;
if (unlikely(iter_since_sync == ITER_PER_SYNC)) {
int64_t total =
sync_event_count(ctx->events_finished,
&finished_since_sync);
if (total >= ctx->num_events)
break;
iter_since_sync = 0;
}
}
return 0;
}
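/*
 * Configure the event device with one atomic queue per pipeline stage
 * and one port per worker lcore, and link every port to all queues,
 * so any worker may process any stage.
 */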
static void
setup_event_dev(uint16_t num_stages, struct worker_ctx *worker_ctxs,
unsigned num_workers)
{
unsigned i;
struct rte_event_dev_info dev_info;
for (i=0; i < num_workers; i++)
worker_ctxs[i].event_dev_id = EVENT_DEV_ID;
rte_event_dev_info_get(EVENT_DEV_ID, &dev_info);
struct rte_event_dev_config config = {
.nb_event_queues = num_stages,
.nb_event_ports = num_workers,
.nb_events_limit = EVENTDEV_MAX_EVENTS,
.nb_event_queue_flows = dev_info.max_event_queue_flows,
.nb_event_port_dequeue_depth = DEQUEUE_BURST_SIZE,
.nb_event_port_enqueue_depth = ENQUEUE_BURST_SIZE
};
int rc = rte_event_dev_configure(EVENT_DEV_ID, &config);
if (rc)
rte_panic("Failed to configure the event dev\n");
struct rte_event_queue_conf queue_config = {
.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
};
for (i=0; i<num_stages; i++) {
uint8_t queue_id = i;
queue_config.schedule_type = RTE_SCHED_TYPE_ATOMIC;
queue_config.nb_atomic_flows = NUM_FLOWS;
queue_config.nb_atomic_order_sequences = NUM_FLOWS;
if (rte_event_queue_setup(EVENT_DEV_ID, queue_id,
&queue_config))
rte_panic("Unable to setup queue %d\n", queue_id);
}
struct rte_event_port_conf port_config = {
.new_event_threshold = EVENTDEV_PORT_NEW_THRESHOLD,
.dequeue_depth = DEQUEUE_BURST_SIZE,
.enqueue_depth = ENQUEUE_BURST_SIZE
};
for (i=0; i<num_workers; i++) {
uint8_t event_port_id = i;
worker_ctxs[i].event_port_id = event_port_id;
if (rte_event_port_setup(EVENT_DEV_ID, event_port_id,
&port_config) < 0)
rte_panic("Failed to create worker port #%d\n",
event_port_id);
}
for (i=0; i<num_workers; i++) {
uint8_t event_port_id = i;
if (rte_event_port_link(EVENT_DEV_ID, event_port_id,
NULL, NULL, 0)
!= (int)num_stages)
rte_panic("Failed to map worker ports\n");
}
if (rte_event_dev_start(EVENT_DEV_ID))
rte_panic("Unable to start eventdev\n");
}
static double
tsc_to_s(uint64_t tsc)
{
return (double)tsc/(double)rte_get_timer_hz();
}
int main(int argc, char *argv[])
{
int rc;
unsigned i;
unsigned num_workers;
uint16_t num_stages;
uint32_t stage_work;
int64_t num_events;
struct worker_ctx *worker_ctxs;
rte_atomic64_t *events_finished;
unsigned lcore_id;
uint64_t start;
uint64_t latency;
uint64_t ideal_latency;
rc = rte_eal_init(argc, argv);
if (rc < 0)
rte_panic("Invalid EAL arguments\n");
argc -= rc;
argv += rc;
if (argc != 4) {
usage(argv[0]);
exit(EXIT_FAILURE);
}
num_stages = atoi(argv[1]);
stage_work = atoi(argv[2]);
num_events = atof(argv[3]) * 1e6;
num_workers = rte_lcore_count();
worker_ctxs = rte_malloc("worker-ctx",
sizeof(struct worker_ctx) * num_workers,
RTE_CACHE_LINE_SIZE);
events_finished = rte_malloc("finished-events", sizeof(rte_atomic64_t),
RTE_CACHE_LINE_SIZE);
if (worker_ctxs == NULL || events_finished == NULL)
rte_panic("Unable to allocate memory\n");
rte_atomic64_init(events_finished);
for (i=0; i<num_workers; i++) {
struct worker_ctx *w = &worker_ctxs[i];
*w = (struct worker_ctx) {
.event_dev_id = EVENT_DEV_ID,
.event_port_id = i,
.events_to_produce = NUM_IN_FLIGHT_EVENTS/num_workers,
.num_stages = num_stages,
.stage_work = stage_work,
.num_events = num_events,
.events_finished = events_finished
};
}
setup_event_dev(num_stages, worker_ctxs, num_workers);
start = rte_get_timer_cycles();
rte_compiler_barrier();
i = 0;
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (rte_eal_remote_launch(worker_start, &(worker_ctxs[i]),
lcore_id))
rte_panic("Failed to launch worker");
i++;
}
worker_start(&worker_ctxs[num_workers-1]);
rte_eal_mp_wait_lcore();
rte_compiler_barrier();
latency = rte_get_timer_cycles() - start;
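/*
 * The ideal latency is the time needed for the pure per-stage
 * application work, perfectly spread across the workers; the
 * difference between the measured and the ideal latency is the
 * eventdev scheduling overhead.
 */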
ideal_latency = (stage_work * num_stages * num_events) / num_workers;
printf("Workers: %d\n", num_workers);
printf("Stages: %d\n", num_stages);
printf("Per-stage application processing: %d TSC cycles\n",
stage_work);
printf("Events: %"PRId64" M\n", num_events/1000000);
if (stage_work > 0)
printf("Ideal latency: %.2f s\n", tsc_to_s(ideal_latency));
printf("Actual latency: %.2f s\n", tsc_to_s(latency));
if (stage_work > 0)
printf("Ideal scheduling rate: %.2f M events/s\n",
(num_events*num_stages)/tsc_to_s(ideal_latency)/1e6);
printf("Actual scheduling rate: %.2f M events/s\n",
(num_events*num_stages)/tsc_to_s(latency)/1e6);
if (stage_work > 0) {
uint64_t per_stage_oh =
(latency - ideal_latency) / (num_events * num_stages);
printf("Scheduling overhead: %"PRId64" TSC cycles/stage\n",
per_stage_oh);
}
rte_event_dev_stop(EVENT_DEV_ID);
rte_exit(0, "\n");
}