* [PATCH] app/testeventdev: add vector worker to perf test
@ 2022-12-02 10:11 Volodymyr Fialko
2022-12-06 9:27 ` Volodymyr Fialko
2023-01-17 15:29 ` [EXT] " Shijith Thotton
0 siblings, 2 replies; 4+ messages in thread
From: Volodymyr Fialko @ 2022-12-02 10:11 UTC (permalink / raw)
To: dev, Jerin Jacob; +Cc: anoobj, gakhil, Volodymyr Fialko
Add a worker for handling vector events to the perf tests. Vector events can
be generated by the crypto adapter producer.
Example:
./dpdk-test-eventdev -l 0-2 -a <EVENTDEV> -a <CRYPTODEV> -- \
--prod_type_cryptodev --crypto_adptr_mode 1 --test=perf_queue \
--stlist=a --wlcores 1 --plcores 2 --prod_enq_burst_sz 32 \
--enable_vector --vector_tmo_ns 0 --nb_flows 2
Signed-off-by: Volodymyr Fialko <vfialko@marvell.com>
---
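Note (editor's illustration, not part of the commit message): the adapter-side
changes in perf_event_crypto_adapter_setup() below boil down to checking the
vector capability and limits, then passing a vector-enabled queue conf to
rte_event_crypto_adapter_queue_pair_add(). A condensed sketch follows, assuming
the event device, cryptodev, queue pair and vector mempool already exist; the
helper name and its parameters are illustrative only and do not appear in the
patch:

#include <errno.h>
#include <string.h>
#include <rte_mempool.h>
#include <rte_event_crypto_adapter.h>

static int
enable_crypto_vector(uint8_t adapter_id, uint8_t dev_id, uint8_t cdev_id,
		     int32_t qp_id, uint8_t ev_queue_id,
		     struct rte_mempool *vec_pool, uint16_t vec_size,
		     uint64_t vec_tmo_ns)
{
	struct rte_event_crypto_adapter_vector_limits limits;
	struct rte_event_crypto_adapter_queue_conf conf;
	uint32_t cap;
	int ret;

	/* The crypto adapter must advertise event vector support. */
	ret = rte_event_crypto_adapter_caps_get(dev_id, cdev_id, &cap);
	if (ret)
		return ret;
	if (!(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_EVENT_VECTOR))
		return -ENOTSUP;

	/* Requested vector size must fall within the PMD's limits. */
	ret = rte_event_crypto_adapter_vector_limits_get(dev_id, cdev_id, &limits);
	if (ret)
		return ret;
	if (vec_size < limits.min_sz || vec_size > limits.max_sz)
		return -EINVAL;

	/* Aggregate completed crypto ops into vector events on this queue. */
	memset(&conf, 0, sizeof(conf));
	conf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	conf.ev.queue_id = ev_queue_id;
	conf.vector_mp = vec_pool;
	conf.vector_sz = vec_size;
	conf.vector_timeout_ns = vec_tmo_ns;
	conf.flags = RTE_EVENT_CRYPTO_ADAPTER_EVENT_VECTOR;

	return rte_event_crypto_adapter_queue_pair_add(adapter_id, cdev_id,
						       qp_id, &conf);
}

The full version in the patch additionally validates the vector timeout and the
log2 size requirement against the reported limits, and only fills conf.ev when
the PMD has the INTERNAL_PORT_QP_EV_BIND capability.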
app/test-eventdev/test_perf_atq.c | 62 ++++++++++++++--
app/test-eventdev/test_perf_common.c | 68 +++++++++++++++---
app/test-eventdev/test_perf_common.h | 102 ++++++++++++++++++++++++++-
app/test-eventdev/test_perf_queue.c | 63 +++++++++++++++--
doc/guides/tools/testeventdev.rst | 12 ++--
5 files changed, 279 insertions(+), 28 deletions(-)
diff --git a/app/test-eventdev/test_perf_atq.c b/app/test-eventdev/test_perf_atq.c
index 9d30081117..4ac60cc38b 100644
--- a/app/test-eventdev/test_perf_atq.c
+++ b/app/test-eventdev/test_perf_atq.c
@@ -24,14 +24,22 @@ atq_fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
ev->event_type = RTE_EVENT_TYPE_CPU;
}
+static __rte_always_inline void
+atq_fwd_event_vector(struct rte_event *const ev, uint8_t *const sched_type_list,
+ const uint8_t nb_stages)
+{
+ ev->sub_event_type++;
+ ev->sched_type = sched_type_list[ev->sub_event_type % nb_stages];
+ ev->op = RTE_EVENT_OP_FORWARD;
+ ev->event_type = RTE_EVENT_TYPE_CPU_VECTOR;
+}
+
static int
perf_atq_worker(void *arg, const int enable_fwd_latency)
{
- struct perf_elt *pe = NULL;
uint16_t enq = 0, deq = 0;
struct rte_event ev;
PERF_WORKER_INIT;
- uint8_t stage;
while (t->done == false) {
deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -79,9 +87,7 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
/* +1 to avoid prefetch out of array check */
struct rte_event ev[BURST_SIZE + 1];
uint16_t enq = 0, nb_rx = 0;
- struct perf_elt *pe = NULL;
PERF_WORKER_INIT;
- uint8_t stage;
uint16_t i;
while (t->done == false) {
@@ -134,6 +140,50 @@ perf_atq_worker_burst(void *arg, const int enable_fwd_latency)
return 0;
}
+static int
+perf_atq_worker_vector(void *arg, const int enable_fwd_latency)
+{
+ uint16_t enq = 0, deq = 0;
+ struct rte_event ev;
+ PERF_WORKER_INIT;
+
+ RTE_SET_USED(sz);
+ RTE_SET_USED(cnt);
+ RTE_SET_USED(prod_crypto_type);
+
+ while (t->done == false) {
+ deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
+
+ if (!deq)
+ continue;
+
+ if (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV_VECTOR) {
+ if (perf_handle_crypto_vector_ev(&ev, &pe, enable_fwd_latency))
+ continue;
+ }
+
+ stage = ev.sub_event_type % nb_stages;
+ /* First q in pipeline, mark timestamp to compute fwd latency */
+ if (enable_fwd_latency && !prod_timer_type && stage == 0)
+ perf_mark_fwd_latency(pe);
+
+ /* Last stage in pipeline */
+ if (unlikely(stage == laststage)) {
+ perf_process_vector_last_stage(pool, t->ca_op_pool, &ev, w,
+ enable_fwd_latency);
+ } else {
+ atq_fwd_event_vector(&ev, sched_type_list, nb_stages);
+ do {
+ enq = rte_event_enqueue_burst(dev, port, &ev, 1);
+ } while (!enq && !t->done);
+ }
+ }
+
+ perf_worker_cleanup(pool, dev, port, &ev, enq, deq);
+
+ return 0;
+}
+
static int
worker_wrapper(void *arg)
{
@@ -144,7 +194,9 @@ worker_wrapper(void *arg)
const int fwd_latency = opt->fwd_latency;
/* allow compiler to optimize */
- if (!burst && !fwd_latency)
+ if (opt->ena_vector && opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR)
+ return perf_atq_worker_vector(arg, fwd_latency);
+ else if (!burst && !fwd_latency)
return perf_atq_worker(arg, 0);
else if (!burst && fwd_latency)
return perf_atq_worker(arg, 1);
diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index 140c0c2dc3..8d7e483c55 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -827,10 +827,13 @@ perf_event_timer_adapter_setup(struct test_perf *t)
static int
perf_event_crypto_adapter_setup(struct test_perf *t, struct prod_data *p)
{
+ struct rte_event_crypto_adapter_queue_conf conf;
struct evt_options *opt = t->opt;
uint32_t cap;
int ret;
+ memset(&conf, 0, sizeof(conf));
+
ret = rte_event_crypto_adapter_caps_get(p->dev_id, p->ca.cdev_id, &cap);
if (ret) {
evt_err("Failed to get crypto adapter capabilities");
@@ -849,19 +852,53 @@ perf_event_crypto_adapter_setup(struct test_perf *t, struct prod_data *p)
return -ENOTSUP;
}
- if (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND) {
- struct rte_event_crypto_adapter_queue_conf conf;
+ if (opt->ena_vector) {
+ struct rte_event_crypto_adapter_vector_limits limits;
+
+ if (!(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_EVENT_VECTOR)) {
+ evt_err("Crypto adapter doesn't support event vector");
+ return -EINVAL;
+ }
+
+ ret = rte_event_crypto_adapter_vector_limits_get(p->dev_id, p->ca.cdev_id, &limits);
+ if (ret) {
+ evt_err("Failed to get crypto adapter's vector limits");
+ return ret;
+ }
- memset(&conf, 0, sizeof(conf));
+ if (opt->vector_size < limits.min_sz || opt->vector_size > limits.max_sz) {
+ evt_err("Vector size [%d] not within limits max[%d] min[%d]",
+ opt->vector_size, limits.max_sz, limits.min_sz);
+ return -EINVAL;
+ }
+
+ if (limits.log2_sz && !rte_is_power_of_2(opt->vector_size)) {
+ evt_err("Vector size [%d] not power of 2", opt->vector_size);
+ return -EINVAL;
+ }
+
+ if (opt->vector_tmo_nsec > limits.max_timeout_ns ||
+ opt->vector_tmo_nsec < limits.min_timeout_ns) {
+ evt_err("Vector timeout [%" PRIu64 "] not within limits "
+ "max[%" PRIu64 "] min[%" PRIu64 "]",
+ opt->vector_tmo_nsec, limits.max_timeout_ns, limits.min_timeout_ns);
+ return -EINVAL;
+ }
+
+ conf.vector_mp = t->ca_vector_pool;
+ conf.vector_sz = opt->vector_size;
+ conf.vector_timeout_ns = opt->vector_tmo_nsec;
+ conf.flags |= RTE_EVENT_CRYPTO_ADAPTER_EVENT_VECTOR;
+ }
+
+ if (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND) {
conf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
conf.ev.queue_id = p->queue_id;
- ret = rte_event_crypto_adapter_queue_pair_add(
- TEST_PERF_CA_ID, p->ca.cdev_id, p->ca.cdev_qp_id, &conf);
- } else {
- ret = rte_event_crypto_adapter_queue_pair_add(
- TEST_PERF_CA_ID, p->ca.cdev_id, p->ca.cdev_qp_id, NULL);
}
+ ret = rte_event_crypto_adapter_queue_pair_add(
+ TEST_PERF_CA_ID, p->ca.cdev_id, p->ca.cdev_qp_id, &conf);
+
return ret;
}
@@ -1411,6 +1448,19 @@ perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt)
goto err;
}
+ if (opt->ena_vector) {
+ unsigned int nb_elem = (opt->pool_sz / opt->vector_size) * 2;
+ nb_elem = RTE_MAX(512U, nb_elem);
+ nb_elem += evt_nr_active_lcores(opt->wlcores) * 32;
+ t->ca_vector_pool = rte_event_vector_pool_create("vector_pool", nb_elem, 32,
+ opt->vector_size, opt->socket_id);
+ if (t->ca_vector_pool == NULL) {
+ evt_err("Failed to create event vector pool");
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+
/*
* Calculate number of needed queue pairs, based on the amount of
* available number of logical cores and crypto devices. For instance,
@@ -1467,6 +1517,7 @@ perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt)
rte_mempool_free(t->ca_op_pool);
rte_mempool_free(t->ca_sess_pool);
rte_mempool_free(t->ca_asym_sess_pool);
+ rte_mempool_free(t->ca_vector_pool);
return ret;
}
@@ -1507,6 +1558,7 @@ perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt)
rte_mempool_free(t->ca_op_pool);
rte_mempool_free(t->ca_sess_pool);
rte_mempool_free(t->ca_asym_sess_pool);
+ rte_mempool_free(t->ca_vector_pool);
}
int
diff --git a/app/test-eventdev/test_perf_common.h b/app/test-eventdev/test_perf_common.h
index 503b6aa1db..faedd471c6 100644
--- a/app/test-eventdev/test_perf_common.h
+++ b/app/test-eventdev/test_perf_common.h
@@ -71,6 +71,7 @@ struct test_perf {
struct rte_mempool *ca_op_pool;
struct rte_mempool *ca_sess_pool;
struct rte_mempool *ca_asym_sess_pool;
+ struct rte_mempool *ca_vector_pool;
} __rte_cache_aligned;
struct perf_elt {
@@ -103,6 +104,8 @@ struct perf_elt {
uint8_t cnt = 0;\
void *bufs[16] __rte_cache_aligned;\
int const sz = RTE_DIM(bufs);\
+ uint8_t stage;\
+ struct perf_elt *pe = NULL;\
if (opt->verbose_level > 1)\
printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
rte_lcore_id(), dev, port)
@@ -143,6 +146,64 @@ perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd
return 0;
}
+static __rte_always_inline struct perf_elt *
+perf_elt_from_vec_get(struct rte_event_vector *vec)
+{
+ /* Timestamp for vector event stored in first element */
+ struct rte_crypto_op *cop = vec->ptrs[0];
+ struct rte_mbuf *m;
+
+ if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+ m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
+ return rte_pktmbuf_mtod(m, struct perf_elt *);
+ } else {
+ return RTE_PTR_ADD(cop->asym->modex.result.data, cop->asym->modex.result.length);
+ }
+}
+
+static __rte_always_inline int
+perf_handle_crypto_vector_ev(struct rte_event *ev, struct perf_elt **pe,
+ const int enable_fwd_latency)
+{
+ struct rte_event_vector *vec = ev->vec;
+ struct rte_crypto_op *cop;
+ struct rte_mbuf *m;
+ int i, n = 0;
+ void *data;
+
+ for (i = 0; i < vec->nb_elem; i++) {
+ cop = vec->ptrs[i];
+ if (unlikely(cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
+ if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+ m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
+ rte_pktmbuf_free(m);
+ } else {
+ data = cop->asym->modex.result.data;
+ rte_mempool_put(rte_mempool_from_obj(data), data);
+ }
+ rte_crypto_op_free(cop);
+ continue;
+ }
+ vec->ptrs[n++] = cop;
+ }
+
+ /* All cops failed, free the vector */
+ if (n == 0) {
+ rte_mempool_put(rte_mempool_from_obj(vec), vec);
+ return -ENOENT;
+ }
+
+ vec->nb_elem = n;
+
+ /* Forward latency not enabled - perf data will be not accessed */
+ if (!enable_fwd_latency)
+ return 0;
+
+ /* Get pointer to perf data */
+ *pe = perf_elt_from_vec_get(vec);
+
+ return 0;
+}
static __rte_always_inline int
perf_process_last_stage(struct rte_mempool *const pool, uint8_t prod_crypto_type,
@@ -195,9 +256,8 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, uint8_t prod_cry
struct perf_elt *pe;
void *to_free_in_bulk;
- /* release fence here ensures event_prt is
- * stored before updating the number of
- * processed packets for worker lcores
+ /* Release fence here ensures event_prt is stored before updating the number of processed
+ * packets for worker lcores.
*/
rte_atomic_thread_fence(__ATOMIC_RELEASE);
w->processed_pkts++;
@@ -237,6 +297,42 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, uint8_t prod_cry
return count;
}
+static __rte_always_inline void
+perf_process_vector_last_stage(struct rte_mempool *const pool,
+ struct rte_mempool *const ca_pool, struct rte_event *const ev,
+ struct worker_data *const w, const bool enable_fwd_latency)
+{
+ struct rte_event_vector *vec = ev->vec;
+ struct rte_crypto_op *cop;
+ void *bufs[vec->nb_elem];
+ struct perf_elt *pe;
+ uint64_t latency;
+ int i;
+
+ /* Release fence here ensures event_prt is stored before updating the number of processed
+ * packets for worker lcores.
+ */
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
+ w->processed_pkts += vec->nb_elem;
+
+ if (enable_fwd_latency) {
+ pe = perf_elt_from_vec_get(vec);
+ latency = rte_get_timer_cycles() - pe->timestamp;
+ w->latency += latency;
+ }
+
+ for (i = 0; i < vec->nb_elem; i++) {
+ cop = vec->ptrs[i];
+ if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
+ bufs[i] = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
+ else
+ bufs[i] = cop->asym->modex.result.data;
+ }
+
+ rte_mempool_put_bulk(pool, bufs, vec->nb_elem);
+ rte_mempool_put_bulk(ca_pool, (void * const *)vec->ptrs, vec->nb_elem);
+ rte_mempool_put(rte_mempool_from_obj(vec), vec);
+}
static inline int
perf_nb_event_ports(struct evt_options *opt)
diff --git a/app/test-eventdev/test_perf_queue.c b/app/test-eventdev/test_perf_queue.c
index 69ef0ebbac..2399cfb69b 100644
--- a/app/test-eventdev/test_perf_queue.c
+++ b/app/test-eventdev/test_perf_queue.c
@@ -25,15 +25,22 @@ fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
ev->event_type = RTE_EVENT_TYPE_CPU;
}
+static __rte_always_inline void
+fwd_event_vector(struct rte_event *const ev, uint8_t *const sched_type_list,
+ const uint8_t nb_stages)
+{
+ ev->queue_id++;
+ ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
+ ev->op = RTE_EVENT_OP_FORWARD;
+ ev->event_type = RTE_EVENT_TYPE_CPU_VECTOR;
+}
+
static int
perf_queue_worker(void *arg, const int enable_fwd_latency)
{
- struct perf_elt *pe = NULL;
uint16_t enq = 0, deq = 0;
struct rte_event ev;
PERF_WORKER_INIT;
- uint8_t stage;
-
while (t->done == false) {
deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -82,9 +89,7 @@ perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
/* +1 to avoid prefetch out of array check */
struct rte_event ev[BURST_SIZE + 1];
uint16_t enq = 0, nb_rx = 0;
- struct perf_elt *pe = NULL;
PERF_WORKER_INIT;
- uint8_t stage;
uint16_t i;
while (t->done == false) {
@@ -137,6 +142,50 @@ perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
return 0;
}
+static int
+perf_queue_worker_vector(void *arg, const int enable_fwd_latency)
+{
+ uint16_t enq = 0, deq = 0;
+ struct rte_event ev;
+ PERF_WORKER_INIT;
+
+ RTE_SET_USED(sz);
+ RTE_SET_USED(cnt);
+ RTE_SET_USED(prod_crypto_type);
+
+ while (t->done == false) {
+ deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
+
+ if (!deq)
+ continue;
+
+ if (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV_VECTOR) {
+ if (perf_handle_crypto_vector_ev(&ev, &pe, enable_fwd_latency))
+ continue;
+ }
+
+ stage = ev.queue_id % nb_stages;
+ /* First q in pipeline, mark timestamp to compute fwd latency */
+ if (enable_fwd_latency && !prod_timer_type && stage == 0)
+ perf_mark_fwd_latency(pe);
+
+ /* Last stage in pipeline */
+ if (unlikely(stage == laststage)) {
+ perf_process_vector_last_stage(pool, t->ca_op_pool, &ev, w,
+ enable_fwd_latency);
+ } else {
+ fwd_event_vector(&ev, sched_type_list, nb_stages);
+ do {
+ enq = rte_event_enqueue_burst(dev, port, &ev, 1);
+ } while (!enq && !t->done);
+ }
+ }
+
+ perf_worker_cleanup(pool, dev, port, &ev, enq, deq);
+
+ return 0;
+}
+
static int
worker_wrapper(void *arg)
{
@@ -147,7 +196,9 @@ worker_wrapper(void *arg)
const int fwd_latency = opt->fwd_latency;
/* allow compiler to optimize */
- if (!burst && !fwd_latency)
+ if (opt->ena_vector && opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR)
+ return perf_queue_worker_vector(arg, fwd_latency);
+ else if (!burst && !fwd_latency)
return perf_queue_worker(arg, 0);
else if (!burst && fwd_latency)
return perf_queue_worker(arg, 1);
diff --git a/doc/guides/tools/testeventdev.rst b/doc/guides/tools/testeventdev.rst
index cd278e8998..6f065b9752 100644
--- a/doc/guides/tools/testeventdev.rst
+++ b/doc/guides/tools/testeventdev.rst
@@ -185,18 +185,18 @@ The following are the application command-line options:
* ``--enable_vector``
- Enable event vector for Rx/Tx adapters.
- Only applicable for `pipeline_atq` and `pipeline_queue` tests.
+ Enable event vector for Rx/Tx/crypto adapters.
+ Only applicable for `pipeline_*` and `perf_*` tests.
* ``--vector_size``
- Vector size to configure for the Rx adapter.
- Only applicable for `pipeline_atq` and `pipeline_queue` tests.
+ Vector size to configure for the Rx/crypto adapter.
+ Only applicable for `pipeline_*` and `perf_*` tests.
* ``--vector_tmo_ns``
- Vector timeout nanoseconds to be configured for the Rx adapter.
- Only applicable for `pipeline_atq` and `pipeline_queue` tests.
+ Vector timeout nanoseconds to be configured for the Rx/crypto adapter.
+ Only applicable for `pipeline_*` and `perf_*` tests.
* ``--per_port_pool``
--
2.25.1
* RE: [PATCH] app/testeventdev: add vector worker to perf test
2022-12-02 10:11 [PATCH] app/testeventdev: add vector worker to perf test Volodymyr Fialko
@ 2022-12-06 9:27 ` Volodymyr Fialko
2023-01-17 15:29 ` [EXT] " Shijith Thotton
1 sibling, 0 replies; 4+ messages in thread
From: Volodymyr Fialko @ 2022-12-06 9:27 UTC (permalink / raw)
To: Volodymyr Fialko, dev, Jerin Jacob Kollanukkaran
Cc: Anoob Joseph, Akhil Goyal
> -----Original Message-----
> From: Volodymyr Fialko <vfialko@marvell.com>
> Sent: Friday, December 2, 2022 11:12 AM
> To: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>
> Cc: Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>; Volodymyr Fialko
> <vfialko@marvell.com>
> Subject: [PATCH] app/testeventdev: add vector worker to perf test
>
> Add a worker for handling vector events to the perf tests. Vector events can be generated by the crypto
> adapter producer.
>
> Example:
> ./dpdk-test-eventdev -l 0-2 -a <EVENTDEV> -a <CRYPTODEV> -- \
> --prod_type_cryptodev --crypto_adptr_mode 1 --test=perf_queue \
> --stlist=a --wlcores 1 --plcores 2 --prod_enq_burst_sz 32 \
> --enable_vector --vector_tmo_ns 0 --nb_flows 2
>
> Signed-off-by: Volodymyr Fialko <vfialko@marvell.com>
> ---
Depends-on: series-26008 ("build: fix missing crypto vec limits in version")
* RE: [EXT] [PATCH] app/testeventdev: add vector worker to perf test
2022-12-02 10:11 [PATCH] app/testeventdev: add vector worker to perf test Volodymyr Fialko
2022-12-06 9:27 ` Volodymyr Fialko
@ 2023-01-17 15:29 ` Shijith Thotton
2023-01-24 11:53 ` Jerin Jacob
1 sibling, 1 reply; 4+ messages in thread
From: Shijith Thotton @ 2023-01-17 15:29 UTC (permalink / raw)
To: Volodymyr Fialko, dev, Jerin Jacob Kollanukkaran
Cc: Anoob Joseph, Akhil Goyal, Volodymyr Fialko
>Add a worker for handling vector events to the perf tests. Vector events can
>be generated by the crypto adapter producer.
>
>Example:
> ./dpdk-test-eventdev -l 0-2 -a <EVENTDEV> -a <CRYPTODEV> -- \
> --prod_type_cryptodev --crypto_adptr_mode 1 --test=perf_queue \
> --stlist=a --wlcores 1 --plcores 2 --prod_enq_burst_sz 32 \
> --enable_vector --vector_tmo_ns 0 --nb_flows 2
>
>Signed-off-by: Volodymyr Fialko <vfialko@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
* Re: [EXT] [PATCH] app/testeventdev: add vector worker to perf test
2023-01-17 15:29 ` [EXT] " Shijith Thotton
@ 2023-01-24 11:53 ` Jerin Jacob
0 siblings, 0 replies; 4+ messages in thread
From: Jerin Jacob @ 2023-01-24 11:53 UTC (permalink / raw)
To: Shijith Thotton
Cc: Volodymyr Fialko, dev, Jerin Jacob Kollanukkaran, Anoob Joseph,
Akhil Goyal
On Tue, Jan 17, 2023 at 8:59 PM Shijith Thotton <sthotton@marvell.com> wrote:
>
>
> >Add a worker for handling vector events to the perf tests. Vector events can
> >be generated by the crypto adapter producer.
> >
> >Example:
> > ./dpdk-test-eventdev -l 0-2 -a <EVENTDEV> -a <CRYPTODEV> -- \
> > --prod_type_cryptodev --crypto_adptr_mode 1 --test=perf_queue \
> > --stlist=a --wlcores 1 --plcores 2 --prod_enq_burst_sz 32 \
> > --enable_vector --vector_tmo_ns 0 --nb_flows 2
> >
> >Signed-off-by: Volodymyr Fialko <vfialko@marvell.com>
>
> Acked-by: Shijith Thotton <sthotton@marvell.com>
Applied to dpdk-next-net-eventdev/for-main. Thanks