v6: minor refactory suggested by Tom Rix, v5: correcting typos again. Quite worrisome spelling disorder. v4: rebased on main latest v3: apologize again for typo and not double checking with check-git-log v2: typos missed in commit messages Serie updating and extending the app running the bbdev-test for the existing bbdev PMDs. Nicolas Chautru (7): app/bbdev: add explicit ut for latency vs validation app/bbdev: add explicit check for counters app/bbdev: include explicit HARQ preloading app/bbdev: define wait for offload app/bbdev: skip bler ut when compression is used app/bbdev: reduce duration of throughput test app/bbdev: update offload test to dequeue full ring app/test-bbdev/main.h | 1 + app/test-bbdev/test_bbdev_perf.c | 158 +++++++++++++++++++++++++-------------- 2 files changed, 103 insertions(+), 56 deletions(-) -- 1.8.3.1
Adding explicit different ut when testing for validation or latency (early termination enabled or not). Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com> Acked-by: Aidan Goddard <aidan.goddard@accelercomm.com> Acked-by: Dave Burley <dave.burley@accelercomm.com> --- app/test-bbdev/test_bbdev_perf.c | 55 +++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 6e5535d..04334e3 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -3999,12 +3999,14 @@ typedef int (test_case_function)(struct active_device *ad, return i; } +/* Test case for latency/validation for LDPC Decoder */ static int latency_test_ldpc_dec(struct rte_mempool *mempool, struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, int vector_mask, uint16_t dev_id, uint16_t queue_id, const uint16_t num_to_process, uint16_t burst_sz, - uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) + uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, + bool disable_et) { int ret = TEST_SUCCESS; uint16_t i, j, dequeued; @@ -4026,7 +4028,7 @@ typedef int (test_case_function)(struct active_device *ad, "rte_bbdev_dec_op_alloc_bulk() failed"); /* For latency tests we need to disable early termination */ - if (check_bit(ref_op->ldpc_dec.op_flags, + if (disable_et && check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; @@ -4221,9 +4223,10 @@ typedef int (test_case_function)(struct active_device *ad, return i; } +/* Common function for running validation and latency test cases */ static int -latency_test(struct active_device *ad, - struct test_op_params *op_params) +validation_latency_test(struct active_device *ad, + struct test_op_params *op_params, bool latency_flag) { int iter; uint16_t burst_sz = op_params->burst_sz; @@ -4248,7 +4251,11 @@ typedef int (test_case_function)(struct active_device *ad, TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); printf("+ ------------------------------------------------------- +\n"); - printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", + if (latency_flag) + printf("== test: latency\ndev:"); + else + printf("== test: validation\ndev:"); + printf("%s, burst size: %u, num ops: %u, op type: %s\n", info.dev_name, burst_sz, num_to_process, op_type_str); if (op_type == RTE_BBDEV_OP_TURBO_DEC) @@ -4256,11 +4263,6 @@ typedef int (test_case_function)(struct active_device *ad, op_params->ref_dec_op, op_params->vector_mask, ad->dev_id, queue_id, num_to_process, burst_sz, &total_time, &min_time, &max_time); - else if (op_type == RTE_BBDEV_OP_TURBO_ENC) - iter = latency_test_enc(op_params->mp, bufs, - op_params->ref_enc_op, ad->dev_id, queue_id, - num_to_process, burst_sz, &total_time, - &min_time, &max_time); else if (op_type == RTE_BBDEV_OP_LDPC_ENC) iter = latency_test_ldpc_enc(op_params->mp, bufs, op_params->ref_enc_op, ad->dev_id, queue_id, @@ -4270,13 +4272,14 @@ typedef int (test_case_function)(struct active_device *ad, iter = latency_test_ldpc_dec(op_params->mp, bufs, op_params->ref_dec_op, op_params->vector_mask, ad->dev_id, queue_id, num_to_process, - burst_sz, &total_time, &min_time, &max_time); - else + burst_sz, &total_time, &min_time, &max_time, + latency_flag); + else /* RTE_BBDEV_OP_TURBO_ENC */ iter = latency_test_enc(op_params->mp, bufs, - op_params->ref_enc_op, - ad->dev_id, queue_id, - num_to_process, burst_sz, &total_time, - &min_time, &max_time); + op_params->ref_enc_op, + ad->dev_id, queue_id, + num_to_process, burst_sz, &total_time, + &min_time, &max_time); if (iter <= 0) return TEST_FAILED; @@ -4295,6 +4298,18 @@ typedef int (test_case_function)(struct active_device *ad, return TEST_SUCCESS; } +static int +latency_test(struct active_device *ad, struct test_op_params *op_params) +{ + return validation_latency_test(ad, op_params, true); +} + +static int +validation_test(struct active_device *ad, struct test_op_params *op_params) +{ + return validation_latency_test(ad, op_params, false); +} + #ifdef RTE_BBDEV_OFFLOAD_COST static int get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id, @@ -4930,6 +4945,12 @@ typedef int (test_case_function)(struct active_device *ad, } static int +validation_tc(void) +{ + return run_test_case(validation_test); +} + +static int interrupt_tc(void) { return run_test_case(throughput_test); @@ -4960,7 +4981,7 @@ typedef int (test_case_function)(struct active_device *ad, .setup = testsuite_setup, .teardown = testsuite_teardown, .unit_test_cases = { - TEST_CASE_ST(ut_setup, ut_teardown, latency_tc), + TEST_CASE_ST(ut_setup, ut_teardown, validation_tc), TEST_CASES_END() /**< NULL terminate unit test array */ } }; -- 1.8.3.1
Adding explicit check in ut that the stats counters have the expect values. This was identified as a gap from code coverage. Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com> Acked-by: Aidan Goddard <aidan.goddard@accelercomm.com> Acked-by: Dave Burley <dave.burley@accelercomm.com> --- app/test-bbdev/test_bbdev_perf.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 04334e3..f32cb1c 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -4777,6 +4777,23 @@ typedef int (test_case_function)(struct active_device *ad, (double)(time_st.deq_max_time * 1000000) / rte_get_tsc_hz()); + struct rte_bbdev_stats stats = {0}; + get_bbdev_queue_stats(ad->dev_id, queue_id, &stats); + if (op_type != RTE_BBDEV_OP_LDPC_DEC) { + TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process, + "Mismatch in enqueue count %10"PRIu64" %d", + stats.enqueued_count, num_to_process); + TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process, + "Mismatch in dequeue count %10"PRIu64" %d", + stats.dequeued_count, num_to_process); + } + TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0, + "Enqueue count Error %10"PRIu64"", + stats.enqueue_err_count); + TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0, + "Dequeue count Error (%10"PRIu64"", + stats.dequeue_err_count); + return TEST_SUCCESS; #endif } -- 1.8.3.1
Run preloading explicitly for unit tests. Load each code block by reusing existing input op then restore for the actual test. Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com> Acked-by: Liu Tianjiao <tianjiao.liu@intel.com> --- app/test-bbdev/main.h | 1 + app/test-bbdev/test_bbdev_perf.c | 53 +++++++++++++++++++++------------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/app/test-bbdev/main.h b/app/test-bbdev/main.h index fb3dec8..dc10a50 100644 --- a/app/test-bbdev/main.h +++ b/app/test-bbdev/main.h @@ -17,6 +17,7 @@ #define TEST_SKIPPED 1 #define MAX_BURST 512U +#define MAX_OPS 1024U #define DEFAULT_BURST 32U #define DEFAULT_OPS 64U #define DEFAULT_ITER 6U diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index f32cb1c..969e1d1 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -2467,7 +2467,7 @@ typedef int (test_case_function)(struct active_device *ad, { uint16_t j; int save_status, ret; - uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024; + uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; uint32_t flags = ops[0]->ldpc_dec.op_flags; bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; @@ -2513,20 +2513,20 @@ typedef int (test_case_function)(struct active_device *ad, bool preload) { uint16_t j; - int ret; - uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024; - struct rte_bbdev_op_data save_hc_in, save_hc_out; - struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; + int deq; + uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; + struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS]; + struct rte_bbdev_dec_op *ops_deq[MAX_OPS]; uint32_t flags = ops[0]->ldpc_dec.op_flags; bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; - for (j = 0; j < n; ++j) { - if ((mem_in || hc_in) && preload) { - save_hc_in = ops[j]->ldpc_dec.harq_combined_input; - save_hc_out = ops[j]->ldpc_dec.harq_combined_output; + if ((mem_in || hc_in) && preload) { + for (j = 0; j < n; ++j) { + save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input; + save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output; ops[j]->ldpc_dec.op_flags = RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; @@ -2536,16 +2536,23 @@ typedef int (test_case_function)(struct active_device *ad, ops[j]->ldpc_dec.harq_combined_output.offset = harq_offset; ops[j]->ldpc_dec.harq_combined_input.offset = 0; - rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, - &ops[j], 1); - ret = 0; - while (ret == 0) - ret = rte_bbdev_dequeue_ldpc_dec_ops( - dev_id, queue_id, &ops_deq[j], 1); + harq_offset += HARQ_INCR; + } + rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n); + deq = 0; + while (deq != n) + deq += rte_bbdev_dequeue_ldpc_dec_ops( + dev_id, queue_id, &ops_deq[deq], + n - deq); + /* Restore the operations */ + for (j = 0; j < n; ++j) { ops[j]->ldpc_dec.op_flags = flags; - ops[j]->ldpc_dec.harq_combined_input = save_hc_in; - ops[j]->ldpc_dec.harq_combined_output = save_hc_out; + ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j]; + ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j]; } + } + harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; + for (j = 0; j < n; ++j) { /* Adjust HARQ offset when we reach external DDR */ if (mem_in || hc_in) ops[j]->ldpc_dec.harq_combined_input.offset @@ -3231,11 +3238,9 @@ typedef int (test_case_function)(struct active_device *ad, mbuf_reset( ops_enq[j]->ldpc_dec.harq_combined_output.data); } - if (extDdr) { - bool preload = i == (TEST_REPETITIONS - 1); + if (extDdr) preload_harq_ddr(tp->dev_id, queue_id, ops_enq, - num_ops, preload); - } + num_ops, true); start_time = rte_rdtsc_precise(); for (enq = 0, deq = 0; enq < num_ops;) { @@ -3362,11 +3367,9 @@ typedef int (test_case_function)(struct active_device *ad, mbuf_reset( ops_enq[j]->ldpc_dec.harq_combined_output.data); } - if (extDdr) { - bool preload = i == (TEST_REPETITIONS - 1); + if (extDdr) preload_harq_ddr(tp->dev_id, queue_id, ops_enq, - num_ops, preload); - } + num_ops, true); start_time = rte_rdtsc_precise(); for (enq = 0, deq = 0; enq < num_ops;) { -- 1.8.3.1
Replacing magic number for default wait time for hw offload. Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com> Acked-by: Liu Tianjiao <tianjiao.liu@intel.com> --- app/test-bbdev/test_bbdev_perf.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 969e1d1..fef9d95 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -25,6 +25,7 @@ #define MAX_QUEUES RTE_MAX_LCORE #define TEST_REPETITIONS 1000 +#define WAIT_OFFLOAD_US 1000 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC #include <fpga_lte_fec.h> @@ -4388,7 +4389,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); @@ -4479,7 +4480,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); @@ -4567,7 +4568,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); @@ -4650,7 +4651,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); -- 1.8.3.1
bler test results are not valid when LLR compression is used or for loopback scenarios. Skipping these. Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com> Acked-by: Aidan Goddard <aidan.goddard@accelercomm.com> Acked-by: Dave Burley <dave.burley@accelercomm.com> Reviewed-by: Tom Rix <trix@redhat.com> --- app/test-bbdev/test_bbdev_perf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index fef9d95..01d243a 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -3719,7 +3719,11 @@ typedef int (test_case_function)(struct active_device *ad, RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE)); - if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) + if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) && + !check_bit(test_vector.ldpc_dec.op_flags, + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) + && !check_bit(test_vector.ldpc_dec.op_flags, + RTE_BBDEV_LDPC_LLR_COMPRESSION)) bler_function = bler_pmd_lcore_ldpc_dec; else return TEST_SKIPPED; -- 1.8.3.1
Reducing number of repetitions from 1000 to 100 to save time. Results are accurate enough with 100 loops. Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com> Acked-by: Liu Tianjiao <tianjiao.liu@intel.com> Reviewed-by: Tom Rix <trix@redhat.com> --- app/test-bbdev/test_bbdev_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 01d243a..66264b3 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -24,7 +24,7 @@ #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id)) #define MAX_QUEUES RTE_MAX_LCORE -#define TEST_REPETITIONS 1000 +#define TEST_REPETITIONS 100 #define WAIT_OFFLOAD_US 1000 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC -- 1.8.3.1
update offload dequeue to retrieve the full ring to be agnostic of implementation. Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com> Acked-by: Aidan Goddard <aidan.goddard@accelercomm.com> Acked-by: Dave Burley <dave.burley@accelercomm.com> --- app/test-bbdev/test_bbdev_perf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 66264b3..6183193 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -4400,8 +4400,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, @@ -4491,8 +4491,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, @@ -4579,8 +4579,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, @@ -4662,8 +4662,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, -- 1.8.3.1
> Subject: [PATCH v6 0/7] BBDEV test updates
>
> v6: minor refactory suggested by Tom Rix,
> v5: correcting typos again. Quite worrisome spelling disorder.
> v4: rebased on main latest
> v3: apologize again for typo and not double checking with check-git-log
> v2: typos missed in commit messages
> Serie updating and extending the app running the bbdev-test for the existing
> bbdev PMDs.
>
> Nicolas Chautru (7):
> app/bbdev: add explicit ut for latency vs validation
> app/bbdev: add explicit check for counters
> app/bbdev: include explicit HARQ preloading
> app/bbdev: define wait for offload
> app/bbdev: skip bler ut when compression is used
> app/bbdev: reduce duration of throughput test
> app/bbdev: update offload test to dequeue full ring
>
> app/test-bbdev/main.h | 1 +
> app/test-bbdev/test_bbdev_perf.c | 158 +++++++++++++++++++++++++----------
> ----
Series applied to dpdk-next-crypto
Thanks.