From: Kamil Chalupnik <kamilx.chalupnik@intel.com>
To: dev@dpdk.org
Cc: amr.mokhtar@intel.com, pablo.de.lara.guarch@intel.com,
KamilX Chalupnik <kamilx.chalupnik@intel.com>
Subject: [dpdk-dev] [PATCH v2 07/14] bbdev: measure offload cost
Date: Wed, 9 May 2018 16:30:02 +0200 [thread overview]
Message-ID: <20180509143002.16024-1-kamilx.chalupnik@intel.com> (raw)
In-Reply-To: <20180426133008.12388-9-kamilx.chalupnik@intel.com>
From: KamilX Chalupnik <kamilx.chalupnik@intel.com>
Add a new test that measures offload cost.
The change touches the bbdev API, the turbo software driver
and the test application.
Signed-off-by: Kamil Chalupnik <kamilx.chalupnik@intel.com>
Acked-by: Amr Mokhtar <amr.mokhtar@intel.com>
---
app/test-bbdev/test_bbdev_perf.c | 355 ++++++++++++++++++-----
config/common_base | 5 +
drivers/baseband/turbo_sw/bbdev_turbo_software.c | 55 +++-
lib/librte_bbdev/rte_bbdev.h | 2 +
4 files changed, 338 insertions(+), 79 deletions(-)
diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index f358083..a7f97ff 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -84,6 +84,30 @@ struct thread_params {
struct test_op_params *op_params;
};
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+/* TSC cycle statistics gathered by the offload cost test */
+struct test_time_stats {
+ /* Sum of enqueue time per burst, minus the driver's offload_time */
+ uint64_t enq_sw_tot_time;
+ /* Smallest per-burst software enqueue time seen so far */
+ uint64_t enq_sw_min_time;
+ /* Largest per-burst software enqueue time seen so far */
+ uint64_t enq_sw_max_time;
+ /* Sum of offload_time read from queue stats after each burst */
+ uint64_t enq_tur_tot_time;
+ /* Smallest per-burst offload_time seen so far */
+ uint64_t enq_tur_min_time;
+ /* Largest per-burst offload_time seen so far */
+ uint64_t enq_tur_max_time;
+ /* Sum of the time taken to dequeue the first op of each burst */
+ uint64_t deq_tot_time;
+ /* Smallest single-op dequeue time seen so far */
+ uint64_t deq_min_time;
+ /* Largest single-op dequeue time seen so far */
+ uint64_t deq_max_time;
+};
+#endif
+
typedef int (test_case_function)(struct active_device *ad,
struct test_op_params *op_params);
@@ -1144,7 +1168,6 @@ dequeue_event_callback(uint16_t dev_id,
double in_len;
struct thread_params *tp = cb_arg;
-
RTE_SET_USED(ret_param);
queue_id = tp->queue_id;
@@ -1689,20 +1712,21 @@ throughput_test(struct active_device *ad,
}
static int
-operation_latency_test_dec(struct rte_mempool *mempool,
+latency_test_dec(struct rte_mempool *mempool,
struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
int vector_mask, uint16_t dev_id, uint16_t queue_id,
const uint16_t num_to_process, uint16_t burst_sz,
- uint64_t *total_time)
+ uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
int ret = TEST_SUCCESS;
uint16_t i, j, dequeued;
struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
- uint64_t start_time = 0;
+ uint64_t start_time = 0, last_time = 0;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
bool first_time = true;
+ last_time = 0;
if (unlikely(num_to_process - dequeued < burst_sz))
burst_sz = num_to_process - dequeued;
@@ -1732,11 +1756,15 @@ operation_latency_test_dec(struct rte_mempool *mempool,
deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
if (likely(first_time && (deq > 0))) {
- *total_time += rte_rdtsc_precise() - start_time;
+ last_time = rte_rdtsc_precise() - start_time;
first_time = false;
}
} while (unlikely(burst_sz != deq));
+ *max_time = RTE_MAX(*max_time, last_time);
+ *min_time = RTE_MIN(*min_time, last_time);
+ *total_time += last_time;
+
if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
ret = validate_dec_op(ops_deq, burst_sz, ref_op,
vector_mask);
@@ -1751,20 +1779,21 @@ operation_latency_test_dec(struct rte_mempool *mempool,
}
static int
-operation_latency_test_enc(struct rte_mempool *mempool,
+latency_test_enc(struct rte_mempool *mempool,
struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
uint16_t dev_id, uint16_t queue_id,
const uint16_t num_to_process, uint16_t burst_sz,
- uint64_t *total_time)
+ uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
int ret = TEST_SUCCESS;
uint16_t i, j, dequeued;
struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
- uint64_t start_time = 0;
+ uint64_t start_time = 0, last_time = 0;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
bool first_time = true;
+ last_time = 0;
if (unlikely(num_to_process - dequeued < burst_sz))
burst_sz = num_to_process - dequeued;
@@ -1793,11 +1822,15 @@ operation_latency_test_enc(struct rte_mempool *mempool,
deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
if (likely(first_time && (deq > 0))) {
- *total_time += rte_rdtsc_precise() - start_time;
+ last_time += rte_rdtsc_precise() - start_time;
first_time = false;
}
} while (unlikely(burst_sz != deq));
+ *max_time = RTE_MAX(*max_time, last_time);
+ *min_time = RTE_MIN(*min_time, last_time);
+ *total_time += last_time;
+
if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
ret = validate_enc_op(ops_deq, burst_sz, ref_op);
TEST_ASSERT_SUCCESS(ret, "Validation failed!");
@@ -1811,7 +1844,7 @@ operation_latency_test_enc(struct rte_mempool *mempool,
}
static int
-operation_latency_test(struct active_device *ad,
+latency_test(struct active_device *ad,
struct test_op_params *op_params)
{
int iter;
@@ -1821,9 +1854,12 @@ operation_latency_test(struct active_device *ad,
const uint16_t queue_id = ad->queue_ids[0];
struct test_buffers *bufs = NULL;
struct rte_bbdev_info info;
- uint64_t total_time = 0;
+ uint64_t total_time, min_time, max_time;
const char *op_type_str;
+ total_time = max_time = 0;
+ min_time = UINT64_MAX;
+
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
@@ -1838,36 +1874,66 @@ operation_latency_test(struct active_device *ad,
info.dev_name, burst_sz, num_to_process, op_type_str);
if (op_type == RTE_BBDEV_OP_TURBO_DEC)
- iter = operation_latency_test_dec(op_params->mp, bufs,
+ iter = latency_test_dec(op_params->mp, bufs,
op_params->ref_dec_op, op_params->vector_mask,
ad->dev_id, queue_id, num_to_process,
- burst_sz, &total_time);
+ burst_sz, &total_time, &min_time, &max_time);
else
- iter = operation_latency_test_enc(op_params->mp, bufs,
+ iter = latency_test_enc(op_params->mp, bufs,
op_params->ref_enc_op, ad->dev_id, queue_id,
- num_to_process, burst_sz, &total_time);
+ num_to_process, burst_sz, &total_time,
+ &min_time, &max_time);
if (iter <= 0)
return TEST_FAILED;
- printf("\toperation avg. latency: %lg cycles, %lg us\n",
+ printf("\toperation latency:\n"
+ "\t\tavg latency: %lg cycles, %lg us\n"
+ "\t\tmin latency: %lg cycles, %lg us\n"
+ "\t\tmax latency: %lg cycles, %lg us\n",
(double)total_time / (double)iter,
(double)(total_time * 1000000) / (double)iter /
+ (double)rte_get_tsc_hz(), (double)min_time,
+ (double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
+ (double)max_time, (double)(max_time * 1000000) /
(double)rte_get_tsc_hz());
return TEST_SUCCESS;
}
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+static int
+get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
+ struct rte_bbdev_stats *stats)
+{
+ struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
+ struct rte_bbdev_stats *q_stats;
+ /* Copy one queue's counters into *stats: -1 on bad queue_id, else 0 */
+ if (queue_id >= dev->data->num_queues)
+ return -1;
+ /* NOTE(review): dev_id is used as an index above without a check */
+ q_stats = &dev->data->queues[queue_id].queue_stats;
+
+ stats->enqueued_count = q_stats->enqueued_count;
+ stats->dequeued_count = q_stats->dequeued_count;
+ stats->enqueue_err_count = q_stats->enqueue_err_count;
+ stats->dequeue_err_count = q_stats->dequeue_err_count;
+ stats->offload_time = q_stats->offload_time;
+
+ return 0;
+}
+
static int
offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
uint16_t queue_id, const uint16_t num_to_process,
- uint16_t burst_sz, uint64_t *enq_total_time,
- uint64_t *deq_total_time)
+ uint16_t burst_sz, struct test_time_stats *time_st)
{
- int i, dequeued;
+ int i, dequeued, ret;
struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
uint64_t enq_start_time, deq_start_time;
+ uint64_t enq_sw_last_time, deq_last_time;
+ struct rte_bbdev_stats stats;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
@@ -1883,24 +1949,54 @@ offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
bufs->soft_outputs,
ref_op);
- /* Start time measurment for enqueue function offload latency */
- enq_start_time = rte_rdtsc();
+ /* Start time meas for enqueue function offload latency */
+ enq_start_time = rte_rdtsc_precise();
do {
enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
&ops_enq[enq], burst_sz - enq);
} while (unlikely(burst_sz != enq));
- *enq_total_time += rte_rdtsc() - enq_start_time;
+
+ ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
+ TEST_ASSERT_SUCCESS(ret,
+ "Failed to get stats for queue (%u) of device (%u)",
+ queue_id, dev_id);
+
+ enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
+ stats.offload_time;
+ time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
+ enq_sw_last_time);
+ time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
+ enq_sw_last_time);
+ time_st->enq_sw_tot_time += enq_sw_last_time;
+
+ time_st->enq_tur_max_time = RTE_MAX(time_st->enq_tur_max_time,
+ stats.offload_time);
+ time_st->enq_tur_min_time = RTE_MIN(time_st->enq_tur_min_time,
+ stats.offload_time);
+ time_st->enq_tur_tot_time += stats.offload_time;
/* ensure enqueue has been completed */
rte_delay_ms(10);
- /* Start time measurment for dequeue function offload latency */
- deq_start_time = rte_rdtsc();
+ /* Start time meas for dequeue function offload latency */
+ deq_start_time = rte_rdtsc_precise();
+ /* Dequeue one operation */
do {
deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
+ &ops_deq[deq], 1);
+ } while (unlikely(deq != 1));
+
+ deq_last_time = rte_rdtsc_precise() - deq_start_time;
+ time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
+ deq_last_time);
+ time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
+ deq_last_time);
+ time_st->deq_tot_time += deq_last_time;
+
+ /* Dequeue remaining operations if needed */
+ while (burst_sz != deq)
+ deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
- } while (unlikely(burst_sz != deq));
- *deq_total_time += rte_rdtsc() - deq_start_time;
rte_bbdev_dec_op_free_bulk(ops_enq, deq);
dequeued += deq;
@@ -1913,12 +2009,13 @@ static int
offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
uint16_t queue_id, const uint16_t num_to_process,
- uint16_t burst_sz, uint64_t *enq_total_time,
- uint64_t *deq_total_time)
+ uint16_t burst_sz, struct test_time_stats *time_st)
{
- int i, dequeued;
+ int i, dequeued, ret;
struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
uint64_t enq_start_time, deq_start_time;
+ uint64_t enq_sw_last_time, deq_last_time;
+ struct rte_bbdev_stats stats;
for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
uint16_t enq = 0, deq = 0;
@@ -1933,24 +2030,53 @@ offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
bufs->hard_outputs,
ref_op);
- /* Start time measurment for enqueue function offload latency */
- enq_start_time = rte_rdtsc();
+ /* Start time meas for enqueue function offload latency */
+ enq_start_time = rte_rdtsc_precise();
do {
enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
&ops_enq[enq], burst_sz - enq);
} while (unlikely(burst_sz != enq));
- *enq_total_time += rte_rdtsc() - enq_start_time;
+
+ ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
+ TEST_ASSERT_SUCCESS(ret,
+ "Failed to get stats for queue (%u) of device (%u)",
+ queue_id, dev_id);
+
+ enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
+ stats.offload_time;
+ time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
+ enq_sw_last_time);
+ time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
+ enq_sw_last_time);
+ time_st->enq_sw_tot_time += enq_sw_last_time;
+
+ time_st->enq_tur_max_time = RTE_MAX(time_st->enq_tur_max_time,
+ stats.offload_time);
+ time_st->enq_tur_min_time = RTE_MIN(time_st->enq_tur_min_time,
+ stats.offload_time);
+ time_st->enq_tur_tot_time += stats.offload_time;
/* ensure enqueue has been completed */
rte_delay_ms(10);
- /* Start time measurment for dequeue function offload latency */
- deq_start_time = rte_rdtsc();
+ /* Start time meas for dequeue function offload latency */
+ deq_start_time = rte_rdtsc_precise();
+ /* Dequeue one operation */
do {
deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
+ &ops_deq[deq], 1);
+ } while (unlikely(deq != 1));
+
+ deq_last_time = rte_rdtsc_precise() - deq_start_time;
+ time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
+ deq_last_time);
+ time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
+ deq_last_time);
+ time_st->deq_tot_time += deq_last_time;
+
+ while (burst_sz != deq)
+ deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
&ops_deq[deq], burst_sz - deq);
- } while (unlikely(burst_sz != deq));
- *deq_total_time += rte_rdtsc() - deq_start_time;
rte_bbdev_enc_op_free_bulk(ops_enq, deq);
dequeued += deq;
@@ -1958,13 +2084,20 @@ offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
return i;
}
+#endif
static int
-offload_latency_test(struct active_device *ad,
+offload_cost_test(struct active_device *ad,
struct test_op_params *op_params)
{
+#ifndef RTE_TEST_BBDEV_OFFLOAD_COST
+ RTE_SET_USED(ad);
+ RTE_SET_USED(op_params);
+ printf(
+ "Offload latency test is disabled. Set RTE_TEST_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
+ return TEST_SKIPPED;
+#else
int iter;
- uint64_t enq_total_time = 0, deq_total_time = 0;
uint16_t burst_sz = op_params->burst_sz;
const uint16_t num_to_process = op_params->num_to_process;
const enum rte_bbdev_op_type op_type = test_vector.op_type;
@@ -1972,6 +2105,12 @@ offload_latency_test(struct active_device *ad,
struct test_buffers *bufs = NULL;
struct rte_bbdev_info info;
const char *op_type_str;
+ struct test_time_stats time_st;
+
+ memset(&time_st, 0, sizeof(struct test_time_stats));
+ time_st.enq_sw_min_time = UINT64_MAX;
+ time_st.enq_tur_min_time = UINT64_MAX;
+ time_st.deq_min_time = UINT64_MAX;
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
@@ -1989,48 +2128,82 @@ offload_latency_test(struct active_device *ad,
if (op_type == RTE_BBDEV_OP_TURBO_DEC)
iter = offload_latency_test_dec(op_params->mp, bufs,
op_params->ref_dec_op, ad->dev_id, queue_id,
- num_to_process, burst_sz, &enq_total_time,
- &deq_total_time);
+ num_to_process, burst_sz, &time_st);
else
iter = offload_latency_test_enc(op_params->mp, bufs,
op_params->ref_enc_op, ad->dev_id, queue_id,
- num_to_process, burst_sz, &enq_total_time,
- &deq_total_time);
+ num_to_process, burst_sz, &time_st);
if (iter <= 0)
return TEST_FAILED;
- printf("\tenq offload avg. latency: %lg cycles, %lg us\n",
- (double)enq_total_time / (double)iter,
- (double)(enq_total_time * 1000000) / (double)iter /
- (double)rte_get_tsc_hz());
-
- printf("\tdeq offload avg. latency: %lg cycles, %lg us\n",
- (double)deq_total_time / (double)iter,
- (double)(deq_total_time * 1000000) / (double)iter /
- (double)rte_get_tsc_hz());
+ printf("\tenq offload cost latency:\n"
+ "\t\tsoftware avg %lg cycles, %lg us\n"
+ "\t\tsoftware min %lg cycles, %lg us\n"
+ "\t\tsoftware max %lg cycles, %lg us\n"
+ "\t\tturbo avg %lg cycles, %lg us\n"
+ "\t\tturbo min %lg cycles, %lg us\n"
+ "\t\tturbo max %lg cycles, %lg us\n",
+ (double)time_st.enq_sw_tot_time / (double)iter,
+ (double)(time_st.enq_sw_tot_time * 1000000) /
+ (double)iter / (double)rte_get_tsc_hz(),
+ (double)time_st.enq_sw_min_time,
+ (double)(time_st.enq_sw_min_time * 1000000) /
+ rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
+ (double)(time_st.enq_sw_max_time * 1000000) /
+ rte_get_tsc_hz(), (double)time_st.enq_tur_tot_time /
+ (double)iter,
+ (double)(time_st.enq_tur_tot_time * 1000000) /
+ (double)iter / (double)rte_get_tsc_hz(),
+ (double)time_st.enq_tur_min_time,
+ (double)(time_st.enq_tur_min_time * 1000000) /
+ rte_get_tsc_hz(), (double)time_st.enq_tur_max_time,
+ (double)(time_st.enq_tur_max_time * 1000000) /
+ rte_get_tsc_hz());
+
+ printf("\tdeq offload cost latency - one op:\n"
+ "\t\tavg %lg cycles, %lg us\n"
+ "\t\tmin %lg cycles, %lg us\n"
+ "\t\tmax %lg cycles, %lg us\n",
+ (double)time_st.deq_tot_time / (double)iter,
+ (double)(time_st.deq_tot_time * 1000000) /
+ (double)iter / (double)rte_get_tsc_hz(),
+ (double)time_st.deq_min_time,
+ (double)(time_st.deq_min_time * 1000000) /
+ rte_get_tsc_hz(), (double)time_st.deq_max_time,
+ (double)(time_st.deq_max_time * 1000000) /
+ rte_get_tsc_hz());
return TEST_SUCCESS;
+#endif
}
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
static int
offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
const uint16_t num_to_process, uint16_t burst_sz,
- uint64_t *deq_total_time)
+ uint64_t *deq_tot_time, uint64_t *deq_min_time,
+ uint64_t *deq_max_time)
{
int i, deq_total;
struct rte_bbdev_dec_op *ops[MAX_BURST];
- uint64_t deq_start_time;
+ uint64_t deq_start_time, deq_last_time;
/* Test deq offload latency from an empty queue */
- deq_start_time = rte_rdtsc_precise();
+
for (i = 0, deq_total = 0; deq_total < num_to_process;
++i, deq_total += burst_sz) {
+ deq_start_time = rte_rdtsc_precise();
+
if (unlikely(num_to_process - deq_total < burst_sz))
burst_sz = num_to_process - deq_total;
rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
+
+ deq_last_time = rte_rdtsc_precise() - deq_start_time;
+ *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
+ *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
+ *deq_tot_time += deq_last_time;
}
- *deq_total_time = rte_rdtsc_precise() - deq_start_time;
return i;
}
@@ -2038,31 +2211,45 @@ offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
static int
offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
const uint16_t num_to_process, uint16_t burst_sz,
- uint64_t *deq_total_time)
+ uint64_t *deq_tot_time, uint64_t *deq_min_time,
+ uint64_t *deq_max_time)
{
int i, deq_total;
struct rte_bbdev_enc_op *ops[MAX_BURST];
- uint64_t deq_start_time;
+ uint64_t deq_start_time, deq_last_time;
/* Test deq offload latency from an empty queue */
- deq_start_time = rte_rdtsc_precise();
for (i = 0, deq_total = 0; deq_total < num_to_process;
++i, deq_total += burst_sz) {
+ deq_start_time = rte_rdtsc_precise();
+
if (unlikely(num_to_process - deq_total < burst_sz))
burst_sz = num_to_process - deq_total;
rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
+
+ deq_last_time = rte_rdtsc_precise() - deq_start_time;
+ *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
+ *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
+ *deq_tot_time += deq_last_time;
}
- *deq_total_time = rte_rdtsc_precise() - deq_start_time;
return i;
}
+#endif
static int
offload_latency_empty_q_test(struct active_device *ad,
struct test_op_params *op_params)
{
+#ifndef RTE_TEST_BBDEV_OFFLOAD_COST
+ RTE_SET_USED(ad);
+ RTE_SET_USED(op_params);
+ printf(
+ "Offload latency empty dequeue test is disabled. Set RTE_TEST_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
+ return TEST_SKIPPED;
+#else
int iter;
- uint64_t deq_total_time = 0;
+ uint64_t deq_tot_time, deq_min_time, deq_max_time;
uint16_t burst_sz = op_params->burst_sz;
const uint16_t num_to_process = op_params->num_to_process;
const enum rte_bbdev_op_type op_type = test_vector.op_type;
@@ -2070,6 +2257,9 @@ offload_latency_empty_q_test(struct active_device *ad,
struct rte_bbdev_info info;
const char *op_type_str;
+ deq_tot_time = deq_max_time = 0;
+ deq_min_time = UINT64_MAX;
+
TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
"BURST_SIZE should be <= %u", MAX_BURST);
@@ -2084,20 +2274,29 @@ offload_latency_empty_q_test(struct active_device *ad,
if (op_type == RTE_BBDEV_OP_TURBO_DEC)
iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
- num_to_process, burst_sz, &deq_total_time);
+ num_to_process, burst_sz, &deq_tot_time,
+ &deq_min_time, &deq_max_time);
else
iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
- num_to_process, burst_sz, &deq_total_time);
+ num_to_process, burst_sz, &deq_tot_time,
+ &deq_min_time, &deq_max_time);
if (iter <= 0)
return TEST_FAILED;
- printf("\tempty deq offload avg. latency: %lg cycles, %lg us\n",
- (double)deq_total_time / (double)iter,
- (double)(deq_total_time * 1000000) / (double)iter /
- (double)rte_get_tsc_hz());
+ printf("\tempty deq offload\n"
+ "\t\tavg. latency: %lg cycles, %lg us\n"
+ "\t\tmin. latency: %lg cycles, %lg us\n"
+ "\t\tmax. latency: %lg cycles, %lg us\n",
+ (double)deq_tot_time / (double)iter,
+ (double)(deq_tot_time * 1000000) / (double)iter /
+ (double)rte_get_tsc_hz(), (double)deq_min_time,
+ (double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
+ (double)deq_max_time, (double)(deq_max_time * 1000000) /
+ rte_get_tsc_hz());
return TEST_SUCCESS;
+#endif
}
static int
@@ -2107,9 +2306,9 @@ throughput_tc(void)
}
static int
-offload_latency_tc(void)
+offload_cost_tc(void)
{
- return run_test_case(offload_latency_test);
+ return run_test_case(offload_cost_test);
}
static int
@@ -2119,9 +2318,9 @@ offload_latency_empty_q_tc(void)
}
static int
-operation_latency_tc(void)
+latency_tc(void)
{
- return run_test_case(operation_latency_test);
+ return run_test_case(latency_test);
}
static int
@@ -2145,7 +2344,7 @@ static struct unit_test_suite bbdev_validation_testsuite = {
.setup = testsuite_setup,
.teardown = testsuite_teardown,
.unit_test_cases = {
- TEST_CASE_ST(ut_setup, ut_teardown, operation_latency_tc),
+ TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
TEST_CASES_END() /**< NULL terminate unit test array */
}
};
@@ -2155,9 +2354,18 @@ static struct unit_test_suite bbdev_latency_testsuite = {
.setup = testsuite_setup,
.teardown = testsuite_teardown,
.unit_test_cases = {
- TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_tc),
+ TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
+ TEST_CASES_END() /**< NULL terminate unit test array */
+ }
+};
+
+static struct unit_test_suite bbdev_offload_cost_testsuite = {
+ .suite_name = "BBdev Offload Cost Tests",
+ .setup = testsuite_setup,
+ .teardown = testsuite_teardown,
+ .unit_test_cases = {
+ TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
- TEST_CASE_ST(ut_setup, ut_teardown, operation_latency_tc),
TEST_CASES_END() /**< NULL terminate unit test array */
}
};
@@ -2175,4 +2383,5 @@ static struct unit_test_suite bbdev_interrupt_testsuite = {
REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
+REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
diff --git a/config/common_base b/config/common_base
index ee10b44..48f83c4 100644
--- a/config/common_base
+++ b/config/common_base
@@ -819,6 +819,11 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
CONFIG_RTE_TEST_BBDEV=y
#
+# Compile the bbdev offload cost test
+#
+CONFIG_RTE_TEST_BBDEV_OFFLOAD_COST=n
+
+#
# Compile the crypto performance application
#
CONFIG_RTE_APP_CRYPTO_PERF=y
diff --git a/drivers/baseband/turbo_sw/bbdev_turbo_software.c b/drivers/baseband/turbo_sw/bbdev_turbo_software.c
index b9bb37c..bbb4e40 100644
--- a/drivers/baseband/turbo_sw/bbdev_turbo_software.c
+++ b/drivers/baseband/turbo_sw/bbdev_turbo_software.c
@@ -9,6 +9,7 @@
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_kvargs.h>
+#include <rte_cycles.h>
#include <rte_bbdev.h>
#include <rte_bbdev_pmd.h>
@@ -455,7 +456,8 @@ static inline void
process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out,
- uint16_t in_offset, uint16_t out_offset, uint16_t total_left)
+ uint16_t in_offset, uint16_t out_offset, uint16_t total_left,
+ struct rte_bbdev_stats *q_stats)
{
int ret;
int16_t k_idx;
@@ -469,6 +471,11 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
struct bblib_turbo_encoder_response turbo_resp;
struct bblib_rate_match_dl_request rm_req;
struct bblib_rate_match_dl_response rm_resp;
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ uint64_t start_time;
+#else
+ RTE_SET_USED(q_stats);
+#endif
k_idx = compute_idx(k);
in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
@@ -499,7 +506,13 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
}
crc_resp.data = in;
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ start_time = rte_rdtsc_precise();
+#endif
bblib_lte_crc24a_gen(&crc_req, &crc_resp);
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ q_stats->offload_time += rte_rdtsc_precise() - start_time;
+#endif
} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
/* CRC24B */
ret = is_enc_input_valid(k - 24, k_idx, total_left);
@@ -525,7 +538,13 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
}
crc_resp.data = in;
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ start_time = rte_rdtsc_precise();
+#endif
bblib_lte_crc24b_gen(&crc_req, &crc_resp);
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ q_stats->offload_time += rte_rdtsc_precise() - start_time;
+#endif
} else {
ret = is_enc_input_valid(k, k_idx, total_left);
if (ret != 0) {
@@ -572,12 +591,21 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
turbo_resp.output_win_0 = out0;
turbo_resp.output_win_1 = out1;
turbo_resp.output_win_2 = out2;
+
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ start_time = rte_rdtsc_precise();
+#endif
+
if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
op->status |= 1 << RTE_BBDEV_DRV_ERROR;
rte_bbdev_log(ERR, "Turbo Encoder failed");
return;
}
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ q_stats->offload_time += rte_rdtsc_precise() - start_time;
+#endif
+
/* Restore 3 first bytes of next CB if they were overwritten by CRC*/
if (first_3_bytes != 0)
*((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;
@@ -639,6 +667,10 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
else
rm_req.bypass_rvidx = 0;
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ start_time = rte_rdtsc_precise();
+#endif
+
if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
op->status |= 1 << RTE_BBDEV_DRV_ERROR;
rte_bbdev_log(ERR, "Rate matching failed");
@@ -651,6 +683,10 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
mask_id = (e & 7) >> 1;
rm_out[out_len - 1] &= mask_out[mask_id];
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ q_stats->offload_time += rte_rdtsc_precise() - start_time;
+#endif
+
enc->output.length += rm_resp.OutputLen;
} else {
/* Rate matching is bypassed */
@@ -678,7 +714,8 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
}
static inline void
-enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op)
+enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
+ struct rte_bbdev_stats *queue_stats)
{
uint8_t c, r, crc24_bits = 0;
uint16_t k, ncb;
@@ -733,7 +770,8 @@ enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op)
}
process_enc_cb(q, op, r, c, k, ncb, e, m_in,
- m_out, in_offset, out_offset, total_left);
+ m_out, in_offset, out_offset, total_left,
+ queue_stats);
/* Update total_left */
total_left -= (k - crc24_bits) >> 3;
/* Update offsets for next CBs (if exist) */
@@ -755,12 +793,15 @@ enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op)
static inline uint16_t
enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
- uint16_t nb_ops)
+ uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
uint16_t i;
+#ifdef RTE_TEST_BBDEV_OFFLOAD_COST
+ queue_stats->offload_time = 0;
+#endif
for (i = 0; i < nb_ops; ++i)
- enqueue_enc_one_op(q, ops[i]);
+ enqueue_enc_one_op(q, ops[i], queue_stats);
return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
NULL);
@@ -939,6 +980,8 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
turbo_req.k = k;
turbo_req.k_idx = k_idx;
turbo_req.max_iter_num = dec->iter_max;
+ turbo_req.early_term_disable = !check_bit(dec->op_flags,
+ RTE_BBDEV_TURBO_EARLY_TERMINATION);
turbo_resp.ag_buf = q->ag;
turbo_resp.cb_buf = q->code_block;
turbo_resp.output = out;
@@ -1051,7 +1094,7 @@ enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
struct turbo_sw_queue *q = queue;
uint16_t nb_enqueued = 0;
- nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops);
+ nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats);
q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
q_data->queue_stats.enqueued_count += nb_enqueued;
diff --git a/lib/librte_bbdev/rte_bbdev.h b/lib/librte_bbdev/rte_bbdev.h
index 5e7e495..bdcd1d0 100644
--- a/lib/librte_bbdev/rte_bbdev.h
+++ b/lib/librte_bbdev/rte_bbdev.h
@@ -239,6 +239,8 @@ struct rte_bbdev_stats {
uint64_t enqueue_err_count;
/** Total error count on operations dequeued */
uint64_t dequeue_err_count;
+ /** Time spent in the offloaded processing, in TSC cycles */
+ uint64_t offload_time;
};
/**
--
2.5.5
next prev parent reply other threads:[~2018-05-09 14:30 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-26 13:29 [dpdk-dev] [PATCH 01/13] baseband/turbo_sw: update DPDK to work with FlexRAN 1.4.0 Kamil Chalupnik
2018-04-26 13:29 ` [dpdk-dev] [PATCH 02/13] doc/turbo_sw: update Wireless Baseband Device documentation Kamil Chalupnik
2018-05-07 13:37 ` De Lara Guarch, Pablo
2018-05-09 14:46 ` [dpdk-dev] [PATCH v2 12/14] " Kamil Chalupnik
2018-04-26 13:29 ` [dpdk-dev] [PATCH 03/13] doc/bbdev: dynamic lib support Kamil Chalupnik
2018-05-09 14:51 ` [dpdk-dev] [PATCH v2 13/14] " Kamil Chalupnik
2018-04-26 13:29 ` [dpdk-dev] [PATCH 04/13] baseband/turbo_sw: memcpy changed or removed from driver Kamil Chalupnik
2018-05-07 12:26 ` De Lara Guarch, Pablo
2018-05-09 14:17 ` [dpdk-dev] [PATCH v2 02/14] baseband/turbo_sw: memory copying optimized or removed Kamil Chalupnik
2018-04-26 13:29 ` [dpdk-dev] [PATCH 05/13] baseband/turbo_sw: scalling input LLR to range [-16 16] Kamil Chalupnik
2018-05-07 12:50 ` De Lara Guarch, Pablo
2018-05-09 14:23 ` [dpdk-dev] [PATCH v2 04/14] baseband/turbo_sw: scalling likelihood ratio (LLR) input Kamil Chalupnik
2018-04-26 13:30 ` [dpdk-dev] [PATCH 06/13] baseband/turbo_sw: increase internal buffers Kamil Chalupnik
2018-05-09 14:25 ` [dpdk-dev] [PATCH v2 05/14] " Kamil Chalupnik
2018-04-26 13:30 ` [dpdk-dev] [PATCH 07/13] baseband/turbo_sw: support for optional CRC overlap Kamil Chalupnik
2018-05-09 14:28 ` [dpdk-dev] [PATCH v2 06/14] " Kamil Chalupnik
2018-04-26 13:30 ` [dpdk-dev] [PATCH 08/13] app/bbdev: update test vectors names Kamil Chalupnik
2018-05-07 13:15 ` De Lara Guarch, Pablo
2018-05-09 14:37 ` [dpdk-dev] [PATCH v2 09/14] " Kamil Chalupnik
2018-04-26 13:30 ` [dpdk-dev] [PATCH 09/13] bbdev: measure offload cost Kamil Chalupnik
2018-05-07 13:29 ` De Lara Guarch, Pablo
[not found] ` <EEA9FF629BF25B47BD67ADE995041EE23D0352A7@IRSMSX103.ger.corp.intel.com>
2018-05-08 9:08 ` De Lara Guarch, Pablo
[not found] ` <EEA9FF629BF25B47BD67ADE995041EE23D035350@IRSMSX103.ger.corp.intel.com>
2018-05-08 10:16 ` De Lara Guarch, Pablo
2018-05-09 14:30 ` Kamil Chalupnik [this message]
2018-04-26 13:30 ` [dpdk-dev] [PATCH 10/13] doc: update tests and usage of test app description Kamil Chalupnik
2018-05-09 14:55 ` [dpdk-dev] [PATCH v2 14/14] " Kamil Chalupnik
2018-04-26 13:30 ` [dpdk-dev] [PATCH 11/13] app/bbdev: added new test vectors Kamil Chalupnik
2018-05-09 14:39 ` [dpdk-dev] [PATCH v2 10/14] " Kamil Chalupnik
2018-04-26 13:30 ` [dpdk-dev] [PATCH 12/13] bbdev: split queue groups Kamil Chalupnik
2018-05-09 14:35 ` [dpdk-dev] [PATCH v2 08/14] " Kamil Chalupnik
2018-04-26 13:30 ` [dpdk-dev] [PATCH 13/13] app/bbdev: improve readability of test application Kamil Chalupnik
2018-05-09 14:42 ` [dpdk-dev] [PATCH v2 11/14] " Kamil Chalupnik
2018-05-09 18:55 ` De Lara Guarch, Pablo
2018-04-26 13:30 ` [dpdk-dev] [PATCH 00/13] Documentation and Turbo Software Baseband Device Update Kamil Chalupnik
2018-05-09 14:14 ` [dpdk-dev] [PATCH v2 01/14] baseband/turbo_sw: update DPDK to work with FlexRAN 1.4.0 Kamil Chalupnik
2018-05-09 14:14 ` [dpdk-dev] [PATCH v2 00/14] Documentation and Turbo Software Baseband Device Update Kamil Chalupnik
2018-05-09 19:21 ` De Lara Guarch, Pablo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180509143002.16024-1-kamilx.chalupnik@intel.com \
--to=kamilx.chalupnik@intel.com \
--cc=amr.mokhtar@intel.com \
--cc=dev@dpdk.org \
--cc=pablo.de.lara.guarch@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).