From: Kamil Chalupnik <kamilx.chalupnik@intel.com>
To: dev@dpdk.org
Cc: amr.mokhtar@intel.com, akhil.goyal@nxp.com,
	Kamil Chalupnik <kamilx.chalupnik@intel.com>
Date: Fri, 7 Dec 2018 16:15:33 +0100
Message-Id: <20181207151534.16428-3-kamilx.chalupnik@intel.com>
In-Reply-To: <20181207151534.16428-1-kamilx.chalupnik@intel.com>
References: <20181207143126.3876-1-kamilx.chalupnik@intel.com>
	<20181207151534.16428-1-kamilx.chalupnik@intel.com>
Subject: [dpdk-dev] [PATCH v2 3/4] baseband: support bigger Transport Block

Test application and Turbo Software driver were adapted to support
chained mbufs, so that Transport Blocks bigger than a single mbuf
segment can be processed.

Signed-off-by: Kamil Chalupnik <kamilx.chalupnik@intel.com>
---
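Note (illustrative, not part of the commit): the test app builds its
chained-mbuf buffers with rte_pktmbuf_alloc()/rte_pktmbuf_chain(), as in
the hunks below. A minimal standalone sketch of the same idea follows;
alloc_chained_tb() is an invented name, not a function from this series,
and it assumes a pool created with rte_pktmbuf_pool_create() whose data
room is non-zero. Only rte_pktmbuf_alloc(), rte_pktmbuf_append(),
rte_pktmbuf_chain(), rte_pktmbuf_lastseg(), rte_pktmbuf_tailroom() and
rte_pktmbuf_free() are real DPDK APIs here.

  #include <rte_common.h>
  #include <rte_mbuf.h>

  /* Sketch: spread a TB of tb_len bytes over a chain of mbufs. */
  static struct rte_mbuf *
  alloc_chained_tb(struct rte_mempool *mbuf_pool, uint32_t tb_len)
  {
  	struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
  	if (m_head == NULL)
  		return NULL;

  	while (rte_pktmbuf_pkt_len(m_head) < tb_len) {
  		uint32_t left = tb_len - rte_pktmbuf_pkt_len(m_head);
  		uint32_t room = rte_pktmbuf_tailroom(
  				rte_pktmbuf_lastseg(m_head));
  		uint16_t chunk = (uint16_t)RTE_MIN(left, room);

  		/* Claim space in the last segment while it has tailroom */
  		if (chunk > 0 && rte_pktmbuf_append(m_head, chunk) != NULL)
  			continue;

  		/* Last segment is full: chain a fresh one */
  		struct rte_mbuf *m_tail = rte_pktmbuf_alloc(mbuf_pool);
  		if (m_tail == NULL ||
  				rte_pktmbuf_chain(m_head, m_tail) != 0) {
  			rte_pktmbuf_free(m_tail);
  			rte_pktmbuf_free(m_head);
  			return NULL;
  		}
  	}
  	return m_head;
  }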
 app/test-bbdev/test_bbdev_perf.c                 |  60 +++++++++---
 drivers/baseband/turbo_sw/bbdev_turbo_software.c | 111 ++++++++++++++++-------
 2 files changed, 126 insertions(+), 45 deletions(-)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index a25e3a7..5bec70d 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -114,6 +114,17 @@ typedef int (test_case_function)(struct active_device *ad,
 		struct test_op_params *op_params);
 
 static inline void
+mbuf_reset(struct rte_mbuf *m)
+{
+	m->pkt_len = 0;
+
+	do {
+		m->data_len = 0;
+		m = m->next;
+	} while (m != NULL);
+}
+
+static inline void
 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
 {
 	ad->supported_ops |= (1 << op_type);
@@ -573,6 +584,10 @@ typedef int (test_case_function)(struct active_device *ad,
 				op_type, n * ref_entries->nb_segments,
 				mbuf_pool->size);
 
+		TEST_ASSERT_SUCCESS(((seg->length + RTE_PKTMBUF_HEADROOM) >
+				(uint32_t)UINT16_MAX),
+				"Given data is bigger than allowed mbuf segment size");
+
 		bufs[i].data = m_head;
 		bufs[i].offset = 0;
 		bufs[i].length = 0;
@@ -589,7 +604,6 @@ typedef int (test_case_function)(struct active_device *ad,
 			rte_memcpy(data, seg->addr, seg->length);
 			bufs[i].length += seg->length;
 
-
 			for (j = 1; j < ref_entries->nb_segments; ++j) {
 				struct rte_mbuf *m_tail =
 						rte_pktmbuf_alloc(mbuf_pool);
@@ -617,6 +631,24 @@ typedef int (test_case_function)(struct active_device *ad,
 						"Couldn't chain mbufs from %d data type mbuf pool",
 						op_type);
 			}
+
+		} else {
+
+			/* allocate chained-mbuf for output buffer */
+			for (j = 1; j < ref_entries->nb_segments; ++j) {
+				struct rte_mbuf *m_tail =
+						rte_pktmbuf_alloc(mbuf_pool);
+				TEST_ASSERT_NOT_NULL(m_tail,
+						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
+						op_type,
+						n * ref_entries->nb_segments,
+						mbuf_pool->size);
+
+				ret = rte_pktmbuf_chain(m_head, m_tail);
+				TEST_ASSERT_SUCCESS(ret,
+						"Couldn't chain mbufs from %d data type mbuf pool",
+						op_type);
+			}
 		}
 	}
 
@@ -655,7 +687,7 @@ typedef int (test_case_function)(struct active_device *ad,
 		while (m != NULL) {
 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
 					input_ops[i].offset);
-			for (byte_idx = 0; byte_idx < input_ops[i].length;
+			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
 					++byte_idx)
 				llr[byte_idx] = round((double)max_llr_modulus *
 						llr[byte_idx] / INT8_MAX);
@@ -864,15 +896,18 @@ typedef int (test_case_function)(struct active_device *ad,
 	uint8_t i;
 	struct rte_mbuf *m = op->data;
 	uint8_t nb_dst_segments = orig_op->nb_segments;
+	uint32_t total_data_size = 0;
 
 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
 			"Number of segments differ in original (%u) and filled (%u) op",
 			nb_dst_segments, m->nb_segs);
 
+	/* Validate each mbuf segment length */
 	for (i = 0; i < nb_dst_segments; ++i) {
 		/* Apply offset to the first mbuf segment */
 		uint16_t offset = (i == 0) ? op->offset : 0;
-		uint16_t data_len = m->data_len - offset;
+		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
+		total_data_size += orig_op->segments[i].length;
 
 		TEST_ASSERT(orig_op->segments[i].length == data_len,
 				"Length of segment differ in original (%u) and filled (%u) op",
@@ -884,6 +919,12 @@ typedef int (test_case_function)(struct active_device *ad,
 		m = m->next;
 	}
 
+	/* Validate total mbuf pkt length */
+	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
+	TEST_ASSERT(total_data_size == pkt_len,
+			"Length of data differ in original (%u) and filled (%u) op",
+			total_data_size, pkt_len);
+
 	return TEST_SUCCESS;
 }
 
@@ -1427,10 +1468,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
 	for (i = 0; i < TEST_REPETITIONS; ++i) {
 
-		for (j = 0; j < num_ops; ++j) {
-			struct rte_bbdev_dec_op *op = ops_enq[j];
-			rte_pktmbuf_reset(op->turbo_dec.hard_output.data);
-		}
+		for (j = 0; j < num_ops; ++j)
+			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
 
 		start_time = rte_rdtsc_precise();
 
@@ -1529,8 +1568,7 @@ typedef int (test_case_function)(struct active_device *ad,
 
 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
 			for (j = 0; j < num_ops; ++j)
-				rte_pktmbuf_reset(
-						ops_enq[j]->turbo_enc.output.data);
+				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
 
 		start_time = rte_rdtsc_precise();
 
@@ -2025,7 +2063,7 @@ typedef int (test_case_function)(struct active_device *ad,
 	time_st->enq_acc_total_time += stats.acc_offload_cycles;
 
 	/* ensure enqueue has been completed */
-	rte_delay_ms(10);
+	rte_delay_us(200);
 
 	/* Start time meas for dequeue function offload latency */
 	deq_start_time = rte_rdtsc_precise();
@@ -2106,7 +2144,7 @@ typedef int (test_case_function)(struct active_device *ad,
 	time_st->enq_acc_total_time += stats.acc_offload_cycles;
 
 	/* ensure enqueue has been completed */
-	rte_delay_ms(10);
+	rte_delay_us(200);
 
 	/* Start time meas for dequeue function offload latency */
 	deq_start_time = rte_rdtsc_precise();
diff --git a/drivers/baseband/turbo_sw/bbdev_turbo_software.c b/drivers/baseband/turbo_sw/bbdev_turbo_software.c
index 57f6ba1..19fbb55 100644
--- a/drivers/baseband/turbo_sw/bbdev_turbo_software.c
+++ b/drivers/baseband/turbo_sw/bbdev_turbo_software.c
@@ -83,6 +83,18 @@ struct turbo_sw_queue {
 	enum rte_bbdev_op_type type;
 } __rte_cache_aligned;
 
+static inline char *
+mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
+{
+	if (unlikely(len > rte_pktmbuf_tailroom(m)))
+		return NULL;
+
+	char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
+	m->data_len = (uint16_t)(m->data_len + len);
+	m_head->pkt_len = (m_head->pkt_len + len);
+	return tail;
+}
+
 /* Calculate index based on Table 5.1.3-3 from TS34.212 */
 static inline int32_t
 compute_idx(uint16_t k)
@@ -437,7 +449,7 @@ struct turbo_sw_queue {
 		return -1;
 	}
 
-	if (in_length - kw < 0) {
+	if (in_length < kw) {
 		rte_bbdev_log(ERR,
 				"Mismatch between input length (%u) and kw (%u)",
 				in_length, kw);
@@ -456,9 +468,9 @@ struct turbo_sw_queue {
 static inline void
 process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
 		uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
-		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out,
-		uint16_t in_offset, uint16_t out_offset, uint16_t total_left,
-		struct rte_bbdev_stats *q_stats)
+		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
+		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
+		uint16_t in_length, struct rte_bbdev_stats *q_stats)
 {
 	int ret;
 	int16_t k_idx;
@@ -484,7 +496,7 @@ struct turbo_sw_queue {
 	/* CRC24A (for TB) */
 	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
 		(enc->code_block_mode == 1)) {
-		ret = is_enc_input_valid(k - 24, k_idx, total_left);
+		ret = is_enc_input_valid(k - 24, k_idx, in_length);
 		if (ret != 0) {
 			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
 			return;
 		}
@@ -494,7 +506,7 @@ struct turbo_sw_queue {
 		/* Check if there is a room for CRC bits if not use
 		 * the temporary buffer.
 		 */
-		if (rte_pktmbuf_append(m_in, 3) == NULL) {
+		if (mbuf_append(m_in, m_in, 3) == NULL) {
 			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
 			in = q->enc_in;
 		} else {
@@ -517,7 +529,7 @@ struct turbo_sw_queue {
 #endif
 	} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
 		/* CRC24B */
-		ret = is_enc_input_valid(k - 24, k_idx, total_left);
+		ret = is_enc_input_valid(k - 24, k_idx, in_length);
 		if (ret != 0) {
 			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
 			return;
 		}
@@ -527,7 +539,7 @@ struct turbo_sw_queue {
 		/* Check if there is a room for CRC bits if this is the last
 		 * CB in TB. If not use temporary buffer.
 		 */
-		if ((c - r == 1) && (rte_pktmbuf_append(m_in, 3) == NULL)) {
+		if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
 			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
 			in = q->enc_in;
 		} else if (c - r > 1) {
@@ -549,7 +561,7 @@ struct turbo_sw_queue {
 		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
 #endif
 	} else {
-		ret = is_enc_input_valid(k, k_idx, total_left);
+		ret = is_enc_input_valid(k, k_idx, in_length);
 		if (ret != 0) {
 			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
 			return;
 		}
@@ -570,7 +582,8 @@ struct turbo_sw_queue {
 		out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
 		out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
 	} else {
-		out0 = (uint8_t *)rte_pktmbuf_append(m_out, (k >> 3) * 3 + 2);
+		out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
+				(k >> 3) * 3 + 2);
 		if (out0 == NULL) {
 			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
 			rte_bbdev_log(ERR,
@@ -623,7 +636,7 @@ struct turbo_sw_queue {
 	const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};
 
 	/* get output data starting address */
-	rm_out = (uint8_t *)rte_pktmbuf_append(m_out, out_len);
+	rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
 	if (rm_out == NULL) {
 		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
 		rte_bbdev_log(ERR,
@@ -725,14 +738,16 @@ struct turbo_sw_queue {
 	uint16_t out_offset = enc->output.offset;
 	struct rte_mbuf *m_in = enc->input.data;
 	struct rte_mbuf *m_out = enc->output.data;
-	uint16_t total_left = enc->input.length;
+	struct rte_mbuf *m_out_head = enc->output.data;
+	uint32_t in_length, mbuf_total_left = enc->input.length;
+	uint16_t seg_total_left;
 
 	/* Clear op status */
 	op->status = 0;
 
-	if (total_left > RTE_BBDEV_MAX_TB_SIZE >> 3) {
+	if (mbuf_total_left > RTE_BBDEV_MAX_TB_SIZE >> 3) {
 		rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
-				total_left, RTE_BBDEV_MAX_TB_SIZE);
+				mbuf_total_left, RTE_BBDEV_MAX_TB_SIZE);
 		op->status = 1 << RTE_BBDEV_DATA_ERROR;
 		return;
 	}
@@ -755,7 +770,10 @@ struct turbo_sw_queue {
 		r = 0;
 	}
 
-	while (total_left > 0 && r < c) {
+	while (mbuf_total_left > 0 && r < c) {
+
+		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
+
 		if (enc->code_block_mode == 0) {
 			k = (r < enc->tb_params.c_neg) ?
 				enc->tb_params.k_neg : enc->tb_params.k_pos;
@@ -769,22 +787,32 @@ struct turbo_sw_queue {
 			e = enc->cb_params.e;
 		}
 
-		process_enc_cb(q, op, r, c, k, ncb, e, m_in,
-				m_out, in_offset, out_offset, total_left,
+		process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head,
+				m_out, in_offset, out_offset, seg_total_left,
 				queue_stats);
 		/* Update total_left */
-		total_left -= (k - crc24_bits) >> 3;
+		in_length = ((k - crc24_bits) >> 3);
+		mbuf_total_left -= in_length;
 
 		/* Update offsets for next CBs (if exist) */
 		in_offset += (k - crc24_bits) >> 3;
 		if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH)
 			out_offset += e >> 3;
 		else
 			out_offset += (k >> 3) * 3 + 2;
+
+		/* Update offsets */
+		if (seg_total_left == in_length) {
+			/* Go to the next mbuf */
+			m_in = m_in->next;
+			m_out = m_out->next;
+			in_offset = 0;
+			out_offset = 0;
+		}
 		r++;
 	}
 
 	/* check if all input data was processed */
-	if (total_left != 0) {
+	if (mbuf_total_left != 0) {
 		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
 		rte_bbdev_log(ERR,
 			"Mismatch between mbuf length and included CBs sizes");
@@ -903,8 +931,9 @@ struct turbo_sw_queue {
 static inline void
 process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
 		uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
-		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
-		bool check_crc_24b, uint16_t crc24_overlap, uint16_t total_left,
+		struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
+		uint16_t in_offset, uint16_t out_offset, bool check_crc_24b,
+		uint16_t crc24_overlap, uint16_t in_length,
 		struct rte_bbdev_stats *q_stats)
 {
 	int ret;
@@ -925,7 +954,7 @@ struct turbo_sw_queue {
 
 	k_idx = compute_idx(k);
 
-	ret = is_dec_input_valid(k_idx, kw, total_left);
+	ret = is_dec_input_valid(k_idx, kw, in_length);
 	if (ret != 0) {
 		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
 		return;
 	}
@@ -983,7 +1012,8 @@ struct turbo_sw_queue {
 	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
 #endif
 
-	out = (uint8_t *)rte_pktmbuf_append(m_out, ((k - crc24_overlap) >> 3));
+	out = (uint8_t *)mbuf_append(m_out_head, m_out,
+			((k - crc24_overlap) >> 3));
 	if (out == NULL) {
 		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
 		rte_bbdev_log(ERR, "Too little space in output mbuf");
@@ -1038,9 +1068,11 @@ struct turbo_sw_queue {
 	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
 	struct rte_mbuf *m_in = dec->input.data;
 	struct rte_mbuf *m_out = dec->hard_output.data;
+	struct rte_mbuf *m_out_head = dec->hard_output.data;
 	uint16_t in_offset = dec->input.offset;
-	uint16_t total_left = dec->input.length;
 	uint16_t out_offset = dec->hard_output.offset;
+	uint32_t mbuf_total_left = dec->input.length;
+	uint16_t seg_total_left;
 
 	/* Clear op status */
 	op->status = 0;
@@ -1062,11 +1094,13 @@ struct turbo_sw_queue {
 			RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
 		crc24_overlap = 24;
 
-	while (total_left > 0) {
+	while (mbuf_total_left > 0) {
 
 		if (dec->code_block_mode == 0)
 			k = (r < dec->tb_params.c_neg) ?
 				dec->tb_params.k_neg : dec->tb_params.k_pos;
 
+		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
+
 		/* Calculates circular buffer size (Kw).
 		 * According to 3gpp 36.212 section 5.1.4.2
 		 *		Kw = 3 * Kpi,
@@ -1079,23 +1113,32 @@ struct turbo_sw_queue {
 		 */
 		kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_C_SUBBLOCK) * 3;
 
-		process_dec_cb(q, op, c, k, kw, m_in, m_out, in_offset,
-				out_offset, check_bit(dec->op_flags,
+		process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
+				in_offset, out_offset, check_bit(dec->op_flags,
 				RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
-				total_left, queue_stats);
+				seg_total_left, queue_stats);
 
 		/* To keep CRC24 attached to end of Code block, use
 		 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it
 		 * removed by default once verified.
 		 */
-		/* Update total_left */
-		total_left -= kw;
-		/* Update offsets for next CBs (if exist) */
-		in_offset += kw;
-		out_offset += ((k - crc24_overlap) >> 3);
+		mbuf_total_left -= kw;
+
+		/* Update offsets */
+		if (seg_total_left == kw) {
+			/* Go to the next mbuf */
+			m_in = m_in->next;
+			m_out = m_out->next;
+			in_offset = 0;
+			out_offset = 0;
+		} else {
+			/* Update offsets for next CBs (if exist) */
+			in_offset += kw;
+			out_offset += ((k - crc24_overlap) >> 3);
+		}
 
 		r++;
 	}
 
-	if (total_left != 0) {
+	if (mbuf_total_left != 0) {
 		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
 		rte_bbdev_log(ERR,
 			"Mismatch between mbuf length and included Circular buffer sizes");
-- 
1.8.3.1
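P.S. (reviewer note, not part of the patch): the driver-local
mbuf_append(m_head, m, len) above differs from rte_pktmbuf_append(m, len)
in that it claims len bytes in a caller-chosen segment "m" while keeping
the packet-level length accounting on "m_head"; rte_pktmbuf_append()
always writes to the chain's last segment, which is not what the enqueue
loops need when consecutive code blocks must land in consecutive
segments. A hypothetical sketch of that pattern follows; fill_chain(),
cb_len and c are invented for illustration, and mbuf_append() is the
helper added by this patch:

  #include <string.h>
  #include <rte_mbuf.h>

  /* Sketch: place c code blocks of cb_len bytes each into a pre-built
   * output chain, moving to the next segment when the current one runs
   * out of tailroom - the same pattern the enqueue loops follow with
   * m_out_head/m_out.
   */
  static int
  fill_chain(struct rte_mbuf *m_out_head, uint16_t cb_len, uint8_t c)
  {
  	struct rte_mbuf *m_out = m_out_head;	/* current segment */
  	uint8_t r = 0;

  	while (r < c && m_out != NULL) {
  		uint8_t *out = (uint8_t *)mbuf_append(m_out_head, m_out,
  				cb_len);
  		if (out == NULL) {
  			/* Segment full: retry in the next segment */
  			m_out = m_out->next;
  			continue;
  		}
  		memset(out, r, cb_len);	/* stand-in for real CB data */
  		r++;
  	}
  	return (r == c) ? 0 : -1;
  }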