From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 6937CA04B5; Thu, 1 Oct 2020 12:27:13 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id BD89A1DB92; Thu, 1 Oct 2020 12:20:43 +0200 (CEST) Received: from mga06.intel.com (mga06.intel.com [134.134.136.31]) by dpdk.org (Postfix) with ESMTP id 0AE3A1DB6D for ; Thu, 1 Oct 2020 12:20:37 +0200 (CEST) IronPort-SDR: Y3hVMvMc75IR4XAengpZ35hysUjoAJaHpL+gzjVsCIeE2oIY5Nmq3Qg69+CGyWwC1UOV2qrZwk DfFmz0gc+Bmg== X-IronPort-AV: E=McAfee;i="6000,8403,9760"; a="224297087" X-IronPort-AV: E=Sophos;i="5.77,323,1596524400"; d="scan'208";a="224297087" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by orsmga104.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 01 Oct 2020 03:20:35 -0700 IronPort-SDR: YvcFyP4dyVLp2gTM1w5NrJ/3QprcvzS8xkiMoTJ0hM2kj0khCDj7iN6ypejfmaGK7tJRQyUfkN zc+k+/0aro2A== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.77,323,1596524400"; d="scan'208";a="515443405" Received: from silpixa00400573.ir.intel.com (HELO silpixa00400573.ger.corp.intel.com) ([10.237.223.107]) by fmsmga005.fm.intel.com with ESMTP; 01 Oct 2020 03:20:34 -0700 From: Cristian Dumitrescu To: dev@dpdk.org Cc: thomas@monjalon.net, david.marchand@redhat.com Date: Thu, 1 Oct 2020 11:19:44 +0100 Message-Id: <20201001102010.36861-17-cristian.dumitrescu@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20201001102010.36861-1-cristian.dumitrescu@intel.com> References: <20200930063416.68428-2-cristian.dumitrescu@intel.com> <20201001102010.36861-1-cristian.dumitrescu@intel.com> Subject: [dpdk-dev] [PATCH v7 16/42] pipeline: introduce SWX ckadd instruction X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The ckadd (i.e. checksum add) instruction is used to either compute, verify or update the 1's complement sum commonly used by protocols such as IPv4, TCP or UDP. Signed-off-by: Cristian Dumitrescu --- lib/librte_pipeline/rte_swx_pipeline.c | 230 +++++++++++++++++++++++++ 1 file changed, 230 insertions(+) diff --git a/lib/librte_pipeline/rte_swx_pipeline.c b/lib/librte_pipeline/rte_swx_pipeline.c index 245621dc3..96e6c98aa 100644 --- a/lib/librte_pipeline/rte_swx_pipeline.c +++ b/lib/librte_pipeline/rte_swx_pipeline.c @@ -289,6 +289,14 @@ enum instruction_type { INSTR_ALU_SUB_HH, /* dst = H, src = H */ INSTR_ALU_SUB_MI, /* dst = MEF, src = I */ INSTR_ALU_SUB_HI, /* dst = H, src = I */ + + /* ckadd dst src + * dst = dst '+ src[0:1] '+ src[2:3] + ... + * dst = H, src = {H, h.header} + */ + INSTR_ALU_CKADD_FIELD, /* src = H */ + INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */ + INSTR_ALU_CKADD_STRUCT, /* src = h.hdeader, with any sizeof(header) */ }; struct instr_operand { @@ -2979,6 +2987,53 @@ instr_alu_sub_translate(struct rte_swx_pipeline *p, return 0; } +static int +instr_alu_ckadd_translate(struct rte_swx_pipeline *p, + struct action *action __rte_unused, + char **tokens, + int n_tokens, + struct instruction *instr, + struct instruction_data *data __rte_unused) +{ + char *dst = tokens[1], *src = tokens[2]; + struct header *hdst, *hsrc; + struct field *fdst, *fsrc; + + CHECK(n_tokens == 3, EINVAL); + + fdst = header_field_parse(p, dst, &hdst); + CHECK(fdst && (fdst->n_bits == 16), EINVAL); + + /* CKADD_FIELD. */ + fsrc = header_field_parse(p, src, &hsrc); + if (fsrc) { + instr->type = INSTR_ALU_CKADD_FIELD; + instr->alu.dst.struct_id = (uint8_t)hdst->struct_id; + instr->alu.dst.n_bits = fdst->n_bits; + instr->alu.dst.offset = fdst->offset / 8; + instr->alu.src.struct_id = (uint8_t)hsrc->struct_id; + instr->alu.src.n_bits = fsrc->n_bits; + instr->alu.src.offset = fsrc->offset / 8; + return 0; + } + + /* CKADD_STRUCT, CKADD_STRUCT20. */ + hsrc = header_parse(p, src); + CHECK(hsrc, EINVAL); + + instr->type = INSTR_ALU_CKADD_STRUCT; + if ((hsrc->st->n_bits / 8) == 20) + instr->type = INSTR_ALU_CKADD_STRUCT20; + + instr->alu.dst.struct_id = (uint8_t)hdst->struct_id; + instr->alu.dst.n_bits = fdst->n_bits; + instr->alu.dst.offset = fdst->offset / 8; + instr->alu.src.struct_id = (uint8_t)hsrc->struct_id; + instr->alu.src.n_bits = hsrc->st->n_bits; + instr->alu.src.offset = 0; /* Unused. */ + return 0; +} + static inline void instr_alu_add_exec(struct rte_swx_pipeline *p) { @@ -3159,6 +3214,169 @@ instr_alu_sub_hi_exec(struct rte_swx_pipeline *p) thread_ip_inc(p); } +static inline void +instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p) +{ + struct thread *t = &p->threads[p->thread_id]; + struct instruction *ip = t->ip; + uint8_t *dst_struct, *src_struct; + uint16_t *dst16_ptr, dst; + uint64_t *src64_ptr, src64, src64_mask, src; + uint64_t r; + + TRACE("[Thread %2u] ckadd (field)\n", p->thread_id); + + /* Structs. */ + dst_struct = t->structs[ip->alu.dst.struct_id]; + dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset]; + dst = *dst16_ptr; + + src_struct = t->structs[ip->alu.src.struct_id]; + src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset]; + src64 = *src64_ptr; + src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits); + src = src64 & src64_mask; + + r = dst; + r = ~r & 0xFFFF; + + /* The first input (r) is a 16-bit number. The second and the third + * inputs are 32-bit numbers. In the worst case scenario, the sum of the + * three numbers (output r) is a 34-bit number. + */ + r += (src >> 32) + (src & 0xFFFFFFFF); + + /* The first input is a 16-bit number. The second input is an 18-bit + * number. In the worst case scenario, the sum of the two numbers is a + * 19-bit number. + */ + r = (r & 0xFFFF) + (r >> 16); + + /* The first input is a 16-bit number (0 .. 0xFFFF). The second input is + * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006). + */ + r = (r & 0xFFFF) + (r >> 16); + + /* When the input r is (0 .. 0xFFFF), the output r is equal to the input + * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 .. + * 0x10006), the output r is (0 .. 7). So no carry bit can be generated, + * therefore the output r is always a 16-bit number. + */ + r = (r & 0xFFFF) + (r >> 16); + + r = ~r & 0xFFFF; + r = r ? r : 0xFFFF; + + *dst16_ptr = (uint16_t)r; + + /* Thread. */ + thread_ip_inc(p); +} + +static inline void +instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p) +{ + struct thread *t = &p->threads[p->thread_id]; + struct instruction *ip = t->ip; + uint8_t *dst_struct, *src_struct; + uint16_t *dst16_ptr; + uint32_t *src32_ptr; + uint64_t r0, r1; + + TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id); + + /* Structs. */ + dst_struct = t->structs[ip->alu.dst.struct_id]; + dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset]; + + src_struct = t->structs[ip->alu.src.struct_id]; + src32_ptr = (uint32_t *)&src_struct[0]; + + r0 = src32_ptr[0]; /* r0 is a 32-bit number. */ + r1 = src32_ptr[1]; /* r1 is a 32-bit number. */ + r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */ + r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */ + r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */ + + /* The first input is a 16-bit number. The second input is a 19-bit + * number. Their sum is a 20-bit number. + */ + r0 = (r0 & 0xFFFF) + (r0 >> 16); + + /* The first input is a 16-bit number (0 .. 0xFFFF). The second input is + * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E). + */ + r0 = (r0 & 0xFFFF) + (r0 >> 16); + + /* When the input r is (0 .. 0xFFFF), the output r is equal to the input + * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 .. + * 0x1000E), the output r is (0 .. 15). So no carry bit can be + * generated, therefore the output r is always a 16-bit number. + */ + r0 = (r0 & 0xFFFF) + (r0 >> 16); + + r0 = ~r0 & 0xFFFF; + r0 = r0 ? r0 : 0xFFFF; + + *dst16_ptr = (uint16_t)r0; + + /* Thread. */ + thread_ip_inc(p); +} + +static inline void +instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p) +{ + struct thread *t = &p->threads[p->thread_id]; + struct instruction *ip = t->ip; + uint8_t *dst_struct, *src_struct; + uint16_t *dst16_ptr; + uint32_t *src32_ptr; + uint64_t r = 0; + uint32_t i; + + TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id); + + /* Structs. */ + dst_struct = t->structs[ip->alu.dst.struct_id]; + dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset]; + + src_struct = t->structs[ip->alu.src.struct_id]; + src32_ptr = (uint32_t *)&src_struct[0]; + + /* The max number of 32-bit words in a 256-byte header is 8 = 2^3. + * Therefore, in the worst case scenario, a 35-bit number is added to a + * 16-bit number (the input r), so the output r is 36-bit number. + */ + for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++) + r += *src32_ptr; + + /* The first input is a 16-bit number. The second input is a 20-bit + * number. Their sum is a 21-bit number. + */ + r = (r & 0xFFFF) + (r >> 16); + + /* The first input is a 16-bit number (0 .. 0xFFFF). The second input is + * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1000E). + */ + r = (r & 0xFFFF) + (r >> 16); + + /* When the input r is (0 .. 0xFFFF), the output r is equal to the input + * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 .. + * 0x1001E), the output r is (0 .. 31). So no carry bit can be + * generated, therefore the output r is always a 16-bit number. + */ + r = (r & 0xFFFF) + (r >> 16); + + r = ~r & 0xFFFF; + r = r ? r : 0xFFFF; + + *dst16_ptr = (uint16_t)r; + + /* Thread. */ + thread_ip_inc(p); +} + #define RTE_SWX_INSTRUCTION_TOKENS_MAX 16 static int @@ -3276,6 +3494,14 @@ instr_translate(struct rte_swx_pipeline *p, instr, data); + if (!strcmp(tokens[tpos], "ckadd")) + return instr_alu_ckadd_translate(p, + action, + &tokens[tpos], + n_tokens - tpos, + instr, + data); + CHECK(0, EINVAL); } @@ -3447,6 +3673,10 @@ static instr_exec_t instruction_table[] = { [INSTR_ALU_SUB_HH] = instr_alu_sub_hh_exec, [INSTR_ALU_SUB_MI] = instr_alu_sub_mi_exec, [INSTR_ALU_SUB_HI] = instr_alu_sub_hi_exec, + + [INSTR_ALU_CKADD_FIELD] = instr_alu_ckadd_field_exec, + [INSTR_ALU_CKADD_STRUCT] = instr_alu_ckadd_struct_exec, + [INSTR_ALU_CKADD_STRUCT20] = instr_alu_ckadd_struct20_exec, }; static inline void -- 2.17.1