From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <dev-bounces@dpdk.org>
Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124])
	by inbox.dpdk.org (Postfix) with ESMTP id 3B5BFA04A5;
	Tue,  8 Feb 2022 22:52:10 +0100 (CET)
Received: from [217.70.189.124] (localhost [127.0.0.1])
	by mails.dpdk.org (Postfix) with ESMTP id C0C6B41143;
	Tue,  8 Feb 2022 22:52:09 +0100 (CET)
Received: from mga03.intel.com (mga03.intel.com [134.134.136.65])
 by mails.dpdk.org (Postfix) with ESMTP id 4A2A341101
 for <dev@dpdk.org>; Tue,  8 Feb 2022 22:52:08 +0100 (CET)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple;
 d=intel.com; i=@intel.com; q=dns/txt; s=Intel;
 t=1644357128; x=1675893128;
 h=from:to:cc:subject:date:message-id:in-reply-to: references;
 bh=QYFJSVjU1zn02JTAMTcF6Ya8sKjaefdrnQ4rEr5g3Kc=;
 b=Z4zZLeXhlE9at+10rtp81LJFQbbrTSEDzqm3aEPaTr2rxTVC5Tuhn8aR
 jkdvTi7spivwAbGZd6cBI8kTFxTuIorhdhBZEgv8h+Kf4bSD2YO629KWB
 pSEWcx4wbcRdYsEszgz5CDL3gazhGjpykj74C41LcsIKtPfyYJHmODP7t
 Zw2oTJ5CCEdOV3bfwmz4wOWTe9P+1LpQI7cSo6iB6rlVzoHp/Lle3FW+G
 Bln92qdkXFwVZ9WEw3T8vnHuTIc7Jr3b+Y2KRzUE3myqxfmQjTehT+0Q+
 Qo9fypIlRiSQ3FV5WFwH9A6aKeuHS+31d5kUIp5YjmpQ8Ap/eZgG5Wbru w==;
X-IronPort-AV: E=McAfee;i="6200,9189,10252"; a="249008278"
X-IronPort-AV: E=Sophos;i="5.88,353,1635231600"; d="scan'208";a="249008278"
Received: from fmsmga003.fm.intel.com ([10.253.24.29])
 by orsmga103.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 08 Feb 2022 13:52:07 -0800
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.88,353,1635231600"; d="scan'208";a="622048110"
Received: from silpixa00400573.ir.intel.com (HELO
 silpixa00400573.ger.corp.intel.com) ([10.237.223.107])
 by FMSMGA003.fm.intel.com with ESMTP; 08 Feb 2022 13:52:06 -0800
From: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
To: dev@dpdk.org
Cc: Yogesh Jangra <yogesh.jangra@intel.com>,
 Harshad Suresh Narayane <harshad.suresh.narayane@intel.com>
Subject: [PATCH V2] pipeline: support checksum for variable size headers
Date: Tue,  8 Feb 2022 21:52:05 +0000
Message-Id: <20220208215205.33730-1-cristian.dumitrescu@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20220124223249.67660-1-cristian.dumitrescu@intel.com>
References: <20220124223249.67660-1-cristian.dumitrescu@intel.com>
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org

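Add checksum support for variable-size headers, such as IPv4 headers
with options. For a header source operand, the ckadd instruction now
reads the header size at run time instead of using the fixed size of
the header's struct type.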

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Signed-off-by: Yogesh Jangra <yogesh.jangra@intel.com>
Signed-off-by: Harshad Suresh Narayane <harshad.suresh.narayane@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 13 +++---
 lib/pipeline/rte_swx_pipeline_internal.h | 59 ++++++++++++++++--------
 2 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 1a50c4bb72..eb54ccaeda 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2887,8 +2887,8 @@ instr_alu_ckadd_translate(struct rte_swx_pipeline *p,
 	CHECK(n_tokens == 3, EINVAL);
 
 	fdst = header_field_parse(p, dst, &hdst);
-	CHECK(fdst && (fdst->n_bits == 16), EINVAL);
-	CHECK(!fdst->var_size, EINVAL);
+	CHECK(fdst, EINVAL);
+	CHECK(!fdst->var_size && (fdst->n_bits == 16), EINVAL);
 
 	/* CKADD_FIELD. */
 	fsrc = header_field_parse(p, src, &hsrc);
@@ -2908,17 +2908,16 @@ instr_alu_ckadd_translate(struct rte_swx_pipeline *p,
 	/* CKADD_STRUCT, CKADD_STRUCT20. */
 	hsrc = header_parse(p, src);
 	CHECK(hsrc, EINVAL);
-	CHECK(!hsrc->st->var_size, EINVAL);
 
 	instr->type = INSTR_ALU_CKADD_STRUCT;
-	if ((hsrc->st->n_bits / 8) == 20)
+	if (!hsrc->st->var_size && ((hsrc->st->n_bits / 8) == 20))
 		instr->type = INSTR_ALU_CKADD_STRUCT20;
 
 	instr->alu.dst.struct_id = (uint8_t)hdst->struct_id;
 	instr->alu.dst.n_bits = fdst->n_bits;
 	instr->alu.dst.offset = fdst->offset / 8;
 	instr->alu.src.struct_id = (uint8_t)hsrc->struct_id;
-	instr->alu.src.n_bits = hsrc->st->n_bits;
+	instr->alu.src.n_bits = (uint8_t)hsrc->id; /* The src header ID is stored here. */
 	instr->alu.src.offset = 0; /* Unused. */
 	return 0;
 }
@@ -2938,8 +2937,8 @@ instr_alu_cksub_translate(struct rte_swx_pipeline *p,
 	CHECK(n_tokens == 3, EINVAL);
 
 	fdst = header_field_parse(p, dst, &hdst);
-	CHECK(fdst && (fdst->n_bits == 16), EINVAL);
-	CHECK(!fdst->var_size, EINVAL);
+	CHECK(fdst, EINVAL);
+	CHECK(!fdst->var_size && (fdst->n_bits == 16), EINVAL);
 
 	fsrc = header_field_parse(p, src, &hsrc);
 	CHECK(fsrc, EINVAL);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 2e86383e45..95ef6efc7e 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -305,16 +305,16 @@ enum instruction_type {
 	INSTR_ALU_SUB_HI, /* dst = H, src = I */
 
 	/* ckadd dst src
-	 * dst = dst '+ src[0:1] '+ src[2:3] + ...
-	 * dst = H, src = {H, h.header}
+	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
+	 * dst = H, src = {H, h.header}, '+ = 1's complement addition operator
 	 */
 	INSTR_ALU_CKADD_FIELD,    /* src = H */
-	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */
-	INSTR_ALU_CKADD_STRUCT,   /* src = h.hdeader, with any sizeof(header) */
+	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 bytes. */
+	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with sizeof(header) any 4-byte multiple. */
 
 	/* cksub dst src
 	 * dst = dst '- src
-	 * dst = H, src = H
+	 * dst = H, src = H, '- = 1's complement subtraction operator
 	 */
 	INSTR_ALU_CKSUB_FIELD,
 
@@ -2700,6 +2700,7 @@ __instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
 	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
 	src = src64 & src64_mask;
 
+	/* Initialize the result with destination 1's complement. */
 	r = dst;
 	r = ~r & 0xFFFF;
 
@@ -2727,6 +2728,7 @@ __instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
 	 */
 	r = (r & 0xFFFF) + (r >> 16);
 
+	/* Apply 1's complement to the result. */
 	r = ~r & 0xFFFF;
 	r = r ? r : 0xFFFF;
 
@@ -2756,6 +2758,7 @@ __instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
 	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
 	src = src64 & src64_mask;
 
+	/* Initialize the result with destination 1's complement. */
 	r = dst;
 	r = ~r & 0xFFFF;
 
@@ -2795,6 +2798,7 @@ __instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
 	 */
 	r = (r & 0xFFFF) + (r >> 16);
 
+	/* Apply 1's complement to the result. */
 	r = ~r & 0xFFFF;
 	r = r ? r : 0xFFFF;
 
@@ -2807,7 +2811,7 @@ __instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
 				const struct instruction *ip)
 {
 	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
+	uint16_t *dst16_ptr, dst;
 	uint32_t *src32_ptr;
 	uint64_t r0, r1;
 
@@ -2816,13 +2820,18 @@ __instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
 	/* Structs. */
 	dst_struct = t->structs[ip->alu.dst.struct_id];
 	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
 
 	src_struct = t->structs[ip->alu.src.struct_id];
 	src32_ptr = (uint32_t *)&src_struct[0];
 
-	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
+	/* Initialize the result with destination 1's complement. */
+	r0 = dst;
+	r0 = ~r0 & 0xFFFF;
+
+	r0 += src32_ptr[0]; /* The output r0 is a 33-bit number. */
 	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
-	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
+	r0 += src32_ptr[2]; /* The output r0 is a 34-bit number. */
 	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
 	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
 
@@ -2843,6 +2852,7 @@ __instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
 	 */
 	r0 = (r0 & 0xFFFF) + (r0 >> 16);
 
+	/* Apply 1's complement to the result. */
 	r0 = ~r0 & 0xFFFF;
 	r0 = r0 ? r0 : 0xFFFF;
 
@@ -2854,45 +2864,58 @@ __instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
 			      struct thread *t,
 			      const struct instruction *ip)
 {
+	uint32_t src_header_id = ip->alu.src.n_bits; /* The src header ID is stored here. */
+	uint32_t n_src_header_bytes = t->headers[src_header_id].n_bytes;
 	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
+	uint16_t *dst16_ptr, dst;
 	uint32_t *src32_ptr;
-	uint64_t r = 0;
+	uint64_t r;
 	uint32_t i;
 
+	if (n_src_header_bytes == 20) {
+		__instr_alu_ckadd_struct20_exec(p, t, ip);
+		return;
+	}
+
 	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
 
 	/* Structs. */
 	dst_struct = t->structs[ip->alu.dst.struct_id];
 	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
 
 	src_struct = t->structs[ip->alu.src.struct_id];
 	src32_ptr = (uint32_t *)&src_struct[0];
 
-	/* The max number of 32-bit words in a 256-byte header is 8 = 2^3.
-	 * Therefore, in the worst case scenario, a 35-bit number is added to a
-	 * 16-bit number (the input r), so the output r is 36-bit number.
+	/* Initialize the result with destination 1's complement. */
+	r = dst;
+	r = ~r & 0xFFFF;
+
+	/* The max number of 32-bit words in a 32K-byte header is 2^13.
+	 * Therefore, in the worst case scenario, a 45-bit number is added to a
+	 * 16-bit number (the input r), so the output r is a 46-bit number.
 	 */
-	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
+	for (i = 0; i < n_src_header_bytes / 4; i++, src32_ptr++)
 		r += *src32_ptr;
 
-	/* The first input is a 16-bit number. The second input is a 20-bit
-	 * number. Their sum is a 21-bit number.
+	/* The first input is a 16-bit number. The second input is a 30-bit
+	 * number. Their sum is a 31-bit number.
 	 */
 	r = (r & 0xFFFF) + (r >> 16);
 
 	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1000E).
+	 * a 15-bit number (0 .. 0x7FFF). The sum is a 17-bit number (0 .. 0x17FFE).
 	 */
 	r = (r & 0xFFFF) + (r >> 16);
 
 	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
 	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
+	 * 0x17FFE), the output r is (0 .. 0x7FFF). So no carry bit can be
 	 * generated, therefore the output r is always a 16-bit number.
 	 */
 	r = (r & 0xFFFF) + (r >> 16);
 
+	/* Apply 1's complement to the result. */
 	r = ~r & 0xFFFF;
 	r = r ? r : 0xFFFF;
 
-- 
2.17.1