DPDK patches and discussions
 help / color / mirror / Atom feed
From: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH 12/24] pipeline: create inline functions for ALU instructions
Date: Fri, 10 Sep 2021 13:29:51 +0100	[thread overview]
Message-ID: <20210910123003.85448-12-cristian.dumitrescu@intel.com> (raw)
In-Reply-To: <20210910123003.85448-1-cristian.dumitrescu@intel.com>

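Move the bodies of the ALU instruction handlers (add, sub, shl, shr,
and, or, xor, ckadd, cksub) from rte_swx_pipeline.c into static inline
__instr_alu_*_exec() helpers in rte_swx_pipeline_internal.h. Each
handler now calls its helper and then advances the thread instruction
pointer, as before.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>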
---
 lib/pipeline/rte_swx_pipeline.c          | 348 ++-----------
 lib/pipeline/rte_swx_pipeline_internal.h | 616 +++++++++++++++++++++++
 2 files changed, 660 insertions(+), 304 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index a06dc8d348..8956b6de27 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3044,10 +3044,8 @@ instr_alu_add_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add\n", p->thread_id);
-
-	/* Structs. */
-	ALU(t, ip, +);
+	/* Structs. */
+	__instr_alu_add_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3059,10 +3057,8 @@ instr_alu_add_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, +);
+	__instr_alu_add_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3074,10 +3070,8 @@ instr_alu_add_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, +);
+	__instr_alu_add_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3089,10 +3083,8 @@ instr_alu_add_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, +);
+	__instr_alu_add_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3104,10 +3096,8 @@ instr_alu_add_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, +);
+	__instr_alu_add_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3119,10 +3109,8 @@ instr_alu_add_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, +);
+	__instr_alu_add_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3134,10 +3122,8 @@ instr_alu_sub_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, -);
+	__instr_alu_sub_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3149,10 +3135,8 @@ instr_alu_sub_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, -);
+	__instr_alu_sub_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3164,10 +3148,8 @@ instr_alu_sub_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, -);
+	__instr_alu_sub_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3179,10 +3161,8 @@ instr_alu_sub_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, -);
+	__instr_alu_sub_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3194,10 +3174,8 @@ instr_alu_sub_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, -);
+	__instr_alu_sub_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3209,10 +3187,8 @@ instr_alu_sub_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, -);
+	__instr_alu_sub_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3224,10 +3200,8 @@ instr_alu_shl_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, <<);
+	__instr_alu_shl_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3239,10 +3213,8 @@ instr_alu_shl_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, <<);
+	__instr_alu_shl_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3254,10 +3226,8 @@ instr_alu_shl_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, <<);
+	__instr_alu_shl_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3269,10 +3239,8 @@ instr_alu_shl_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, <<);
+	__instr_alu_shl_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3284,10 +3252,8 @@ instr_alu_shl_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, <<);
+	__instr_alu_shl_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3299,10 +3265,8 @@ instr_alu_shl_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, <<);
+	__instr_alu_shl_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3314,10 +3278,8 @@ instr_alu_shr_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, >>);
+	__instr_alu_shr_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3329,10 +3291,8 @@ instr_alu_shr_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, >>);
+	__instr_alu_shr_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3344,10 +3304,8 @@ instr_alu_shr_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, >>);
+	__instr_alu_shr_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3359,10 +3317,8 @@ instr_alu_shr_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, >>);
+	__instr_alu_shr_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3374,10 +3330,8 @@ instr_alu_shr_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, >>);
+	__instr_alu_shr_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3389,10 +3343,8 @@ instr_alu_shr_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, >>);
+	__instr_alu_shr_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3404,10 +3356,8 @@ instr_alu_and_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, &);
+	__instr_alu_and_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3419,10 +3369,8 @@ instr_alu_and_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, &);
+	__instr_alu_and_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3434,10 +3382,8 @@ instr_alu_and_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, &);
+	__instr_alu_and_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3449,10 +3395,8 @@ instr_alu_and_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, &);
+	__instr_alu_and_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3464,10 +3408,8 @@ instr_alu_and_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, &);
+	__instr_alu_and_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3479,10 +3421,8 @@ instr_alu_or_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, |);
+	__instr_alu_or_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3494,10 +3434,8 @@ instr_alu_or_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, |);
+	__instr_alu_or_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3509,10 +3447,8 @@ instr_alu_or_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, |);
+	__instr_alu_or_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3524,10 +3460,8 @@ instr_alu_or_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, |);
+	__instr_alu_or_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3539,10 +3473,8 @@ instr_alu_or_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, |);
+	__instr_alu_or_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3554,10 +3486,8 @@ instr_alu_xor_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, ^);
+	__instr_alu_xor_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3569,10 +3499,8 @@ instr_alu_xor_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, ^);
+	__instr_alu_xor_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3584,10 +3512,8 @@ instr_alu_xor_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, ^);
+	__instr_alu_xor_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3599,10 +3525,8 @@ instr_alu_xor_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, ^);
+	__instr_alu_xor_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3614,10 +3538,8 @@ instr_alu_xor_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, ^);
+	__instr_alu_xor_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3628,55 +3550,9 @@ instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr, dst;
-	uint64_t *src64_ptr, src64, src64_mask, src;
-	uint64_t r;
-
-	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-	dst = *dst16_ptr;
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
-	src64 = *src64_ptr;
-	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
-	src = src64 & src64_mask;
-
-	r = dst;
-	r = ~r & 0xFFFF;
-
-	/* The first input (r) is a 16-bit number. The second and the third
-	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
-	 * three numbers (output r) is a 34-bit number.
-	 */
-	r += (src >> 32) + (src & 0xFFFFFFFF);
-
-	/* The first input is a 16-bit number. The second input is an 18-bit
-	 * number. In the worst case scenario, the sum of the two numbers is a
-	 * 19-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
-	 * therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_ckadd_field_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3687,67 +3563,9 @@ instr_alu_cksub_field_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr, dst;
-	uint64_t *src64_ptr, src64, src64_mask, src;
-	uint64_t r;
-
-	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-	dst = *dst16_ptr;
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
-	src64 = *src64_ptr;
-	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
-	src = src64 & src64_mask;
-
-	r = dst;
-	r = ~r & 0xFFFF;
-
-	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
-	 * the following sequence of operations in 2's complement arithmetic:
-	 *    a '- b = (a - b) % 0xFFFF.
-	 *
-	 * In order to prevent an underflow for the below subtraction, in which
-	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
-	 * minuend), we first add a multiple of the 0xFFFF modulus to the
-	 * minuend. The number we add to the minuend needs to be a 34-bit number
-	 * or higher, so for readability reasons we picked the 36-bit multiple.
-	 * We are effectively turning the 16-bit minuend into a 36-bit number:
-	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
-	 */
-	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */
-
-	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
-	 * result (the output r) is a 36-bit number.
-	 */
-	r -= (src >> 32) + (src & 0xFFFFFFFF);
-
-	/* The first input is a 16-bit number. The second input is a 20-bit
-	 * number. Their sum is a 21-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_cksub_field_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3758,47 +3576,9 @@ instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
-	uint32_t *src32_ptr;
-	uint64_t r0, r1;
-
-	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src32_ptr = (uint32_t *)&src_struct[0];
-
-	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
-	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
-	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
-	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
-	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
-
-	/* The first input is a 16-bit number. The second input is a 19-bit
-	 * number. Their sum is a 20-bit number.
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	r0 = ~r0 & 0xFFFF;
-	r0 = r0 ? r0 : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r0;
+	__instr_alu_ckadd_struct20_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3809,49 +3589,9 @@ instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
-	uint32_t *src32_ptr;
-	uint64_t r = 0;
-	uint32_t i;
-
-	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src32_ptr = (uint32_t *)&src_struct[0];
-
-	/* The max number of 32-bit words in a 256-byte header is 8 = 2^3.
-	 * Therefore, in the worst case scenario, a 35-bit number is added to a
-	 * 16-bit number (the input r), so the output r is 36-bit number.
-	 */
-	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
-		r += *src32_ptr;
-
-	/* The first input is a 16-bit number. The second input is a 20-bit
-	 * number. Their sum is a 21-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1000E).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_ckadd_struct_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index ec8e342a5d..7c4a2c05ef 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2211,4 +2211,620 @@ __instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct
 	__instr_dma_ht_many_exec(p, t, ip, 8);
 }
 
+/*
+ * alu.
+ */
+static inline void
+__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add\n", p->thread_id);
+
+	ALU(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, +);
+}
+
+static inline void
+__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub\n", p->thread_id);
+
+	ALU(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, -);
+}
+
+static inline void
+__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl\n", p->thread_id);
+
+	ALU(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr\n", p->thread_id);
+
+	ALU(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);
+
+	/* Structs. */
+	ALU_MI(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, >>);
+}
+
+static inline void
+__instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and\n", p->thread_id);
+
+	ALU(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (i)\n", p->thread_id);
+
+	ALU_I(t, ip, &);
+}
+
+static inline void
+__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or\n", p->thread_id);
+
+	ALU(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		      struct thread *t,
+		      const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (i)\n", p->thread_id);
+
+	ALU_I(t, ip, |);
+}
+
+static inline void
+__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor\n", p->thread_id);
+
+	ALU(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (i)\n", p->thread_id);
+
+	ALU_I(t, ip, ^);
+}
+
+static inline void
+__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
+			     struct thread *t,
+			     const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr, dst;
+	uint64_t *src64_ptr, src64, src64_mask, src;
+	uint64_t r;
+
+	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
+	src64 = *src64_ptr;
+	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
+	src = src64 & src64_mask;
+
+	r = dst;
+	r = ~r & 0xFFFF;
+
+	/* The first input (r) is a 16-bit number. The second and the third
+	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
+	 * three numbers (output r) is a 34-bit number.
+	 */
+	r += (src >> 32) + (src & 0xFFFFFFFF);
+
+	/* The first input is a 16-bit number. The second input is an 18-bit
+	 * number. In the worst case scenario, the sum of the two numbers is a
+	 * 19-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x10006), the output r is (1 .. 7). So no carry bit can be generated,
+	 * therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
+static inline void
+__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
+			     struct thread *t,
+			     const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr, dst;
+	uint64_t *src64_ptr, src64, src64_mask, src;
+	uint64_t r;
+
+	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);
+
+	/* Structs. The 16-bit value at dst is read here and rewritten below. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
+	src64 = *src64_ptr;
+	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits); /* Assumes n_bits is 1 .. 64. */
+	src = src64 & src64_mask; /* Low n_bits bits of the 64-bit load. */
+
+	r = dst;
+	r = ~r & 0xFFFF; /* Bitwise complement of dst, kept to 16 bits. */
+
+	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
+	 * the following sequence of operations in 2's complement arithmetic:
+	 *    a '- b = (a - b) % 0xFFFF.
+	 *
+	 * In order to prevent an underflow for the below subtraction, in which
+	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
+	 * minuend), we first add a multiple of the 0xFFFF modulus to the
+	 * minuend. The number we add to the minuend needs to be a 34-bit number
+	 * or higher, so for readability reasons we picked the 36-bit multiple
+	 * 0xFFFF00000 = 0xFFFF << 20, which leaves the result unchanged:
+	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
+	 */
+	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */
+
+	/* A 33-bit number (the sum of the two 32-bit halves of src) is taken
+	 * out of the 36-bit input r. The output r is a 36-bit number.
+	 */
+	r -= (src >> 32) + (src & 0xFFFFFFFF);
+
+	/* The first input (r & 0xFFFF) is a 16-bit number. The second input
+	 * (r >> 16) is a 20-bit number. Their sum is a 21-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF; /* Complement the folded 16-bit result. */
+	r = r ? r : 0xFFFF; /* A zero result is written as 0xFFFF. */
+
+	*dst16_ptr = (uint16_t)r;
+}
+
+static inline void
+__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
+				struct thread *t,
+				const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr;
+	uint32_t *src32_ptr;
+	uint64_t r0, r1;
+
+	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);
+
+	/* Structs. The src words are read from offset 0 of the src struct. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src32_ptr = (uint32_t *)&src_struct[0];
+
+	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
+	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
+	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
+	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
+	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
+
+	/* The first input (r0 & 0xFFFF) is a 16-bit number. The second input
+	 * (r0 >> 16) is a 19-bit number. Their sum is a 20-bit number.
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	/* When the input r0 is (0 .. 0xFFFF), the output r0 is equal to the
+	 * input r0, so the output is (0 .. 0xFFFF). When the input r0 is
+	 * (0x10000 .. 0x1000E), the output r0 is (0 .. 15). So no carry bit can
+	 * be generated, therefore the output r0 is always a 16-bit number.
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	r0 = ~r0 & 0xFFFF; /* Complement the folded 16-bit sum. */
+	r0 = r0 ? r0 : 0xFFFF; /* A zero result is written as 0xFFFF. */
+
+	*dst16_ptr = (uint16_t)r0;
+}
+
+static inline void
+__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
+			      struct thread *t,
+			      const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr;
+	uint32_t *src32_ptr;
+	uint64_t r = 0;
+	uint32_t i;
+
+	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
+
+	/* Structs. The src words are read from offset 0 of the src struct. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src32_ptr = (uint32_t *)&src_struct[0];
+
+	/* Assumes at most 8 = 2^3 32-bit words, i.e. a 256-bit header (TODO
+	 * confirm this limit): in the worst case scenario, a 35-bit sum is
+	 * accumulated into r (initially 0), so the output r fits in 36 bits.
+	 */
+	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
+		r += *src32_ptr; /* Whole 32-bit words only. */
+
+	/* The first input (r & 0xFFFF) is a 16-bit number. The second input
+	 * (r >> 16) is a 20-bit number. Their sum is a 21-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF; /* Complement the folded 16-bit sum. */
+	r = r ? r : 0xFFFF; /* A zero result is written as 0xFFFF. */
+
+	*dst16_ptr = (uint16_t)r;
+}
+
 #endif
-- 
2.17.1


  parent reply	other threads:[~2021-09-10 12:31 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
2021-09-10 12:29 ` Cristian Dumitrescu [this message]
2021-09-10 12:29 ` [dpdk-dev] [PATCH 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 17/24] pipeline: introduce action functions Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
2021-09-10 12:30 ` [dpdk-dev] [PATCH 21/24] pipeline: generate action functions Cristian Dumitrescu
2021-09-10 12:30 ` [dpdk-dev] [PATCH 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
2021-09-10 12:30 ` [dpdk-dev] [PATCH 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
2021-09-10 12:30 ` [dpdk-dev] [PATCH 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 17/24] pipeline: introduce action functions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 21/24] pipeline: generate action functions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
2021-09-10 14:09   ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Bruce Richardson
2021-09-13 17:07     ` Dumitrescu, Cristian
2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 17/24] pipeline: introduce action functions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 21/24] pipeline: generate action functions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
2021-09-13 16:51     ` [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file Stephen Hemminger
2021-09-13 18:42       ` Dumitrescu, Cristian
2021-09-13 19:02         ` Stephen Hemminger
2021-09-20 15:24     ` Dumitrescu, Cristian
2021-09-27 10:11     ` Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210910123003.85448-12-cristian.dumitrescu@intel.com \
    --to=cristian.dumitrescu@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).