DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file
@ 2021-09-10 12:29 Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
                   ` (23 more replies)
  0 siblings, 24 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Start to consolidate the data structures and inline functions required
by the pipeline instructions into an internal header file.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
Depends-on: series-18297 ("[V4,1/4] table: add support learner tables")

 lib/pipeline/meson.build                 |    4 +
 lib/pipeline/rte_swx_pipeline.c          | 1373 +--------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 1383 ++++++++++++++++++++++
 3 files changed, 1388 insertions(+), 1372 deletions(-)
 create mode 100644 lib/pipeline/rte_swx_pipeline_internal.h

diff --git a/lib/pipeline/meson.build b/lib/pipeline/meson.build
index 9132bb517a..ec009631bf 100644
--- a/lib/pipeline/meson.build
+++ b/lib/pipeline/meson.build
@@ -18,3 +18,7 @@ headers = files(
         'rte_swx_ctl.h',
 )
 deps += ['port', 'table', 'meter', 'sched', 'cryptodev']
+
+indirect_headers += files(
+        'rte_swx_pipeline_internal.h',
+)
diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index f89a134a52..ae9b2056db 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2,24 +2,11 @@
  * Copyright(c) 2020 Intel Corporation
  */
 #include <stdlib.h>
-#include <string.h>
 #include <stdio.h>
 #include <errno.h>
-#include <inttypes.h>
-#include <sys/queue.h>
 #include <arpa/inet.h>
 
-#include <rte_common.h>
-#include <rte_prefetch.h>
-#include <rte_byteorder.h>
-#include <rte_cycles.h>
-#include <rte_meter.h>
-
-#include <rte_swx_table_selector.h>
-#include <rte_swx_table_learner.h>
-
-#include "rte_swx_pipeline.h"
-#include "rte_swx_ctl.h"
+#include "rte_swx_pipeline_internal.h"
 
 #define CHECK(condition, err_code)                                             \
 do {                                                                           \
@@ -40,22 +27,9 @@ do {                                                                           \
 	       RTE_SWX_INSTRUCTION_SIZE),                                      \
 	      err_code)
 
-#ifndef TRACE_LEVEL
-#define TRACE_LEVEL 0
-#endif
-
-#if TRACE_LEVEL
-#define TRACE(...) printf(__VA_ARGS__)
-#else
-#define TRACE(...)
-#endif
-
 /*
  * Environment.
  */
-#define ntoh64(x) rte_be_to_cpu_64(x)
-#define hton64(x) rte_cpu_to_be_64(x)
-
 #ifndef RTE_SWX_PIPELINE_HUGE_PAGES_DISABLE
 
 #include <rte_malloc.h>
@@ -103,1351 +77,6 @@ env_free(void *start, size_t size)
 
 #endif
 
-/*
- * Struct.
- */
-struct field {
-	char name[RTE_SWX_NAME_SIZE];
-	uint32_t n_bits;
-	uint32_t offset;
-	int var_size;
-};
-
-struct struct_type {
-	TAILQ_ENTRY(struct_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct field *fields;
-	uint32_t n_fields;
-	uint32_t n_bits;
-	uint32_t n_bits_min;
-	int var_size;
-};
-
-TAILQ_HEAD(struct_type_tailq, struct_type);
-
-/*
- * Input port.
- */
-struct port_in_type {
-	TAILQ_ENTRY(port_in_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct rte_swx_port_in_ops ops;
-};
-
-TAILQ_HEAD(port_in_type_tailq, port_in_type);
-
-struct port_in {
-	TAILQ_ENTRY(port_in) node;
-	struct port_in_type *type;
-	void *obj;
-	uint32_t id;
-};
-
-TAILQ_HEAD(port_in_tailq, port_in);
-
-struct port_in_runtime {
-	rte_swx_port_in_pkt_rx_t pkt_rx;
-	void *obj;
-};
-
-/*
- * Output port.
- */
-struct port_out_type {
-	TAILQ_ENTRY(port_out_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct rte_swx_port_out_ops ops;
-};
-
-TAILQ_HEAD(port_out_type_tailq, port_out_type);
-
-struct port_out {
-	TAILQ_ENTRY(port_out) node;
-	struct port_out_type *type;
-	void *obj;
-	uint32_t id;
-};
-
-TAILQ_HEAD(port_out_tailq, port_out);
-
-struct port_out_runtime {
-	rte_swx_port_out_pkt_tx_t pkt_tx;
-	rte_swx_port_out_flush_t flush;
-	void *obj;
-};
-
-/*
- * Extern object.
- */
-struct extern_type_member_func {
-	TAILQ_ENTRY(extern_type_member_func) node;
-	char name[RTE_SWX_NAME_SIZE];
-	rte_swx_extern_type_member_func_t func;
-	uint32_t id;
-};
-
-TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);
-
-struct extern_type {
-	TAILQ_ENTRY(extern_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *mailbox_struct_type;
-	rte_swx_extern_type_constructor_t constructor;
-	rte_swx_extern_type_destructor_t destructor;
-	struct extern_type_member_func_tailq funcs;
-	uint32_t n_funcs;
-};
-
-TAILQ_HEAD(extern_type_tailq, extern_type);
-
-struct extern_obj {
-	TAILQ_ENTRY(extern_obj) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct extern_type *type;
-	void *obj;
-	uint32_t struct_id;
-	uint32_t id;
-};
-
-TAILQ_HEAD(extern_obj_tailq, extern_obj);
-
-#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
-#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
-#endif
-
-struct extern_obj_runtime {
-	void *obj;
-	uint8_t *mailbox;
-	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
-};
-
-/*
- * Extern function.
- */
-struct extern_func {
-	TAILQ_ENTRY(extern_func) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *mailbox_struct_type;
-	rte_swx_extern_func_t func;
-	uint32_t struct_id;
-	uint32_t id;
-};
-
-TAILQ_HEAD(extern_func_tailq, extern_func);
-
-struct extern_func_runtime {
-	uint8_t *mailbox;
-	rte_swx_extern_func_t func;
-};
-
-/*
- * Header.
- */
-struct header {
-	TAILQ_ENTRY(header) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *st;
-	uint32_t struct_id;
-	uint32_t id;
-};
-
-TAILQ_HEAD(header_tailq, header);
-
-struct header_runtime {
-	uint8_t *ptr0;
-	uint32_t n_bytes;
-};
-
-struct header_out_runtime {
-	uint8_t *ptr0;
-	uint8_t *ptr;
-	uint32_t n_bytes;
-};
-
-/*
- * Instruction.
- */
-
-/* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
- * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
- * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
- * when transferred to packet meta-data and in NBO when transferred to packet
- * headers.
- */
-
-/* Notation conventions:
- *    -Header field: H = h.header.field (dst/src)
- *    -Meta-data field: M = m.field (dst/src)
- *    -Extern object mailbox field: E = e.field (dst/src)
- *    -Extern function mailbox field: F = f.field (dst/src)
- *    -Table action data field: T = t.field (src only)
- *    -Immediate value: I = 32-bit unsigned value (src only)
- */
-
-enum instruction_type {
-	/* rx m.port_in */
-	INSTR_RX,
-
-	/* tx port_out
-	 * port_out = MI
-	 */
-	INSTR_TX,   /* port_out = M */
-	INSTR_TX_I, /* port_out = I */
-
-	/* extract h.header */
-	INSTR_HDR_EXTRACT,
-	INSTR_HDR_EXTRACT2,
-	INSTR_HDR_EXTRACT3,
-	INSTR_HDR_EXTRACT4,
-	INSTR_HDR_EXTRACT5,
-	INSTR_HDR_EXTRACT6,
-	INSTR_HDR_EXTRACT7,
-	INSTR_HDR_EXTRACT8,
-
-	/* extract h.header m.last_field_size */
-	INSTR_HDR_EXTRACT_M,
-
-	/* lookahead h.header */
-	INSTR_HDR_LOOKAHEAD,
-
-	/* emit h.header */
-	INSTR_HDR_EMIT,
-	INSTR_HDR_EMIT_TX,
-	INSTR_HDR_EMIT2_TX,
-	INSTR_HDR_EMIT3_TX,
-	INSTR_HDR_EMIT4_TX,
-	INSTR_HDR_EMIT5_TX,
-	INSTR_HDR_EMIT6_TX,
-	INSTR_HDR_EMIT7_TX,
-	INSTR_HDR_EMIT8_TX,
-
-	/* validate h.header */
-	INSTR_HDR_VALIDATE,
-
-	/* invalidate h.header */
-	INSTR_HDR_INVALIDATE,
-
-	/* mov dst src
-	 * dst = src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_MOV,    /* dst = MEF, src = MEFT */
-	INSTR_MOV_MH, /* dst = MEF, src = H */
-	INSTR_MOV_HM, /* dst = H, src = MEFT */
-	INSTR_MOV_HH, /* dst = H, src = H */
-	INSTR_MOV_I,  /* dst = HMEF, src = I */
-
-	/* dma h.header t.field
-	 * memcpy(h.header, t.field, sizeof(h.header))
-	 */
-	INSTR_DMA_HT,
-	INSTR_DMA_HT2,
-	INSTR_DMA_HT3,
-	INSTR_DMA_HT4,
-	INSTR_DMA_HT5,
-	INSTR_DMA_HT6,
-	INSTR_DMA_HT7,
-	INSTR_DMA_HT8,
-
-	/* add dst src
-	 * dst += src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
-	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
-	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
-	INSTR_ALU_ADD_HH, /* dst = H, src = H */
-	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
-	INSTR_ALU_ADD_HI, /* dst = H, src = I */
-
-	/* sub dst src
-	 * dst -= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
-	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
-	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
-	INSTR_ALU_SUB_HH, /* dst = H, src = H */
-	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
-	INSTR_ALU_SUB_HI, /* dst = H, src = I */
-
-	/* ckadd dst src
-	 * dst = dst '+ src[0:1] '+ src[2:3] + ...
-	 * dst = H, src = {H, h.header}
-	 */
-	INSTR_ALU_CKADD_FIELD,    /* src = H */
-	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */
-	INSTR_ALU_CKADD_STRUCT,   /* src = h.hdeader, with any sizeof(header) */
-
-	/* cksub dst src
-	 * dst = dst '- src
-	 * dst = H, src = H
-	 */
-	INSTR_ALU_CKSUB_FIELD,
-
-	/* and dst src
-	 * dst &= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
-	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
-	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
-	INSTR_ALU_AND_HH, /* dst = H, src = H */
-	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */
-
-	/* or dst src
-	 * dst |= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
-	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
-	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
-	INSTR_ALU_OR_HH, /* dst = H, src = H */
-	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */
-
-	/* xor dst src
-	 * dst ^= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
-	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
-	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
-	INSTR_ALU_XOR_HH, /* dst = H, src = H */
-	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */
-
-	/* shl dst src
-	 * dst <<= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
-	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
-	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
-	INSTR_ALU_SHL_HH, /* dst = H, src = H */
-	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
-	INSTR_ALU_SHL_HI, /* dst = H, src = I */
-
-	/* shr dst src
-	 * dst >>= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
-	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
-	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
-	INSTR_ALU_SHR_HH, /* dst = H, src = H */
-	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
-	INSTR_ALU_SHR_HI, /* dst = H, src = I */
-
-	/* regprefetch REGARRAY index
-	 * prefetch REGARRAY[index]
-	 * index = HMEFTI
-	 */
-	INSTR_REGPREFETCH_RH, /* index = H */
-	INSTR_REGPREFETCH_RM, /* index = MEFT */
-	INSTR_REGPREFETCH_RI, /* index = I */
-
-	/* regrd dst REGARRAY index
-	 * dst = REGARRAY[index]
-	 * dst = HMEF, index = HMEFTI
-	 */
-	INSTR_REGRD_HRH, /* dst = H, index = H */
-	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
-	INSTR_REGRD_HRI, /* dst = H, index = I */
-	INSTR_REGRD_MRH, /* dst = MEF, index = H */
-	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
-	INSTR_REGRD_MRI, /* dst = MEF, index = I */
-
-	/* regwr REGARRAY index src
-	 * REGARRAY[index] = src
-	 * index = HMEFTI, src = HMEFTI
-	 */
-	INSTR_REGWR_RHH, /* index = H, src = H */
-	INSTR_REGWR_RHM, /* index = H, src = MEFT */
-	INSTR_REGWR_RHI, /* index = H, src = I */
-	INSTR_REGWR_RMH, /* index = MEFT, src = H */
-	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
-	INSTR_REGWR_RMI, /* index = MEFT, src = I */
-	INSTR_REGWR_RIH, /* index = I, src = H */
-	INSTR_REGWR_RIM, /* index = I, src = MEFT */
-	INSTR_REGWR_RII, /* index = I, src = I */
-
-	/* regadd REGARRAY index src
-	 * REGARRAY[index] += src
-	 * index = HMEFTI, src = HMEFTI
-	 */
-	INSTR_REGADD_RHH, /* index = H, src = H */
-	INSTR_REGADD_RHM, /* index = H, src = MEFT */
-	INSTR_REGADD_RHI, /* index = H, src = I */
-	INSTR_REGADD_RMH, /* index = MEFT, src = H */
-	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
-	INSTR_REGADD_RMI, /* index = MEFT, src = I */
-	INSTR_REGADD_RIH, /* index = I, src = H */
-	INSTR_REGADD_RIM, /* index = I, src = MEFT */
-	INSTR_REGADD_RII, /* index = I, src = I */
-
-	/* metprefetch METARRAY index
-	 * prefetch METARRAY[index]
-	 * index = HMEFTI
-	 */
-	INSTR_METPREFETCH_H, /* index = H */
-	INSTR_METPREFETCH_M, /* index = MEFT */
-	INSTR_METPREFETCH_I, /* index = I */
-
-	/* meter METARRAY index length color_in color_out
-	 * color_out = meter(METARRAY[index], length, color_in)
-	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
-	 */
-	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
-	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
-	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
-	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
-	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
-	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
-	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
-	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
-	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
-	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
-	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
-	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */
-
-	/* table TABLE */
-	INSTR_TABLE,
-	INSTR_SELECTOR,
-	INSTR_LEARNER,
-
-	/* learn LEARNER ACTION_NAME */
-	INSTR_LEARNER_LEARN,
-
-	/* forget */
-	INSTR_LEARNER_FORGET,
-
-	/* extern e.obj.func */
-	INSTR_EXTERN_OBJ,
-
-	/* extern f.func */
-	INSTR_EXTERN_FUNC,
-
-	/* jmp LABEL
-	 * Unconditional jump
-	 */
-	INSTR_JMP,
-
-	/* jmpv LABEL h.header
-	 * Jump if header is valid
-	 */
-	INSTR_JMP_VALID,
-
-	/* jmpnv LABEL h.header
-	 * Jump if header is invalid
-	 */
-	INSTR_JMP_INVALID,
-
-	/* jmph LABEL
-	 * Jump if table lookup hit
-	 */
-	INSTR_JMP_HIT,
-
-	/* jmpnh LABEL
-	 * Jump if table lookup miss
-	 */
-	INSTR_JMP_MISS,
-
-	/* jmpa LABEL ACTION
-	 * Jump if action run
-	 */
-	INSTR_JMP_ACTION_HIT,
-
-	/* jmpna LABEL ACTION
-	 * Jump if action not run
-	 */
-	INSTR_JMP_ACTION_MISS,
-
-	/* jmpeq LABEL a b
-	 * Jump if a is equal to b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
-	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
-	INSTR_JMP_EQ_HH, /* a = H, b = H */
-	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
-
-	/* jmpneq LABEL a b
-	 * Jump if a is not equal to b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
-	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
-	INSTR_JMP_NEQ_HH, /* a = H, b = H */
-	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
-
-	/* jmplt LABEL a b
-	 * Jump if a is less than b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
-	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
-	INSTR_JMP_LT_HH, /* a = H, b = H */
-	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
-	INSTR_JMP_LT_HI, /* a = H, b = I */
-
-	/* jmpgt LABEL a b
-	 * Jump if a is greater than b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
-	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
-	INSTR_JMP_GT_HH, /* a = H, b = H */
-	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
-	INSTR_JMP_GT_HI, /* a = H, b = I */
-
-	/* return
-	 * Return from action
-	 */
-	INSTR_RETURN,
-};
-
-struct instr_operand {
-	uint8_t struct_id;
-	uint8_t n_bits;
-	uint8_t offset;
-	uint8_t pad;
-};
-
-struct instr_io {
-	struct {
-		union {
-			struct {
-				uint8_t offset;
-				uint8_t n_bits;
-				uint8_t pad[2];
-			};
-
-			uint32_t val;
-		};
-	} io;
-
-	struct {
-		uint8_t header_id[8];
-		uint8_t struct_id[8];
-		uint8_t n_bytes[8];
-	} hdr;
-};
-
-struct instr_hdr_validity {
-	uint8_t header_id;
-};
-
-struct instr_table {
-	uint8_t table_id;
-};
-
-struct instr_learn {
-	uint8_t action_id;
-};
-
-struct instr_extern_obj {
-	uint8_t ext_obj_id;
-	uint8_t func_id;
-};
-
-struct instr_extern_func {
-	uint8_t ext_func_id;
-};
-
-struct instr_dst_src {
-	struct instr_operand dst;
-	union {
-		struct instr_operand src;
-		uint64_t src_val;
-	};
-};
-
-struct instr_regarray {
-	uint8_t regarray_id;
-	uint8_t pad[3];
-
-	union {
-		struct instr_operand idx;
-		uint32_t idx_val;
-	};
-
-	union {
-		struct instr_operand dstsrc;
-		uint64_t dstsrc_val;
-	};
-};
-
-struct instr_meter {
-	uint8_t metarray_id;
-	uint8_t pad[3];
-
-	union {
-		struct instr_operand idx;
-		uint32_t idx_val;
-	};
-
-	struct instr_operand length;
-
-	union {
-		struct instr_operand color_in;
-		uint32_t color_in_val;
-	};
-
-	struct instr_operand color_out;
-};
-
-struct instr_dma {
-	struct {
-		uint8_t header_id[8];
-		uint8_t struct_id[8];
-	} dst;
-
-	struct {
-		uint8_t offset[8];
-	} src;
-
-	uint16_t n_bytes[8];
-};
-
-struct instr_jmp {
-	struct instruction *ip;
-
-	union {
-		struct instr_operand a;
-		uint8_t header_id;
-		uint8_t action_id;
-	};
-
-	union {
-		struct instr_operand b;
-		uint64_t b_val;
-	};
-};
-
-struct instruction {
-	enum instruction_type type;
-	union {
-		struct instr_io io;
-		struct instr_hdr_validity valid;
-		struct instr_dst_src mov;
-		struct instr_regarray regarray;
-		struct instr_meter meter;
-		struct instr_dma dma;
-		struct instr_dst_src alu;
-		struct instr_table table;
-		struct instr_learn learn;
-		struct instr_extern_obj ext_obj;
-		struct instr_extern_func ext_func;
-		struct instr_jmp jmp;
-	};
-};
-
-struct instruction_data {
-	char label[RTE_SWX_NAME_SIZE];
-	char jmp_label[RTE_SWX_NAME_SIZE];
-	uint32_t n_users; /* user = jmp instruction to this instruction. */
-	int invalid;
-};
-
-/*
- * Action.
- */
-struct action {
-	TAILQ_ENTRY(action) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *st;
-	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
-	struct instruction *instructions;
-	uint32_t n_instructions;
-	uint32_t id;
-};
-
-TAILQ_HEAD(action_tailq, action);
-
-/*
- * Table.
- */
-struct table_type {
-	TAILQ_ENTRY(table_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	enum rte_swx_table_match_type match_type;
-	struct rte_swx_table_ops ops;
-};
-
-TAILQ_HEAD(table_type_tailq, table_type);
-
-struct match_field {
-	enum rte_swx_table_match_type match_type;
-	struct field *field;
-};
-
-struct table {
-	TAILQ_ENTRY(table) node;
-	char name[RTE_SWX_NAME_SIZE];
-	char args[RTE_SWX_NAME_SIZE];
-	struct table_type *type; /* NULL when n_fields == 0. */
-
-	/* Match. */
-	struct match_field *fields;
-	uint32_t n_fields;
-	struct header *header; /* Only valid when n_fields > 0. */
-
-	/* Action. */
-	struct action **actions;
-	struct action *default_action;
-	uint8_t *default_action_data;
-	uint32_t n_actions;
-	int default_action_is_const;
-	uint32_t action_data_size_max;
-
-	uint32_t size;
-	uint32_t id;
-};
-
-TAILQ_HEAD(table_tailq, table);
-
-struct table_runtime {
-	rte_swx_table_lookup_t func;
-	void *mailbox;
-	uint8_t **key;
-};
-
-struct table_statistics {
-	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
-	uint64_t *n_pkts_action;
-};
-
-/*
- * Selector.
- */
-struct selector {
-	TAILQ_ENTRY(selector) node;
-	char name[RTE_SWX_NAME_SIZE];
-
-	struct field *group_id_field;
-	struct field **selector_fields;
-	uint32_t n_selector_fields;
-	struct header *selector_header;
-	struct field *member_id_field;
-
-	uint32_t n_groups_max;
-	uint32_t n_members_per_group_max;
-
-	uint32_t id;
-};
-
-TAILQ_HEAD(selector_tailq, selector);
-
-struct selector_runtime {
-	void *mailbox;
-	uint8_t **group_id_buffer;
-	uint8_t **selector_buffer;
-	uint8_t **member_id_buffer;
-};
-
-struct selector_statistics {
-	uint64_t n_pkts;
-};
-
-/*
- * Learner table.
- */
-struct learner {
-	TAILQ_ENTRY(learner) node;
-	char name[RTE_SWX_NAME_SIZE];
-
-	/* Match. */
-	struct field **fields;
-	uint32_t n_fields;
-	struct header *header;
-
-	/* Action. */
-	struct action **actions;
-	struct field **action_arg;
-	struct action *default_action;
-	uint8_t *default_action_data;
-	uint32_t n_actions;
-	int default_action_is_const;
-	uint32_t action_data_size_max;
-
-	uint32_t size;
-	uint32_t timeout;
-	uint32_t id;
-};
-
-TAILQ_HEAD(learner_tailq, learner);
-
-struct learner_runtime {
-	void *mailbox;
-	uint8_t **key;
-	uint8_t **action_data;
-};
-
-struct learner_statistics {
-	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
-	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
-	uint64_t n_pkts_forget;
-	uint64_t *n_pkts_action;
-};
-
-/*
- * Register array.
- */
-struct regarray {
-	TAILQ_ENTRY(regarray) node;
-	char name[RTE_SWX_NAME_SIZE];
-	uint64_t init_val;
-	uint32_t size;
-	uint32_t id;
-};
-
-TAILQ_HEAD(regarray_tailq, regarray);
-
-struct regarray_runtime {
-	uint64_t *regarray;
-	uint32_t size_mask;
-};
-
-/*
- * Meter array.
- */
-struct meter_profile {
-	TAILQ_ENTRY(meter_profile) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct rte_meter_trtcm_params params;
-	struct rte_meter_trtcm_profile profile;
-	uint32_t n_users;
-};
-
-TAILQ_HEAD(meter_profile_tailq, meter_profile);
-
-struct metarray {
-	TAILQ_ENTRY(metarray) node;
-	char name[RTE_SWX_NAME_SIZE];
-	uint32_t size;
-	uint32_t id;
-};
-
-TAILQ_HEAD(metarray_tailq, metarray);
-
-struct meter {
-	struct rte_meter_trtcm m;
-	struct meter_profile *profile;
-	enum rte_color color_mask;
-	uint8_t pad[20];
-
-	uint64_t n_pkts[RTE_COLORS];
-	uint64_t n_bytes[RTE_COLORS];
-};
-
-struct metarray_runtime {
-	struct meter *metarray;
-	uint32_t size_mask;
-};
-
-/*
- * Pipeline.
- */
-struct thread {
-	/* Packet. */
-	struct rte_swx_pkt pkt;
-	uint8_t *ptr;
-
-	/* Structures. */
-	uint8_t **structs;
-
-	/* Packet headers. */
-	struct header_runtime *headers; /* Extracted or generated headers. */
-	struct header_out_runtime *headers_out; /* Emitted headers. */
-	uint8_t *header_storage;
-	uint8_t *header_out_storage;
-	uint64_t valid_headers;
-	uint32_t n_headers_out;
-
-	/* Packet meta-data. */
-	uint8_t *metadata;
-
-	/* Tables. */
-	struct table_runtime *tables;
-	struct selector_runtime *selectors;
-	struct learner_runtime *learners;
-	struct rte_swx_table_state *table_state;
-	uint64_t action_id;
-	int hit; /* 0 = Miss, 1 = Hit. */
-	uint32_t learner_id;
-	uint64_t time;
-
-	/* Extern objects and functions. */
-	struct extern_obj_runtime *extern_objs;
-	struct extern_func_runtime *extern_funcs;
-
-	/* Instructions. */
-	struct instruction *ip;
-	struct instruction *ret;
-};
-
-#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
-#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
-#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))
-
-#define HEADER_VALID(thread, header_id) \
-	MASK64_BIT_GET((thread)->valid_headers, header_id)
-
-#define ALU(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = dst64 & dst64_mask;                                     \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	uint64_t result = dst operator src;                                    \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define ALU_MH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = dst64 & dst64_mask;                                     \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
-									       \
-	uint64_t result = dst operator src;                                    \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
-}
-
-#define ALU_HM(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	uint64_t result = dst operator src;                                    \
-	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
-}
-
-#define ALU_HM_FAST(thread, ip, operator)  \
-{                                                                                 \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
-	uint64_t dst64 = *dst64_ptr;                                              \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
-	uint64_t dst = dst64 & dst64_mask;                                        \
-										  \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
-	uint64_t src64 = *src64_ptr;                                              \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
-	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
-										  \
-	uint64_t result = dst operator src;                                       \
-										  \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
-}
-
-#define ALU_HH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
-									       \
-	uint64_t result = dst operator src;                                    \
-	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
-}
-
-#define ALU_HH_FAST(thread, ip, operator)  \
-{                                                                                             \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
-	uint64_t dst64 = *dst64_ptr;                                                          \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
-	uint64_t dst = dst64 & dst64_mask;                                                    \
-											      \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
-	uint64_t src64 = *src64_ptr;                                                          \
-	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
-											      \
-	uint64_t result = dst operator src;                                                   \
-											      \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
-}
-
-#else
-
-#define ALU_MH ALU
-#define ALU_HM ALU
-#define ALU_HM_FAST ALU
-#define ALU_HH ALU
-#define ALU_HH_FAST ALU
-
-#endif
-
-#define ALU_I(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = dst64 & dst64_mask;                                     \
-									       \
-	uint64_t src = (ip)->alu.src_val;                                      \
-									       \
-	uint64_t result = dst operator src;                                    \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
-}
-
-#define ALU_MI ALU_I
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define ALU_HI(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
-									       \
-	uint64_t src = (ip)->alu.src_val;                                      \
-									       \
-	uint64_t result = dst operator src;                                    \
-	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
-}
-
-#else
-
-#define ALU_HI ALU_I
-
-#endif
-
-#define MOV(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define MOV_MH(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
-}
-
-#define MOV_HM(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
-	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
-}
-
-#define MOV_HH(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-									       \
-	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
-	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
-	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
-}
-
-#else
-
-#define MOV_MH MOV
-#define MOV_HM MOV
-#define MOV_HH MOV
-
-#endif
-
-#define MOV_I(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint64_t src = (ip)->mov.src_val;                                      \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
-}
-
-#define JMP_CMP(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
-	uint64_t a = a64 & a64_mask;                                           \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
-	uint64_t b = b64 & b64_mask;                                           \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define JMP_CMP_MH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
-	uint64_t a = a64 & a64_mask;                                           \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_HM(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
-	uint64_t b = b64 & b64_mask;                                           \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_HH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_HH_FAST(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#else
-
-#define JMP_CMP_MH JMP_CMP
-#define JMP_CMP_HM JMP_CMP
-#define JMP_CMP_HH JMP_CMP
-#define JMP_CMP_HH_FAST JMP_CMP
-
-#endif
-
-#define JMP_CMP_I(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
-	uint64_t a = a64 & a64_mask;                                           \
-									       \
-	uint64_t b = (ip)->jmp.b_val;                                          \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_MI JMP_CMP_I
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define JMP_CMP_HI(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
-									       \
-	uint64_t b = (ip)->jmp.b_val;                                          \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#else
-
-#define JMP_CMP_HI JMP_CMP_I
-
-#endif
-
-#define METADATA_READ(thread, offset, n_bits)                                  \
-({                                                                             \
-	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
-	uint64_t m64 = *m64_ptr;                                               \
-	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
-	(m64 & m64_mask);                                                      \
-})
-
-#define METADATA_WRITE(thread, offset, n_bits, value)                          \
-{                                                                              \
-	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
-	uint64_t m64 = *m64_ptr;                                               \
-	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
-									       \
-	uint64_t m_new = value;                                                \
-									       \
-	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
-}
-
-#ifndef RTE_SWX_PIPELINE_THREADS_MAX
-#define RTE_SWX_PIPELINE_THREADS_MAX 16
-#endif
-
-struct rte_swx_pipeline {
-	struct struct_type_tailq struct_types;
-	struct port_in_type_tailq port_in_types;
-	struct port_in_tailq ports_in;
-	struct port_out_type_tailq port_out_types;
-	struct port_out_tailq ports_out;
-	struct extern_type_tailq extern_types;
-	struct extern_obj_tailq extern_objs;
-	struct extern_func_tailq extern_funcs;
-	struct header_tailq headers;
-	struct struct_type *metadata_st;
-	uint32_t metadata_struct_id;
-	struct action_tailq actions;
-	struct table_type_tailq table_types;
-	struct table_tailq tables;
-	struct selector_tailq selectors;
-	struct learner_tailq learners;
-	struct regarray_tailq regarrays;
-	struct meter_profile_tailq meter_profiles;
-	struct metarray_tailq metarrays;
-
-	struct port_in_runtime *in;
-	struct port_out_runtime *out;
-	struct instruction **action_instructions;
-	struct rte_swx_table_state *table_state;
-	struct table_statistics *table_stats;
-	struct selector_statistics *selector_stats;
-	struct learner_statistics *learner_stats;
-	struct regarray_runtime *regarray_runtime;
-	struct metarray_runtime *metarray_runtime;
-	struct instruction *instructions;
-	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
-
-	uint32_t n_structs;
-	uint32_t n_ports_in;
-	uint32_t n_ports_out;
-	uint32_t n_extern_objs;
-	uint32_t n_extern_funcs;
-	uint32_t n_actions;
-	uint32_t n_tables;
-	uint32_t n_selectors;
-	uint32_t n_learners;
-	uint32_t n_regarrays;
-	uint32_t n_metarrays;
-	uint32_t n_headers;
-	uint32_t thread_id;
-	uint32_t port_id;
-	uint32_t n_instructions;
-	int build_done;
-	int numa_node;
-};
-
 /*
  * Struct.
  */
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
new file mode 100644
index 0000000000..5d80dd8451
--- /dev/null
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -0,0 +1,1383 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+#ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
+#define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
+
+#include <inttypes.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_meter.h>
+
+#include <rte_swx_table_selector.h>
+#include <rte_swx_table_learner.h>
+#include <rte_swx_pipeline.h>
+#include <rte_swx_ctl.h>
+
+#ifndef TRACE_LEVEL
+#define TRACE_LEVEL 0
+#endif
+
+#if TRACE_LEVEL
+#define TRACE(...) printf(__VA_ARGS__)
+#else
+#define TRACE(...)
+#endif
+
+/*
+ * Environment.
+ */
+#define ntoh64(x) rte_be_to_cpu_64(x)
+#define hton64(x) rte_cpu_to_be_64(x)
+
+/*
+ * Struct.
+ */
+struct field {
+	char name[RTE_SWX_NAME_SIZE];
+	uint32_t n_bits;
+	uint32_t offset;
+	int var_size;
+};
+
+struct struct_type {
+	TAILQ_ENTRY(struct_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct field *fields;
+	uint32_t n_fields;
+	uint32_t n_bits;
+	uint32_t n_bits_min;
+	int var_size;
+};
+
+TAILQ_HEAD(struct_type_tailq, struct_type);
+
+/*
+ * Input port.
+ */
+struct port_in_type {
+	TAILQ_ENTRY(port_in_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct rte_swx_port_in_ops ops;
+};
+
+TAILQ_HEAD(port_in_type_tailq, port_in_type);
+
+struct port_in {
+	TAILQ_ENTRY(port_in) node;
+	struct port_in_type *type;
+	void *obj;
+	uint32_t id;
+};
+
+TAILQ_HEAD(port_in_tailq, port_in);
+
+struct port_in_runtime {
+	rte_swx_port_in_pkt_rx_t pkt_rx;
+	void *obj;
+};
+
+/*
+ * Output port.
+ */
+struct port_out_type {
+	TAILQ_ENTRY(port_out_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct rte_swx_port_out_ops ops;
+};
+
+TAILQ_HEAD(port_out_type_tailq, port_out_type);
+
+struct port_out {
+	TAILQ_ENTRY(port_out) node;
+	struct port_out_type *type;
+	void *obj;
+	uint32_t id;
+};
+
+TAILQ_HEAD(port_out_tailq, port_out);
+
+struct port_out_runtime {
+	rte_swx_port_out_pkt_tx_t pkt_tx;
+	rte_swx_port_out_flush_t flush;
+	void *obj;
+};
+
+/*
+ * Extern object.
+ */
+struct extern_type_member_func {
+	TAILQ_ENTRY(extern_type_member_func) node;
+	char name[RTE_SWX_NAME_SIZE];
+	rte_swx_extern_type_member_func_t func;
+	uint32_t id;
+};
+
+TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);
+
+struct extern_type {
+	TAILQ_ENTRY(extern_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *mailbox_struct_type;
+	rte_swx_extern_type_constructor_t constructor;
+	rte_swx_extern_type_destructor_t destructor;
+	struct extern_type_member_func_tailq funcs;
+	uint32_t n_funcs;
+};
+
+TAILQ_HEAD(extern_type_tailq, extern_type);
+
+struct extern_obj {
+	TAILQ_ENTRY(extern_obj) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct extern_type *type;
+	void *obj;
+	uint32_t struct_id;
+	uint32_t id;
+};
+
+TAILQ_HEAD(extern_obj_tailq, extern_obj);
+
+#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
+#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
+#endif
+
+struct extern_obj_runtime {
+	void *obj;
+	uint8_t *mailbox;
+	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
+};
+
+/*
+ * Extern function.
+ */
+struct extern_func {
+	TAILQ_ENTRY(extern_func) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *mailbox_struct_type;
+	rte_swx_extern_func_t func;
+	uint32_t struct_id;
+	uint32_t id;
+};
+
+TAILQ_HEAD(extern_func_tailq, extern_func);
+
+struct extern_func_runtime {
+	uint8_t *mailbox;
+	rte_swx_extern_func_t func;
+};
+
+/*
+ * Header.
+ */
+struct header {
+	TAILQ_ENTRY(header) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *st;
+	uint32_t struct_id;
+	uint32_t id;
+};
+
+TAILQ_HEAD(header_tailq, header);
+
+struct header_runtime {
+	uint8_t *ptr0;
+	uint32_t n_bytes;
+};
+
+struct header_out_runtime {
+	uint8_t *ptr0;
+	uint8_t *ptr;
+	uint32_t n_bytes;
+};
+
+/*
+ * Instruction.
+ */
+
+/* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
+ * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
+ * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
+ * when transferred to packet meta-data and in NBO when transferred to packet
+ * headers.
+ */
+
+/* Notation conventions:
+ *    -Header field: H = h.header.field (dst/src)
+ *    -Meta-data field: M = m.field (dst/src)
+ *    -Extern object mailbox field: E = e.field (dst/src)
+ *    -Extern function mailbox field: F = f.field (dst/src)
+ *    -Table action data field: T = t.field (src only)
+ *    -Immediate value: I = 32-bit unsigned value (src only)
+ */
+
+enum instruction_type {
+	/* rx m.port_in */
+	INSTR_RX,
+
+	/* tx port_out
+	 * port_out = MI
+	 */
+	INSTR_TX,   /* port_out = M */
+	INSTR_TX_I, /* port_out = I */
+
+	/* extract h.header */
+	INSTR_HDR_EXTRACT,
+	INSTR_HDR_EXTRACT2,
+	INSTR_HDR_EXTRACT3,
+	INSTR_HDR_EXTRACT4,
+	INSTR_HDR_EXTRACT5,
+	INSTR_HDR_EXTRACT6,
+	INSTR_HDR_EXTRACT7,
+	INSTR_HDR_EXTRACT8,
+
+	/* extract h.header m.last_field_size */
+	INSTR_HDR_EXTRACT_M,
+
+	/* lookahead h.header */
+	INSTR_HDR_LOOKAHEAD,
+
+	/* emit h.header */
+	INSTR_HDR_EMIT,
+	INSTR_HDR_EMIT_TX,
+	INSTR_HDR_EMIT2_TX,
+	INSTR_HDR_EMIT3_TX,
+	INSTR_HDR_EMIT4_TX,
+	INSTR_HDR_EMIT5_TX,
+	INSTR_HDR_EMIT6_TX,
+	INSTR_HDR_EMIT7_TX,
+	INSTR_HDR_EMIT8_TX,
+
+	/* validate h.header */
+	INSTR_HDR_VALIDATE,
+
+	/* invalidate h.header */
+	INSTR_HDR_INVALIDATE,
+
+	/* mov dst src
+	 * dst = src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_MOV,    /* dst = MEF, src = MEFT */
+	INSTR_MOV_MH, /* dst = MEF, src = H */
+	INSTR_MOV_HM, /* dst = H, src = MEFT */
+	INSTR_MOV_HH, /* dst = H, src = H */
+	INSTR_MOV_I,  /* dst = HMEF, src = I */
+
+	/* dma h.header t.field
+	 * memcpy(h.header, t.field, sizeof(h.header))
+	 */
+	INSTR_DMA_HT,
+	INSTR_DMA_HT2,
+	INSTR_DMA_HT3,
+	INSTR_DMA_HT4,
+	INSTR_DMA_HT5,
+	INSTR_DMA_HT6,
+	INSTR_DMA_HT7,
+	INSTR_DMA_HT8,
+
+	/* add dst src
+	 * dst += src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
+	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
+	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
+	INSTR_ALU_ADD_HH, /* dst = H, src = H */
+	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
+	INSTR_ALU_ADD_HI, /* dst = H, src = I */
+
+	/* sub dst src
+	 * dst -= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
+	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
+	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
+	INSTR_ALU_SUB_HH, /* dst = H, src = H */
+	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
+	INSTR_ALU_SUB_HI, /* dst = H, src = I */
+
+	/* ckadd dst src
+	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
+	 * dst = H, src = {H, h.header}
+	 */
+	INSTR_ALU_CKADD_FIELD,    /* src = H */
+	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */
+	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with any sizeof(header) */
+
+	/* cksub dst src
+	 * dst = dst '- src
+	 * dst = H, src = H
+	 */
+	INSTR_ALU_CKSUB_FIELD,
+
+	/* and dst src
+	 * dst &= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
+	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
+	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
+	INSTR_ALU_AND_HH, /* dst = H, src = H */
+	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */
+
+	/* or dst src
+	 * dst |= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
+	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
+	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
+	INSTR_ALU_OR_HH, /* dst = H, src = H */
+	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */
+
+	/* xor dst src
+	 * dst ^= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
+	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
+	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
+	INSTR_ALU_XOR_HH, /* dst = H, src = H */
+	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */
+
+	/* shl dst src
+	 * dst <<= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
+	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
+	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
+	INSTR_ALU_SHL_HH, /* dst = H, src = H */
+	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
+	INSTR_ALU_SHL_HI, /* dst = H, src = I */
+
+	/* shr dst src
+	 * dst >>= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
+	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
+	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
+	INSTR_ALU_SHR_HH, /* dst = H, src = H */
+	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
+	INSTR_ALU_SHR_HI, /* dst = H, src = I */
+
+	/* regprefetch REGARRAY index
+	 * prefetch REGARRAY[index]
+	 * index = HMEFTI
+	 */
+	INSTR_REGPREFETCH_RH, /* index = H */
+	INSTR_REGPREFETCH_RM, /* index = MEFT */
+	INSTR_REGPREFETCH_RI, /* index = I */
+
+	/* regrd dst REGARRAY index
+	 * dst = REGARRAY[index]
+	 * dst = HMEF, index = HMEFTI
+	 */
+	INSTR_REGRD_HRH, /* dst = H, index = H */
+	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
+	INSTR_REGRD_HRI, /* dst = H, index = I */
+	INSTR_REGRD_MRH, /* dst = MEF, index = H */
+	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
+	INSTR_REGRD_MRI, /* dst = MEF, index = I */
+
+	/* regwr REGARRAY index src
+	 * REGARRAY[index] = src
+	 * index = HMEFTI, src = HMEFTI
+	 */
+	INSTR_REGWR_RHH, /* index = H, src = H */
+	INSTR_REGWR_RHM, /* index = H, src = MEFT */
+	INSTR_REGWR_RHI, /* index = H, src = I */
+	INSTR_REGWR_RMH, /* index = MEFT, src = H */
+	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
+	INSTR_REGWR_RMI, /* index = MEFT, src = I */
+	INSTR_REGWR_RIH, /* index = I, src = H */
+	INSTR_REGWR_RIM, /* index = I, src = MEFT */
+	INSTR_REGWR_RII, /* index = I, src = I */
+
+	/* regadd REGARRAY index src
+	 * REGARRAY[index] += src
+	 * index = HMEFTI, src = HMEFTI
+	 */
+	INSTR_REGADD_RHH, /* index = H, src = H */
+	INSTR_REGADD_RHM, /* index = H, src = MEFT */
+	INSTR_REGADD_RHI, /* index = H, src = I */
+	INSTR_REGADD_RMH, /* index = MEFT, src = H */
+	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
+	INSTR_REGADD_RMI, /* index = MEFT, src = I */
+	INSTR_REGADD_RIH, /* index = I, src = H */
+	INSTR_REGADD_RIM, /* index = I, src = MEFT */
+	INSTR_REGADD_RII, /* index = I, src = I */
+
+	/* metprefetch METARRAY index
+	 * prefetch METARRAY[index]
+	 * index = HMEFTI
+	 */
+	INSTR_METPREFETCH_H, /* index = H */
+	INSTR_METPREFETCH_M, /* index = MEFT */
+	INSTR_METPREFETCH_I, /* index = I */
+
+	/* meter METARRAY index length color_in color_out
+	 * color_out = meter(METARRAY[index], length, color_in)
+	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
+	 */
+	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
+	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
+	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
+	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
+	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
+	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
+	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
+	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
+	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
+	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
+	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
+	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */
+
+	/* table TABLE */
+	INSTR_TABLE,
+	INSTR_SELECTOR,
+	INSTR_LEARNER,
+
+	/* learn LEARNER ACTION_NAME */
+	INSTR_LEARNER_LEARN,
+
+	/* forget */
+	INSTR_LEARNER_FORGET,
+
+	/* extern e.obj.func */
+	INSTR_EXTERN_OBJ,
+
+	/* extern f.func */
+	INSTR_EXTERN_FUNC,
+
+	/* jmp LABEL
+	 * Unconditional jump
+	 */
+	INSTR_JMP,
+
+	/* jmpv LABEL h.header
+	 * Jump if header is valid
+	 */
+	INSTR_JMP_VALID,
+
+	/* jmpnv LABEL h.header
+	 * Jump if header is invalid
+	 */
+	INSTR_JMP_INVALID,
+
+	/* jmph LABEL
+	 * Jump if table lookup hit
+	 */
+	INSTR_JMP_HIT,
+
+	/* jmpnh LABEL
+	 * Jump if table lookup miss
+	 */
+	INSTR_JMP_MISS,
+
+	/* jmpa LABEL ACTION
+	 * Jump if action run
+	 */
+	INSTR_JMP_ACTION_HIT,
+
+	/* jmpna LABEL ACTION
+	 * Jump if action not run
+	 */
+	INSTR_JMP_ACTION_MISS,
+
+	/* jmpeq LABEL a b
+	 * Jump if a is equal to b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
+	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
+	INSTR_JMP_EQ_HH, /* a = H, b = H */
+	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
+
+	/* jmpneq LABEL a b
+	 * Jump if a is not equal to b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
+	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
+	INSTR_JMP_NEQ_HH, /* a = H, b = H */
+	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
+
+	/* jmplt LABEL a b
+	 * Jump if a is less than b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
+	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
+	INSTR_JMP_LT_HH, /* a = H, b = H */
+	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
+	INSTR_JMP_LT_HI, /* a = H, b = I */
+
+	/* jmpgt LABEL a b
+	 * Jump if a is greater than b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
+	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
+	INSTR_JMP_GT_HH, /* a = H, b = H */
+	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
+	INSTR_JMP_GT_HI, /* a = H, b = I */
+
+	/* return
+	 * Return from action
+	 */
+	INSTR_RETURN,
+};
+
+struct instr_operand {
+	uint8_t struct_id;
+	uint8_t n_bits;
+	uint8_t offset;
+	uint8_t pad;
+};
+
+struct instr_io {
+	struct {
+		union {
+			struct {
+				uint8_t offset;
+				uint8_t n_bits;
+				uint8_t pad[2];
+			};
+
+			uint32_t val;
+		};
+	} io;
+
+	struct {
+		uint8_t header_id[8];
+		uint8_t struct_id[8];
+		uint8_t n_bytes[8];
+	} hdr;
+};
+
+struct instr_hdr_validity {
+	uint8_t header_id;
+};
+
+struct instr_table {
+	uint8_t table_id;
+};
+
+struct instr_learn {
+	uint8_t action_id;
+};
+
+struct instr_extern_obj {
+	uint8_t ext_obj_id;
+	uint8_t func_id;
+};
+
+struct instr_extern_func {
+	uint8_t ext_func_id;
+};
+
+struct instr_dst_src {
+	struct instr_operand dst;
+	union {
+		struct instr_operand src;
+		uint64_t src_val;
+	};
+};
+
+struct instr_regarray {
+	uint8_t regarray_id;
+	uint8_t pad[3];
+
+	union {
+		struct instr_operand idx;
+		uint32_t idx_val;
+	};
+
+	union {
+		struct instr_operand dstsrc;
+		uint64_t dstsrc_val;
+	};
+};
+
+struct instr_meter {
+	uint8_t metarray_id;
+	uint8_t pad[3];
+
+	union {
+		struct instr_operand idx;
+		uint32_t idx_val;
+	};
+
+	struct instr_operand length;
+
+	union {
+		struct instr_operand color_in;
+		uint32_t color_in_val;
+	};
+
+	struct instr_operand color_out;
+};
+
+struct instr_dma {
+	struct {
+		uint8_t header_id[8];
+		uint8_t struct_id[8];
+	} dst;
+
+	struct {
+		uint8_t offset[8];
+	} src;
+
+	uint16_t n_bytes[8];
+};
+
+struct instr_jmp {
+	struct instruction *ip;
+
+	union {
+		struct instr_operand a;
+		uint8_t header_id;
+		uint8_t action_id;
+	};
+
+	union {
+		struct instr_operand b;
+		uint64_t b_val;
+	};
+};
+
+struct instruction {
+	enum instruction_type type;
+	union {
+		struct instr_io io;
+		struct instr_hdr_validity valid;
+		struct instr_dst_src mov;
+		struct instr_regarray regarray;
+		struct instr_meter meter;
+		struct instr_dma dma;
+		struct instr_dst_src alu;
+		struct instr_table table;
+		struct instr_learn learn;
+		struct instr_extern_obj ext_obj;
+		struct instr_extern_func ext_func;
+		struct instr_jmp jmp;
+	};
+};
+
+struct instruction_data {
+	char label[RTE_SWX_NAME_SIZE];
+	char jmp_label[RTE_SWX_NAME_SIZE];
+	uint32_t n_users; /* user = jmp instruction to this instruction. */
+	int invalid;
+};
+
+/*
+ * Action.
+ */
+struct action {
+	TAILQ_ENTRY(action) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *st;
+	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
+	struct instruction *instructions;
+	uint32_t n_instructions;
+	uint32_t id;
+};
+
+TAILQ_HEAD(action_tailq, action);
+
+/*
+ * Table.
+ */
+struct table_type {
+	TAILQ_ENTRY(table_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	enum rte_swx_table_match_type match_type;
+	struct rte_swx_table_ops ops;
+};
+
+TAILQ_HEAD(table_type_tailq, table_type);
+
+struct match_field {
+	enum rte_swx_table_match_type match_type;
+	struct field *field;
+};
+
+struct table {
+	TAILQ_ENTRY(table) node;
+	char name[RTE_SWX_NAME_SIZE];
+	char args[RTE_SWX_NAME_SIZE];
+	struct table_type *type; /* NULL when n_fields == 0. */
+
+	/* Match. */
+	struct match_field *fields;
+	uint32_t n_fields;
+	struct header *header; /* Only valid when n_fields > 0. */
+
+	/* Action. */
+	struct action **actions;
+	struct action *default_action;
+	uint8_t *default_action_data;
+	uint32_t n_actions;
+	int default_action_is_const;
+	uint32_t action_data_size_max;
+
+	uint32_t size;
+	uint32_t id;
+};
+
+TAILQ_HEAD(table_tailq, table);
+
+struct table_runtime {
+	rte_swx_table_lookup_t func;
+	void *mailbox;
+	uint8_t **key;
+};
+
+struct table_statistics {
+	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
+	uint64_t *n_pkts_action;
+};
+
+/*
+ * Selector.
+ */
+struct selector {
+	TAILQ_ENTRY(selector) node;
+	char name[RTE_SWX_NAME_SIZE];
+
+	struct field *group_id_field;
+	struct field **selector_fields;
+	uint32_t n_selector_fields;
+	struct header *selector_header;
+	struct field *member_id_field;
+
+	uint32_t n_groups_max;
+	uint32_t n_members_per_group_max;
+
+	uint32_t id;
+};
+
+TAILQ_HEAD(selector_tailq, selector);
+
+struct selector_runtime {
+	void *mailbox;
+	uint8_t **group_id_buffer;
+	uint8_t **selector_buffer;
+	uint8_t **member_id_buffer;
+};
+
+struct selector_statistics {
+	uint64_t n_pkts;
+};
+
+/*
+ * Learner table.
+ */
+struct learner {
+	TAILQ_ENTRY(learner) node;
+	char name[RTE_SWX_NAME_SIZE];
+
+	/* Match. */
+	struct field **fields;
+	uint32_t n_fields;
+	struct header *header;
+
+	/* Action. */
+	struct action **actions;
+	struct field **action_arg;
+	struct action *default_action;
+	uint8_t *default_action_data;
+	uint32_t n_actions;
+	int default_action_is_const;
+	uint32_t action_data_size_max;
+
+	uint32_t size;
+	uint32_t timeout;
+	uint32_t id;
+};
+
+TAILQ_HEAD(learner_tailq, learner);
+
+struct learner_runtime {
+	void *mailbox;
+	uint8_t **key;
+	uint8_t **action_data;
+};
+
+struct learner_statistics {
+	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
+	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
+	uint64_t n_pkts_forget;
+	uint64_t *n_pkts_action;
+};
+
+/*
+ * Register array.
+ */
+struct regarray {
+	TAILQ_ENTRY(regarray) node;
+	char name[RTE_SWX_NAME_SIZE];
+	uint64_t init_val;
+	uint32_t size;
+	uint32_t id;
+};
+
+TAILQ_HEAD(regarray_tailq, regarray);
+
+struct regarray_runtime {
+	uint64_t *regarray;
+	uint32_t size_mask;
+};
+
+/*
+ * Meter array.
+ */
+struct meter_profile {
+	TAILQ_ENTRY(meter_profile) node; /* Linkage for the meter_profile_tailq list. */
+	char name[RTE_SWX_NAME_SIZE];
+	struct rte_meter_trtcm_params params;
+	struct rte_meter_trtcm_profile profile;
+	uint32_t n_users; /* NOTE(review): presumably the number of meters using this profile - confirm. */
+};
+
+TAILQ_HEAD(meter_profile_tailq, meter_profile);
+
+struct metarray {
+	TAILQ_ENTRY(metarray) node; /* Linkage for the metarray_tailq list. */
+	char name[RTE_SWX_NAME_SIZE];
+	uint32_t size;
+	uint32_t id;
+};
+
+TAILQ_HEAD(metarray_tailq, metarray);
+
+struct meter {
+	struct rte_meter_trtcm m;
+	struct meter_profile *profile;
+	enum rte_color color_mask;
+	uint8_t pad[20]; /* Explicit padding. NOTE(review): presumably sizes/aligns the element - confirm. */
+
+	uint64_t n_pkts[RTE_COLORS]; /* RTE_COLORS counters (presumably indexed by enum rte_color). */
+	uint64_t n_bytes[RTE_COLORS]; /* RTE_COLORS counters (presumably indexed by enum rte_color). */
+};
+
+struct metarray_runtime {
+	struct meter *metarray; /* Meter storage: an array of struct meter. */
+	uint32_t size_mask; /* NOTE(review): presumably (size - 1) for power-of-two index wrapping - confirm. */
+};
+
+/*
+ * Pipeline.
+ */
+struct thread {
+	/* Packet. */
+	struct rte_swx_pkt pkt;
+	uint8_t *ptr; /* Set by the rx instruction to &pkt.pkt[pkt.offset]. */
+
+	/* Structures. */
+	uint8_t **structs; /* Indexed by struct_id in the ALU/MOV/JMP_CMP macros. */
+
+	/* Packet headers. */
+	struct header_runtime *headers; /* Extracted or generated headers. */
+	struct header_out_runtime *headers_out; /* Emitted headers. */
+	uint8_t *header_storage;
+	uint8_t *header_out_storage;
+	uint64_t valid_headers; /* Bit mask tested by HEADER_VALID(); cleared by rx. */
+	uint32_t n_headers_out; /* Reset to 0 by rx. */
+
+	/* Packet meta-data. */
+	uint8_t *metadata; /* Byte array accessed through METADATA_READ/METADATA_WRITE. */
+
+	/* Tables. */
+	struct table_runtime *tables;
+	struct selector_runtime *selectors;
+	struct learner_runtime *learners;
+	struct rte_swx_table_state *table_state; /* Reloaded from the pipeline by rx. */
+	uint64_t action_id;
+	int hit; /* 0 = Miss, 1 = Hit. */
+	uint32_t learner_id;
+	uint64_t time;
+
+	/* Extern objects and functions. */
+	struct extern_obj_runtime *extern_objs;
+	struct extern_func_runtime *extern_funcs;
+
+	/* Instructions. */
+	struct instruction *ip; /* Instruction pointer of this thread. */
+	struct instruction *ret; /* Set to ip + 1 by thread_ip_action_call(). */
+};
+
+#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos))) /* Non-zero (the bit itself, not 1) when bit pos is set. */
+#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos))) /* Yields the new mask value; mask itself is not modified. */
+#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos))) /* Yields the new mask value; mask itself is not modified. */
+
+#define HEADER_VALID(thread, header_id) /* Non-zero when bit header_id of valid_headers is set. */ \
+	MASK64_BIT_GET((thread)->valid_headers, header_id)
+
+#define ALU(thread, ip, operator) /* dst = dst op src; both operands used as loaded (no byte swap). */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits); /* n_bits == 0 would shift by 64. */ \
+	uint64_t dst = dst64 & dst64_mask;                                     \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	uint64_t result = dst operator src;                                    \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask); /* Only the low n_bits of the word change. */ \
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define ALU_MH(thread, ip, operator) /* dst used as loaded; src byte-swapped with ntoh64() before the op. */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = dst64 & dst64_mask;                                     \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
+									       \
+	uint64_t result = dst operator src;                                    \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
+}
+
+#define ALU_HM(thread, ip, operator) /* dst swapped with ntoh64(); src used as loaded; result swapped back with hton64(). */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	uint64_t result = dst operator src;                                    \
+	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
+}
+
+#define ALU_HM_FAST(thread, ip, operator) /* dst is never swapped; src goes through hton64() and is shifted by the dst width. NOTE(review): result is OR-ed in unmasked; presumably for bitwise operators only - confirm. */ \
+{                                                                                 \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
+	uint64_t dst64 = *dst64_ptr;                                              \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
+	uint64_t dst = dst64 & dst64_mask;                                        \
+										  \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
+	uint64_t src64 = *src64_ptr;                                              \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
+	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
+										  \
+	uint64_t result = dst operator src;                                       \
+										  \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
+}
+
+#define ALU_HH(thread, ip, operator) /* Both operands swapped with ntoh64(); result swapped back with hton64(). */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
+									       \
+	uint64_t result = dst operator src;                                    \
+	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
+}
+
+#define ALU_HH_FAST(thread, ip, operator) /* No byte swaps: src is re-aligned to the dst width with shifts only. NOTE(review): result is OR-ed in unmasked; presumably for bitwise operators only - confirm. */ \
+{                                                                                             \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
+	uint64_t dst64 = *dst64_ptr;                                                          \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
+	uint64_t dst = dst64 & dst64_mask;                                                    \
+											      \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
+	uint64_t src64 = *src64_ptr;                                                          \
+	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
+											      \
+	uint64_t result = dst operator src;                                                   \
+											      \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
+}
+
+#else
+
+#define ALU_MH ALU /* Non-little-endian build: same as ALU, no byte swap. */
+#define ALU_HM ALU /* Non-little-endian build: same as ALU, no byte swap. */
+#define ALU_HM_FAST ALU /* Non-little-endian build: same as ALU, no byte swap. */
+#define ALU_HH ALU /* Non-little-endian build: same as ALU, no byte swap. */
+#define ALU_HH_FAST ALU /* Non-little-endian build: same as ALU, no byte swap. */
+
+#endif
+
+#define ALU_I(thread, ip, operator) /* dst = dst op immediate (alu.src_val); dst used as loaded (no byte swap). */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = dst64 & dst64_mask;                                     \
+									       \
+	uint64_t src = (ip)->alu.src_val;                                      \
+									       \
+	uint64_t result = dst operator src;                                    \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
+}
+
+#define ALU_MI ALU_I /* Plain alias of ALU_I on every byte order. */
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define ALU_HI(thread, ip, operator) /* dst swapped with ntoh64(); immediate src (alu.src_val); result swapped back with hton64(). */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
+									       \
+	uint64_t src = (ip)->alu.src_val;                                      \
+									       \
+	uint64_t result = dst operator src;                                    \
+	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
+}
+
+#else
+
+#define ALU_HI ALU_I /* Non-little-endian build: same as ALU_I, no byte swap. */
+
+#endif
+
+#define MOV(thread, ip) /* dst = src; both used as loaded (no byte swap); only the low dst n_bits are written. */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits); /* n_bits == 0 would shift by 64. */ \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define MOV_MH(thread, ip) /* src byte-swapped with ntoh64(), then stored masked into dst (dst not swapped). */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
+}
+
+#define MOV_HM(thread, ip) /* src masked, passed through hton64() and shifted by the dst width; stored without a final mask. */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
+	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
+}
+
+#define MOV_HH(thread, ip) /* No byte swaps: src is re-aligned from its own width to the dst width with two shifts. */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+									       \
+	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
+	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
+	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
+}
+
+#else
+
+#define MOV_MH MOV /* Non-little-endian build: same as MOV, no byte swap. */
+#define MOV_HM MOV /* Non-little-endian build: same as MOV, no byte swap. */
+#define MOV_HH MOV /* Non-little-endian build: same as MOV, no byte swap. */
+
+#endif
+
+#define MOV_I(thread, ip) /* dst = immediate (mov.src_val), masked to the low dst n_bits; no byte swap. */ \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint64_t src = (ip)->mov.src_val;                                      \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
+}
+
+#define JMP_CMP(thread, ip, operator) /* Jump to jmp.ip when (a op b), else step to thread->ip + 1; operands used as loaded. */ \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits); /* n_bits == 0 would shift by 64. */ \
+	uint64_t a = a64 & a64_mask;                                           \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
+	uint64_t b = b64 & b64_mask;                                           \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define JMP_CMP_MH(thread, ip, operator) /* As JMP_CMP, but b is byte-swapped with ntoh64() before the comparison. */ \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
+	uint64_t a = a64 & a64_mask;                                           \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_HM(thread, ip, operator) /* As JMP_CMP, but a is byte-swapped with ntoh64() before the comparison. */ \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
+	uint64_t b = b64 & b64_mask;                                           \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_HH(thread, ip, operator) /* As JMP_CMP, but both a and b are byte-swapped with ntoh64() first. */ \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_HH_FAST(thread, ip, operator) /* No byte swaps: a and b are compared left-aligned. NOTE(review): presumably only meaningful for ==/!= - confirm. */ \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#else
+
+#define JMP_CMP_MH JMP_CMP /* Non-little-endian build: same as JMP_CMP, no byte swap. */
+#define JMP_CMP_HM JMP_CMP /* Non-little-endian build: same as JMP_CMP, no byte swap. */
+#define JMP_CMP_HH JMP_CMP /* Non-little-endian build: same as JMP_CMP, no byte swap. */
+#define JMP_CMP_HH_FAST JMP_CMP /* Non-little-endian build: same as JMP_CMP, no byte swap. */
+
+#endif
+
+#define JMP_CMP_I(thread, ip, operator) /* As JMP_CMP, but b is the immediate jmp.b_val; a is used as loaded. */ \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
+	uint64_t a = a64 & a64_mask;                                           \
+									       \
+	uint64_t b = (ip)->jmp.b_val;                                          \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_MI JMP_CMP_I /* Plain alias of JMP_CMP_I on every byte order. */
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define JMP_CMP_HI(thread, ip, operator) /* As JMP_CMP_I, but a is byte-swapped with ntoh64() before the comparison. */ \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
+									       \
+	uint64_t b = (ip)->jmp.b_val;                                          \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#else
+
+#define JMP_CMP_HI JMP_CMP_I /* Non-little-endian build: same as JMP_CMP_I, no byte swap. */
+
+#endif
+
+#define METADATA_READ(thread, offset, n_bits) /* Yields the low n_bits of the 64-bit word at metadata[offset]. */ \
+({ /* GNU statement expression: the last expression is the macro's value. */ \
+	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
+	uint64_t m64 = *m64_ptr;                                               \
+	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits)); /* n_bits == 0 would shift by 64. */ \
+	(m64 & m64_mask);                                                      \
+})
+
+#define METADATA_WRITE(thread, offset, n_bits, value) /* Read-modify-write: stores the low n_bits of value at metadata[offset]. */ \
+{                                                                              \
+	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
+	uint64_t m64 = *m64_ptr;                                               \
+	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
+									       \
+	uint64_t m_new = value;                                                \
+									       \
+	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask); /* Bits above n_bits in the word are preserved. */ \
+}
+
+#ifndef RTE_SWX_PIPELINE_THREADS_MAX /* May be pre-defined by the build to override the default. */
+#define RTE_SWX_PIPELINE_THREADS_MAX 16 /* Sizes rte_swx_pipeline.threads[]; thread_yield() masks with (MAX - 1), so keep it a power of two. */
+#endif
+
+struct rte_swx_pipeline {
+	struct struct_type_tailq struct_types;
+	struct port_in_type_tailq port_in_types;
+	struct port_in_tailq ports_in;
+	struct port_out_type_tailq port_out_types;
+	struct port_out_tailq ports_out;
+	struct extern_type_tailq extern_types;
+	struct extern_obj_tailq extern_objs;
+	struct extern_func_tailq extern_funcs;
+	struct header_tailq headers;
+	struct struct_type *metadata_st;
+	uint32_t metadata_struct_id;
+	struct action_tailq actions;
+	struct table_type_tailq table_types;
+	struct table_tailq tables;
+	struct selector_tailq selectors;
+	struct learner_tailq learners;
+	struct regarray_tailq regarrays;
+	struct meter_profile_tailq meter_profiles;
+	struct metarray_tailq metarrays;
+
+	struct port_in_runtime *in; /* Indexed by port_id in the rx instruction. */
+	struct port_out_runtime *out;
+	struct instruction **action_instructions; /* Indexed by action_id in thread_ip_action_call(). */
+	struct rte_swx_table_state *table_state; /* Copied into the thread by the rx instruction. */
+	struct table_statistics *table_stats;
+	struct selector_statistics *selector_stats;
+	struct learner_statistics *learner_stats;
+	struct regarray_runtime *regarray_runtime;
+	struct metarray_runtime *metarray_runtime;
+	struct instruction *instructions; /* Target of thread_ip_reset(). */
+	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX]; /* Indexed by thread_id. */
+
+	uint32_t n_structs;
+	uint32_t n_ports_in; /* pipeline_port_inc() masks with (n_ports_in - 1): power of two expected. */
+	uint32_t n_ports_out;
+	uint32_t n_extern_objs;
+	uint32_t n_extern_funcs;
+	uint32_t n_actions;
+	uint32_t n_tables;
+	uint32_t n_selectors;
+	uint32_t n_learners;
+	uint32_t n_regarrays;
+	uint32_t n_metarrays;
+	uint32_t n_headers;
+	uint32_t thread_id; /* Current thread; advanced by thread_yield()/thread_yield_cond(). */
+	uint32_t port_id; /* Current input port; advanced by pipeline_port_inc(). */
+	uint32_t n_instructions;
+	int build_done;
+	int numa_node;
+};
+
+#endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 02/24] pipeline: move thread inline functions to header file
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
                   ` (22 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Move the thread inline functions to the internal header file.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 56 ----------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 59 ++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 56 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ae9b2056db..7e01453c27 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1492,62 +1492,6 @@ struct_field_parse(struct rte_swx_pipeline *p,
 	}
 }
 
-static inline void
-pipeline_port_inc(struct rte_swx_pipeline *p)
-{
-	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
-}
-
-static inline void
-thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
-{
-	t->ip = p->instructions;
-}
-
-static inline void
-thread_ip_set(struct thread *t, struct instruction *ip)
-{
-	t->ip = ip;
-}
-
-static inline void
-thread_ip_action_call(struct rte_swx_pipeline *p,
-		      struct thread *t,
-		      uint32_t action_id)
-{
-	t->ret = t->ip + 1;
-	t->ip = p->action_instructions[action_id];
-}
-
-static inline void
-thread_ip_inc(struct rte_swx_pipeline *p);
-
-static inline void
-thread_ip_inc(struct rte_swx_pipeline *p)
-{
-	struct thread *t = &p->threads[p->thread_id];
-
-	t->ip++;
-}
-
-static inline void
-thread_ip_inc_cond(struct thread *t, int cond)
-{
-	t->ip += cond;
-}
-
-static inline void
-thread_yield(struct rte_swx_pipeline *p)
-{
-	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
-}
-
-static inline void
-thread_yield_cond(struct rte_swx_pipeline *p, int cond)
-{
-	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
-}
-
 /*
  * rx.
  */
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 5d80dd8451..682f4c86a0 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1380,4 +1380,63 @@ struct rte_swx_pipeline {
 	int numa_node;
 };
 
+/*
+ * Instruction.
+ */
+static inline void /* Step the pipeline's current input port to the next one, wrapping around. */
+pipeline_port_inc(struct rte_swx_pipeline *p)
+{
+	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1); /* Mask-based wrap: correct only when n_ports_in is a power of two. */
+}
+
+static inline void /* Point thread t back at the pipeline's instruction array. */
+thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
+{
+	t->ip = p->instructions;
+}
+
+static inline void /* Set the instruction pointer of thread t to ip (no validation). */
+thread_ip_set(struct thread *t, struct instruction *ip)
+{
+	t->ip = ip;
+}
+
+static inline void /* Save a return address in t->ret and jump to the first instruction of an action. */
+thread_ip_action_call(struct rte_swx_pipeline *p,
+		      struct thread *t,
+		      uint32_t action_id)
+{
+	t->ret = t->ip + 1; /* Return address: the instruction after the current one. */
+	t->ip = p->action_instructions[action_id]; /* No bounds check on action_id here. */
+}
+
+static inline void /* Forward declaration; the definition follows immediately below. */
+thread_ip_inc(struct rte_swx_pipeline *p);
+
+static inline void /* Advance the instruction pointer of the pipeline's current thread by one. */
+thread_ip_inc(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+
+	t->ip++;
+}
+
+static inline void /* Branch-free conditional advance of the instruction pointer of thread t. */
+thread_ip_inc_cond(struct thread *t, int cond)
+{
+	t->ip += cond; /* Advances by cond instructions; the rx instruction passes its pkt_received result. */
+}
+
+static inline void /* Switch the pipeline to the next thread, wrapping around. */
+thread_yield(struct rte_swx_pipeline *p)
+{
+	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1); /* Mask-based wrap: needs a power-of-two maximum. */
+}
+
+static inline void /* Like thread_yield(), but the thread index moves by cond (no move when cond is 0). */
+thread_yield_cond(struct rte_swx_pipeline *p, int cond)
+{
+	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 03/24] pipeline: create inline functions for RX instruction
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
                   ` (21 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 38 ------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 51 ++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 7e01453c27..ad1ecfc640 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1517,44 +1517,6 @@ instr_rx_translate(struct rte_swx_pipeline *p,
 	return 0;
 }
 
-static inline void
-instr_rx_exec(struct rte_swx_pipeline *p);
-
-static inline void
-instr_rx_exec(struct rte_swx_pipeline *p)
-{
-	struct thread *t = &p->threads[p->thread_id];
-	struct instruction *ip = t->ip;
-	struct port_in_runtime *port = &p->in[p->port_id];
-	struct rte_swx_pkt *pkt = &t->pkt;
-	int pkt_received;
-
-	/* Packet. */
-	pkt_received = port->pkt_rx(port->obj, pkt);
-	t->ptr = &pkt->pkt[pkt->offset];
-	rte_prefetch0(t->ptr);
-
-	TRACE("[Thread %2u] rx %s from port %u\n",
-	      p->thread_id,
-	      pkt_received ? "1 pkt" : "0 pkts",
-	      p->port_id);
-
-	/* Headers. */
-	t->valid_headers = 0;
-	t->n_headers_out = 0;
-
-	/* Meta-data. */
-	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);
-
-	/* Tables. */
-	t->table_state = p->table_state;
-
-	/* Thread. */
-	pipeline_port_inc(p);
-	thread_ip_inc_cond(t, pkt_received);
-	thread_yield(p);
-}
-
 /*
  * tx.
  */
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 682f4c86a0..9814b5685a 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1439,4 +1439,55 @@ thread_yield_cond(struct rte_swx_pipeline *p, int cond)
 	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
 }
 
+/*
+ * rx.
+ */
+static inline int /* Core of the rx instruction; returns the port's pkt_rx() result (used by the caller as 0/1). */
+__instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct port_in_runtime *port = &p->in[p->port_id];
+	struct rte_swx_pkt *pkt = &t->pkt;
+	int pkt_received;
+
+	/* Packet: poll the current input port and point t->ptr at the packet data. */
+	pkt_received = port->pkt_rx(port->obj, pkt);
+	t->ptr = &pkt->pkt[pkt->offset];
+	rte_prefetch0(t->ptr);
+
+	TRACE("[Thread %2u] rx %s from port %u\n",
+	      p->thread_id,
+	      pkt_received ? "1 pkt" : "0 pkts",
+	      p->port_id);
+
+	/* Headers: reset, whether or not a packet was received. */
+	t->valid_headers = 0;
+	t->n_headers_out = 0;
+
+	/* Meta-data: record the port just polled (written before port_id is advanced). */
+	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);
+
+	/* Tables: take the pipeline's current table state pointer. */
+	t->table_state = p->table_state;
+
+	/* Thread: the next rx polls the next input port. */
+	pipeline_port_inc(p);
+
+	return pkt_received;
+}
+
+static inline void /* rx instruction entry point: runs on the pipeline's current thread. */
+instr_rx_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+	int pkt_received;
+
+	/* Packet. */
+	pkt_received = __instr_rx_exec(p, t, ip);
+
+	/* Thread: ip stays on this rx when pkt_received is 0; the thread yields either way. */
+	thread_ip_inc_cond(t, pkt_received);
+	thread_yield(p);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 04/24] pipeline: create inline functions for TX instruction
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
                   ` (20 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 86 +---------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 90 ++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 84 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ad1ecfc640..bcf796f8c3 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1567,84 +1567,13 @@ instr_drop_translate(struct rte_swx_pipeline *p,
 	return 0;
 }
 
-static inline void
-emit_handler(struct thread *t)
-{
-	struct header_out_runtime *h0 = &t->headers_out[0];
-	struct header_out_runtime *h1 = &t->headers_out[1];
-	uint32_t offset = 0, i;
-
-	/* No header change or header decapsulation. */
-	if ((t->n_headers_out == 1) &&
-	    (h0->ptr + h0->n_bytes == t->ptr)) {
-		TRACE("Emit handler: no header change or header decap.\n");
-
-		t->pkt.offset -= h0->n_bytes;
-		t->pkt.length += h0->n_bytes;
-
-		return;
-	}
-
-	/* Header encapsulation (optionally, with prior header decasulation). */
-	if ((t->n_headers_out == 2) &&
-	    (h1->ptr + h1->n_bytes == t->ptr) &&
-	    (h0->ptr == h0->ptr0)) {
-		uint32_t offset;
-
-		TRACE("Emit handler: header encapsulation.\n");
-
-		offset = h0->n_bytes + h1->n_bytes;
-		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
-		t->pkt.offset -= offset;
-		t->pkt.length += offset;
-
-		return;
-	}
-
-	/* Header insertion. */
-	/* TBD */
-
-	/* Header extraction. */
-	/* TBD */
-
-	/* For any other case. */
-	TRACE("Emit handler: complex case.\n");
-
-	for (i = 0; i < t->n_headers_out; i++) {
-		struct header_out_runtime *h = &t->headers_out[i];
-
-		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
-		offset += h->n_bytes;
-	}
-
-	if (offset) {
-		memcpy(t->ptr - offset, t->header_out_storage, offset);
-		t->pkt.offset -= offset;
-		t->pkt.length += offset;
-	}
-}
-
-static inline void
-instr_tx_exec(struct rte_swx_pipeline *p);
-
 static inline void
 instr_tx_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
-	struct port_out_runtime *port = &p->out[port_id];
-	struct rte_swx_pkt *pkt = &t->pkt;
 
-	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
-	      p->thread_id,
-	      (uint32_t)port_id);
-
-	/* Headers. */
-	emit_handler(t);
-
-	/* Packet. */
-	port->pkt_tx(port->obj, pkt);
+	__instr_tx_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_reset(p, t);
@@ -1656,19 +1585,8 @@ instr_tx_i_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t port_id = ip->io.io.val;
-	struct port_out_runtime *port = &p->out[port_id];
-	struct rte_swx_pkt *pkt = &t->pkt;
-
-	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
-	      p->thread_id,
-	      (uint32_t)port_id);
-
-	/* Headers. */
-	emit_handler(t);
 
-	/* Packet. */
-	port->pkt_tx(port->obj, pkt);
+	__instr_tx_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_reset(p, t);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 9814b5685a..e9fe6632b6 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1490,4 +1490,94 @@ instr_rx_exec(struct rte_swx_pipeline *p)
 	thread_yield(p);
 }
 
+/*
+ * tx.
+ */
+static inline void
+emit_handler(struct thread *t)
+{
+	struct header_out_runtime *h0 = &t->headers_out[0];
+	struct header_out_runtime *h1 = &t->headers_out[1];
+	uint32_t offset = 0, i;
+
+	/* No header change or header decapsulation. */
+	if ((t->n_headers_out == 1) &&
+	    (h0->ptr + h0->n_bytes == t->ptr)) {
+		TRACE("Emit handler: no header change or header decap.\n");
+
+		t->pkt.offset -= h0->n_bytes;
+		t->pkt.length += h0->n_bytes;
+
+		return;
+	}
+
+	/* Header encapsulation (optionally, with prior header decapsulation). */
+	if ((t->n_headers_out == 2) &&
+	    (h1->ptr + h1->n_bytes == t->ptr) &&
+	    (h0->ptr == h0->ptr0)) {
+		uint32_t offset;
+
+		TRACE("Emit handler: header encapsulation.\n");
+
+		offset = h0->n_bytes + h1->n_bytes;
+		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
+		t->pkt.offset -= offset;
+		t->pkt.length += offset;
+
+		return;
+	}
+
+	/* For any other case. */
+	TRACE("Emit handler: complex case.\n");
+
+	for (i = 0; i < t->n_headers_out; i++) {
+		struct header_out_runtime *h = &t->headers_out[i];
+
+		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
+		offset += h->n_bytes;
+	}
+
+	if (offset) {
+		memcpy(t->ptr - offset, t->header_out_storage, offset);
+		t->pkt.offset -= offset;
+		t->pkt.length += offset;
+	}
+}
+
+static inline void
+__instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
+	struct port_out_runtime *port = &p->out[port_id];
+	struct rte_swx_pkt *pkt = &t->pkt;
+
+	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
+	      p->thread_id,
+	      (uint32_t)port_id);
+
+	/* Headers. */
+	emit_handler(t);
+
+	/* Packet. */
+	port->pkt_tx(port->obj, pkt);
+}
+
+static inline void
+__instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t port_id = ip->io.io.val;
+	struct port_out_runtime *port = &p->out[port_id];
+	struct rte_swx_pkt *pkt = &t->pkt;
+
+	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
+	      p->thread_id,
+	      (uint32_t)port_id);
+
+	/* Headers. */
+	emit_handler(t);
+
+	/* Packet. */
+	port->pkt_tx(port->obj, pkt);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 05/24] pipeline: create inline functions for extract instruction
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (2 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
                   ` (19 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 130 ++++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 178 +++++++++++++++++++++++
 2 files changed, 203 insertions(+), 105 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index bcf796f8c3..fd7e31b709 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1665,52 +1665,12 @@ instr_hdr_lookahead_translate(struct rte_swx_pipeline *p,
 }
 
 static inline void
-__instr_hdr_extract_exec(struct rte_swx_pipeline *p, uint32_t n_extract);
-
-static inline void
-__instr_hdr_extract_exec(struct rte_swx_pipeline *p, uint32_t n_extract)
+instr_hdr_extract_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t valid_headers = t->valid_headers;
-	uint8_t *ptr = t->ptr;
-	uint32_t offset = t->pkt.offset;
-	uint32_t length = t->pkt.length;
-	uint32_t i;
 
-	for (i = 0; i < n_extract; i++) {
-		uint32_t header_id = ip->io.hdr.header_id[i];
-		uint32_t struct_id = ip->io.hdr.struct_id[i];
-		uint32_t n_bytes = ip->io.hdr.n_bytes[i];
-
-		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
-		      p->thread_id,
-		      header_id,
-		      n_bytes);
-
-		/* Headers. */
-		t->structs[struct_id] = ptr;
-		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
-
-		/* Packet. */
-		offset += n_bytes;
-		length -= n_bytes;
-		ptr += n_bytes;
-	}
-
-	/* Headers. */
-	t->valid_headers = valid_headers;
-
-	/* Packet. */
-	t->pkt.offset = offset;
-	t->pkt.length = length;
-	t->ptr = ptr;
-}
-
-static inline void
-instr_hdr_extract_exec(struct rte_swx_pipeline *p)
-{
-	__instr_hdr_extract_exec(p, 1);
+	__instr_hdr_extract_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1719,10 +1679,10 @@ instr_hdr_extract_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract2_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 2);
+	__instr_hdr_extract2_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1731,10 +1691,10 @@ instr_hdr_extract2_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract3_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 3);
+	__instr_hdr_extract3_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1743,10 +1703,10 @@ instr_hdr_extract3_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract4_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 4);
+	__instr_hdr_extract4_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1755,10 +1715,10 @@ instr_hdr_extract4_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract5_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 5);
+	__instr_hdr_extract5_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1767,10 +1727,10 @@ instr_hdr_extract5_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract6_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 6);
+	__instr_hdr_extract6_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1779,10 +1739,10 @@ instr_hdr_extract6_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract7_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 7);
+	__instr_hdr_extract7_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1791,10 +1751,10 @@ instr_hdr_extract7_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract8_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 8);
+	__instr_hdr_extract8_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1806,35 +1766,7 @@ instr_hdr_extract_m_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	uint64_t valid_headers = t->valid_headers;
-	uint8_t *ptr = t->ptr;
-	uint32_t offset = t->pkt.offset;
-	uint32_t length = t->pkt.length;
-
-	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
-	uint32_t header_id = ip->io.hdr.header_id[0];
-	uint32_t struct_id = ip->io.hdr.struct_id[0];
-	uint32_t n_bytes = ip->io.hdr.n_bytes[0];
-
-	struct header_runtime *h = &t->headers[header_id];
-
-	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
-	      p->thread_id,
-	      header_id,
-	      n_bytes,
-	      n_bytes_last);
-
-	n_bytes += n_bytes_last;
-
-	/* Headers. */
-	t->structs[struct_id] = ptr;
-	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
-	h->n_bytes = n_bytes;
-
-	/* Packet. */
-	t->pkt.offset = offset + n_bytes;
-	t->pkt.length = length - n_bytes;
-	t->ptr = ptr + n_bytes;
+	__instr_hdr_extract_m_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1846,19 +1778,7 @@ instr_hdr_lookahead_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	uint64_t valid_headers = t->valid_headers;
-	uint8_t *ptr = t->ptr;
-
-	uint32_t header_id = ip->io.hdr.header_id[0];
-	uint32_t struct_id = ip->io.hdr.struct_id[0];
-
-	TRACE("[Thread %2u]: lookahead header %u\n",
-	      p->thread_id,
-	      header_id);
-
-	/* Headers. */
-	t->structs[struct_id] = ptr;
-	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+	__instr_hdr_lookahead_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index e9fe6632b6..1519bcc305 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1580,4 +1580,182 @@ __instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct ins
 	port->pkt_tx(port->obj, pkt);
 }
 
+/*
+ * extract.
+ */
+static inline void
+__instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
+			      struct thread *t,
+			      const struct instruction *ip,
+			      uint32_t n_extract)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint8_t *ptr = t->ptr;
+	uint32_t offset = t->pkt.offset;
+	uint32_t length = t->pkt.length;
+	uint32_t i;
+
+	for (i = 0; i < n_extract; i++) {
+		uint32_t header_id = ip->io.hdr.header_id[i];
+		uint32_t struct_id = ip->io.hdr.struct_id[i];
+		uint32_t n_bytes = ip->io.hdr.n_bytes[i];
+
+		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
+		      p->thread_id,
+		      header_id,
+		      n_bytes);
+
+		/* Headers. */
+		t->structs[struct_id] = ptr;
+		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+
+		/* Packet. */
+		offset += n_bytes;
+		length -= n_bytes;
+		ptr += n_bytes;
+	}
+
+	/* Headers. */
+	t->valid_headers = valid_headers;
+
+	/* Packet. */
+	t->pkt.offset = offset;
+	t->pkt.length = length;
+	t->ptr = ptr;
+}
+
+static inline void
+__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
+			 struct thread *t,
+			 const struct instruction *ip)
+{
+	__instr_hdr_extract_many_exec(p, t, ip, 1);
+}
+
+static inline void
+__instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 2);
+}
+
+static inline void
+__instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 3);
+}
+
+static inline void
+__instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 4);
+}
+
+static inline void
+__instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 5);
+}
+
+static inline void
+__instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 6);
+}
+
+static inline void
+__instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 7);
+}
+
+static inline void
+__instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 8);
+}
+
+static inline void
+__instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint8_t *ptr = t->ptr;
+	uint32_t offset = t->pkt.offset;
+	uint32_t length = t->pkt.length;
+
+	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
+	uint32_t header_id = ip->io.hdr.header_id[0];
+	uint32_t struct_id = ip->io.hdr.struct_id[0];
+	uint32_t n_bytes = ip->io.hdr.n_bytes[0];
+
+	struct header_runtime *h = &t->headers[header_id];
+
+	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
+	      p->thread_id,
+	      header_id,
+	      n_bytes,
+	      n_bytes_last);
+
+	n_bytes += n_bytes_last;
+
+	/* Headers. */
+	t->structs[struct_id] = ptr;
+	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+	h->n_bytes = n_bytes;
+
+	/* Packet. */
+	t->pkt.offset = offset + n_bytes;
+	t->pkt.length = length - n_bytes;
+	t->ptr = ptr + n_bytes;
+}
+
+static inline void
+__instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint8_t *ptr = t->ptr;
+
+	uint32_t header_id = ip->io.hdr.header_id[0];
+	uint32_t struct_id = ip->io.hdr.struct_id[0];
+
+	TRACE("[Thread %2u]: lookahead header %u\n",
+	      p->thread_id,
+	      header_id);
+
+	/* Headers. */
+	t->structs[struct_id] = ptr;
+	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 06/24] pipeline: create inline functions for emit instruction
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (3 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
                   ` (18 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 162 ++++++++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 170 +++++++++++++++++++++++
 2 files changed, 228 insertions(+), 104 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index fd7e31b709..80c5fb94bb 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1810,82 +1810,12 @@ instr_hdr_emit_translate(struct rte_swx_pipeline *p,
 }
 
 static inline void
-__instr_hdr_emit_exec(struct rte_swx_pipeline *p, uint32_t n_emit);
-
-static inline void
-__instr_hdr_emit_exec(struct rte_swx_pipeline *p, uint32_t n_emit)
+instr_hdr_emit_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t valid_headers = t->valid_headers;
-	uint32_t n_headers_out = t->n_headers_out;
-	struct header_out_runtime *ho = &t->headers_out[n_headers_out - 1];
-	uint8_t *ho_ptr = NULL;
-	uint32_t ho_nbytes = 0, first = 1, i;
-
-	for (i = 0; i < n_emit; i++) {
-		uint32_t header_id = ip->io.hdr.header_id[i];
-		uint32_t struct_id = ip->io.hdr.struct_id[i];
-
-		struct header_runtime *hi = &t->headers[header_id];
-		uint8_t *hi_ptr0 = hi->ptr0;
-		uint32_t n_bytes = hi->n_bytes;
-
-		uint8_t *hi_ptr = t->structs[struct_id];
-
-		if (!MASK64_BIT_GET(valid_headers, header_id))
-			continue;
-
-		TRACE("[Thread %2u]: emit header %u\n",
-		      p->thread_id,
-		      header_id);
-
-		/* Headers. */
-		if (first) {
-			first = 0;
-
-			if (!t->n_headers_out) {
-				ho = &t->headers_out[0];
-
-				ho->ptr0 = hi_ptr0;
-				ho->ptr = hi_ptr;
-
-				ho_ptr = hi_ptr;
-				ho_nbytes = n_bytes;
-
-				n_headers_out = 1;
-
-				continue;
-			} else {
-				ho_ptr = ho->ptr;
-				ho_nbytes = ho->n_bytes;
-			}
-		}
-
-		if (ho_ptr + ho_nbytes == hi_ptr) {
-			ho_nbytes += n_bytes;
-		} else {
-			ho->n_bytes = ho_nbytes;
-
-			ho++;
-			ho->ptr0 = hi_ptr0;
-			ho->ptr = hi_ptr;
 
-			ho_ptr = hi_ptr;
-			ho_nbytes = n_bytes;
-
-			n_headers_out++;
-		}
-	}
-
-	ho->n_bytes = ho_nbytes;
-	t->n_headers_out = n_headers_out;
-}
-
-static inline void
-instr_hdr_emit_exec(struct rte_swx_pipeline *p)
-{
-	__instr_hdr_emit_exec(p, 1);
+	__instr_hdr_emit_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1894,81 +1824,105 @@ instr_hdr_emit_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 1);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_emit_exec(p, 2);
-	instr_tx_exec(p);
+	__instr_hdr_emit2_tx_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit3_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 3);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit4_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 4);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_emit_exec(p, 5);
-	instr_tx_exec(p);
+	__instr_hdr_emit5_tx_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit6_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 6);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit7_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 7);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit8_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 8);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 /*
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 1519bcc305..8b37a9812e 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1758,4 +1758,174 @@ __instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
 	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
 }
 
+/*
+ * emit.
+ */
+static inline void
+__instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
+			   struct thread *t,
+			   const struct instruction *ip,
+			   uint32_t n_emit)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint32_t n_headers_out = t->n_headers_out;
+	struct header_out_runtime *ho = &t->headers_out[n_headers_out - 1];
+	uint8_t *ho_ptr = NULL;
+	uint32_t ho_nbytes = 0, first = 1, i;
+
+	for (i = 0; i < n_emit; i++) {
+		uint32_t header_id = ip->io.hdr.header_id[i];
+		uint32_t struct_id = ip->io.hdr.struct_id[i];
+
+		struct header_runtime *hi = &t->headers[header_id];
+		uint8_t *hi_ptr0 = hi->ptr0;
+		uint32_t n_bytes = hi->n_bytes;
+
+		uint8_t *hi_ptr = t->structs[struct_id];
+
+		if (!MASK64_BIT_GET(valid_headers, header_id))
+			continue;
+
+		TRACE("[Thread %2u]: emit header %u\n",
+		      p->thread_id,
+		      header_id);
+
+		/* Headers. */
+		if (first) {
+			first = 0;
+
+			if (!t->n_headers_out) {
+				ho = &t->headers_out[0];
+
+				ho->ptr0 = hi_ptr0;
+				ho->ptr = hi_ptr;
+
+				ho_ptr = hi_ptr;
+				ho_nbytes = n_bytes;
+
+				n_headers_out = 1;
+
+				continue;
+			} else {
+				ho_ptr = ho->ptr;
+				ho_nbytes = ho->n_bytes;
+			}
+		}
+
+		if (ho_ptr + ho_nbytes == hi_ptr) {
+			ho_nbytes += n_bytes;
+		} else {
+			ho->n_bytes = ho_nbytes;
+
+			ho++;
+			ho->ptr0 = hi_ptr0;
+			ho->ptr = hi_ptr;
+
+			ho_ptr = hi_ptr;
+			ho_nbytes = n_bytes;
+
+			n_headers_out++;
+		}
+	}
+
+	ho->n_bytes = ho_nbytes;
+	t->n_headers_out = n_headers_out;
+}
+
+static inline void
+__instr_hdr_emit_exec(struct rte_swx_pipeline *p,
+		      struct thread *t,
+		      const struct instruction *ip)
+{
+	__instr_hdr_emit_many_exec(p, t, ip, 1);
+}
+
+static inline void
+__instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
+			 struct thread *t,
+			 const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 1);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 2);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 3);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 4);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 5);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 6);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 7);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 8);
+	__instr_tx_exec(p, t, ip);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 07/24] pipeline: create inline functions for validate instruction
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (4 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
                   ` (17 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 12 ++-------
 lib/pipeline/rte_swx_pipeline_internal.h | 32 ++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 80c5fb94bb..6c6d8e52a5 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1953,12 +1953,8 @@ instr_hdr_validate_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t header_id = ip->valid.header_id;
 
-	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
-
-	/* Headers. */
-	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
+	__instr_hdr_validate_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1992,12 +1988,8 @@ instr_hdr_invalidate_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t header_id = ip->valid.header_id;
-
-	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);
 
-	/* Headers. */
-	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
+	__instr_hdr_invalidate_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 8b37a9812e..312490f11a 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1928,4 +1928,36 @@ __instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
 	__instr_tx_exec(p, t, ip);
 }
 
+/*
+ * validate.
+ */
+static inline void
+__instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	uint32_t header_id = ip->valid.header_id;
+
+	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
+
+	/* Headers. */
+	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
+}
+
+/*
+ * invalidate.
+ */
+static inline void
+__instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
+			    struct thread *t,
+			    const struct instruction *ip)
+{
+	uint32_t header_id = ip->valid.header_id;
+
+	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);
+
+	/* Headers. */
+	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 08/24] pipeline: create inline functions for learn instruction
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (5 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
                   ` (16 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 36 ++--------------
 lib/pipeline/rte_swx_pipeline_internal.h | 55 ++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 6c6d8e52a5..ca12f34b01 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2230,27 +2230,8 @@ instr_learn_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t action_id = ip->learn.action_id;
-	uint32_t learner_id = t->learner_id;
-	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
-		p->n_selectors + learner_id];
-	struct learner_runtime *l = &t->learners[learner_id];
-	struct learner_statistics *stats = &p->learner_stats[learner_id];
-	uint32_t status;
-
-	/* Table. */
-	status = rte_swx_table_learner_add(ts->obj,
-					   l->mailbox,
-					   t->time,
-					   action_id,
-					   l->action_data[action_id]);
-
-	TRACE("[Thread %2u] learner %u learn %s\n",
-	      p->thread_id,
-	      learner_id,
-	      status ? "ok" : "error");
 
-	stats->n_pkts_learn[status] += 1;
+	__instr_learn_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2279,20 +2260,9 @@ static inline void
 instr_forget_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
-	uint32_t learner_id = t->learner_id;
-	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
-		p->n_selectors + learner_id];
-	struct learner_runtime *l = &t->learners[learner_id];
-	struct learner_statistics *stats = &p->learner_stats[learner_id];
-
-	/* Table. */
-	rte_swx_table_learner_delete(ts->obj, l->mailbox);
-
-	TRACE("[Thread %2u] learner %u forget\n",
-	      p->thread_id,
-	      learner_id);
+	struct instruction *ip = t->ip;
 
-	stats->n_pkts_forget += 1;
+	__instr_forget_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 312490f11a..24096a23b6 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1960,4 +1960,59 @@ __instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
 	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
 }
 
+/*
+ * learn: add an entry to the thread's current learner table and count the result by status.
+ */
+static inline void
+__instr_learn_exec(struct rte_swx_pipeline *p,
+		   struct thread *t,
+		   const struct instruction *ip)
+{
+	uint64_t action_id = ip->learn.action_id;
+	uint32_t learner_id = t->learner_id;
+	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
+		p->n_selectors + learner_id];
+	struct learner_runtime *l = &t->learners[learner_id];
+	struct learner_statistics *stats = &p->learner_stats[learner_id];
+	uint32_t status;
+
+	/* Table: rte_swx_table_learner_add() returns 0 on success, 1 on error (table full). */
+	status = rte_swx_table_learner_add(ts->obj,
+					   l->mailbox,
+					   t->time,
+					   action_id,
+					   l->action_data[action_id]);
+
+	TRACE("[Thread %2u] learner %u learn %s\n",
+	      p->thread_id,
+	      learner_id,
+	      status ? "error" : "ok");
+
+	stats->n_pkts_learn[status] += 1;
+}
+
+/*
+ * forget: call rte_swx_table_learner_delete() on the current learner, then count the forget.
+ */
+static inline void
+__instr_forget_exec(struct rte_swx_pipeline *p,
+		    struct thread *t,
+		    const struct instruction *ip __rte_unused)
+{
+	uint32_t learner_id = t->learner_id;
+	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
+		p->n_selectors + learner_id];
+	struct learner_runtime *l = &t->learners[learner_id];
+	struct learner_statistics *stats = &p->learner_stats[learner_id];
+
+	/* Table: the outcome of the delete is not checked; the counter below always moves. */
+	rte_swx_table_learner_delete(ts->obj, l->mailbox);
+
+	TRACE("[Thread %2u] learner %u forget\n",
+	      p->thread_id,
+	      learner_id);
+
+	stats->n_pkts_forget += 1;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 09/24] pipeline: create inline functions for extern instruction
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (6 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
                   ` (15 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 22 +++---------
 lib/pipeline/rte_swx_pipeline_internal.h | 43 ++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ca12f34b01..c9e29230c2 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2317,18 +2317,10 @@ instr_extern_obj_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t obj_id = ip->ext_obj.ext_obj_id;
-	uint32_t func_id = ip->ext_obj.func_id;
-	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
-	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
-
-	TRACE("[Thread %2u] extern obj %u member func %u\n",
-	      p->thread_id,
-	      obj_id,
-	      func_id);
+	uint32_t done;
 
 	/* Extern object member function execute. */
-	uint32_t done = func(obj->obj, obj->mailbox);
+	done = __instr_extern_obj_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc_cond(t, done);
@@ -2340,16 +2332,10 @@ instr_extern_func_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t ext_func_id = ip->ext_func.ext_func_id;
-	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
-	rte_swx_extern_func_t func = ext_func->func;
-
-	TRACE("[Thread %2u] extern func %u\n",
-	      p->thread_id,
-	      ext_func_id);
+	uint32_t done;
 
 	/* Extern function execute. */
-	uint32_t done = func(ext_func->mailbox);
+	done = __instr_extern_func_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc_cond(t, done);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 24096a23b6..14d6d88344 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2015,4 +2015,47 @@ __instr_forget_exec(struct rte_swx_pipeline *p,
 	stats->n_pkts_forget += 1;
 }
 
+/*
+ * extern: invoke an extern object member function or an extern function; return its result.
+ */
+static inline uint32_t
+__instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	uint32_t obj_id = ip->ext_obj.ext_obj_id;
+	uint32_t func_id = ip->ext_obj.func_id;
+	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
+	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
+	uint32_t done;
+
+	TRACE("[Thread %2u] extern obj %u member func %u\n",
+	      p->thread_id,
+	      obj_id,
+	      func_id);
+
+	done = func(obj->obj, obj->mailbox); /* Member function gets the object and its mailbox. */
+
+	return done;
+}
+
+static inline uint32_t
+__instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
+			 struct thread *t,
+			 const struct instruction *ip)
+{
+	uint32_t ext_func_id = ip->ext_func.ext_func_id;
+	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
+	rte_swx_extern_func_t func = ext_func->func;
+	uint32_t done;
+
+	TRACE("[Thread %2u] extern func %u\n",
+	      p->thread_id,
+	      ext_func_id);
+
+	done = func(ext_func->mailbox); /* Only the mailbox is passed to the extern function. */
+
+	return done; /* Returned to the caller unchanged. */
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 10/24] pipeline: create inline functions for move instruction
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (7 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
                   ` (14 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 26 +++---------
 lib/pipeline/rte_swx_pipeline_internal.h | 53 ++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index c9e29230c2..72606f1a06 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2407,10 +2407,7 @@ instr_mov_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov\n",
-	      p->thread_id);
-
-	MOV(t, ip);
+	__instr_mov_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2422,10 +2419,7 @@ instr_mov_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov (mh)\n",
-	      p->thread_id);
-
-	MOV_MH(t, ip);
+	__instr_mov_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2437,10 +2431,7 @@ instr_mov_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov (hm)\n",
-	      p->thread_id);
-
-	MOV_HM(t, ip);
+	__instr_mov_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2452,10 +2443,7 @@ instr_mov_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov (hh)\n",
-	      p->thread_id);
-
-	MOV_HH(t, ip);
+	__instr_mov_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2467,11 +2455,7 @@ instr_mov_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n",
-	      p->thread_id,
-	      ip->mov.src_val);
-
-	MOV_I(t, ip);
+	__instr_mov_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 14d6d88344..1bf94159a9 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2058,4 +2058,57 @@ __instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
 	return done;
 }
 
+/*
+ * mov: one wrapper per operand variant (plain, mh, hm, hh, i); each traces, then invokes MOV*.
+ */
+static inline void
+__instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
+		 struct thread *t,
+		 const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov\n", p->thread_id);
+
+	MOV(t, ip);
+}
+
+static inline void
+__instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);
+
+	MOV_MH(t, ip);
+}
+
+static inline void
+__instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);
+
+	MOV_HM(t, ip);
+}
+
+static inline void
+__instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);
+
+	MOV_HH(t, ip);
+}
+
+static inline void
+__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		   struct thread *t,
+		   const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);
+
+	MOV_I(t, ip); /* NOTE(review): presumably consumes ip->mov.src_val traced above - confirm. */
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 11/24] pipeline: create inline functions for DMA instruction
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (8 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
                   ` (13 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          |  80 ++++++------------
 lib/pipeline/rte_swx_pipeline_internal.h | 100 +++++++++++++++++++++++
 2 files changed, 123 insertions(+), 57 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 72606f1a06..a06dc8d348 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2465,46 +2465,12 @@ instr_mov_i_exec(struct rte_swx_pipeline *p)
  * dma.
  */
 static inline void
-__instr_dma_ht_exec(struct rte_swx_pipeline *p, uint32_t n_dma);
-
-static inline void
-__instr_dma_ht_exec(struct rte_swx_pipeline *p, uint32_t n_dma)
+instr_dma_ht_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *action_data = t->structs[0];
-	uint64_t valid_headers = t->valid_headers;
-	uint32_t i;
-
-	for (i = 0; i < n_dma; i++) {
-		uint32_t header_id = ip->dma.dst.header_id[i];
-		uint32_t struct_id = ip->dma.dst.struct_id[i];
-		uint32_t offset = ip->dma.src.offset[i];
-		uint32_t n_bytes = ip->dma.n_bytes[i];
-
-		struct header_runtime *h = &t->headers[header_id];
-		uint8_t *h_ptr0 = h->ptr0;
-		uint8_t *h_ptr = t->structs[struct_id];
-
-		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
-			h_ptr : h_ptr0;
-		void *src = &action_data[offset];
-
-		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);
 
-		/* Headers. */
-		memcpy(dst, src, n_bytes);
-		t->structs[struct_id] = dst;
-		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
-	}
-
-	t->valid_headers = valid_headers;
-}
-
-static inline void
-instr_dma_ht_exec(struct rte_swx_pipeline *p)
-{
-	__instr_dma_ht_exec(p, 1);
+	__instr_dma_ht_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2513,10 +2479,10 @@ instr_dma_ht_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht2_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 2);
+	__instr_dma_ht2_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2525,10 +2491,10 @@ instr_dma_ht2_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht3_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 3);
+	__instr_dma_ht3_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2537,10 +2503,10 @@ instr_dma_ht3_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht4_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 4);
+	__instr_dma_ht4_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2549,10 +2515,10 @@ instr_dma_ht4_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht5_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 5);
+	__instr_dma_ht5_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2561,10 +2527,10 @@ instr_dma_ht5_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht6_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 6);
+	__instr_dma_ht6_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2573,10 +2539,10 @@ instr_dma_ht6_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht7_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 7);
+	__instr_dma_ht7_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2585,10 +2551,10 @@ instr_dma_ht7_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht8_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 8);
+	__instr_dma_ht8_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 1bf94159a9..ec8e342a5d 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2111,4 +2111,104 @@ __instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
 	MOV_I(t, ip);
 }
 
+/*
+ * dma: copy n_dma byte ranges from the action data (t->structs[0]) into headers, marking each valid.
+ */
+static inline void
+__instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
+			 struct thread *t,
+			 const struct instruction *ip,
+			 uint32_t n_dma)
+{
+	uint8_t *action_data = t->structs[0];
+	uint64_t valid_headers = t->valid_headers;
+	uint32_t i;
+
+	for (i = 0; i < n_dma; i++) {
+		uint32_t header_id = ip->dma.dst.header_id[i];
+		uint32_t struct_id = ip->dma.dst.struct_id[i];
+		uint32_t offset = ip->dma.src.offset[i];
+		uint32_t n_bytes = ip->dma.n_bytes[i];
+
+		struct header_runtime *h = &t->headers[header_id];
+		uint8_t *h_ptr0 = h->ptr0;
+		uint8_t *h_ptr = t->structs[struct_id];
+
+		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
+			h_ptr : h_ptr0; /* Already valid: current location; otherwise ptr0. */
+		void *src = &action_data[offset];
+
+		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);
+
+		/* Headers: copy, point structs[struct_id] at dst, set the local valid bit. */
+		memcpy(dst, src, n_bytes);
+		t->structs[struct_id] = dst;
+		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+	}
+
+	t->valid_headers = valid_headers; /* Publish the accumulated mask once, after the loop. */
+}
+
+static inline void
+__instr_dma_ht_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	__instr_dma_ht_many_exec(p, t, ip, 1); /* Single transfer: no "fused" trace. */
+}
+
+static inline void
+__instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 2); /* n_dma = number of fused DMA instructions. */
+}
+
+static inline void
+__instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 3);
+}
+
+static inline void
+__instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 4);
+}
+
+static inline void
+__instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 5);
+}
+
+static inline void
+__instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 6);
+}
+
+static inline void
+__instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 7);
+}
+
+static inline void
+__instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 8);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 12/24] pipeline: create inline functions for ALU instructions
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (9 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
                   ` (12 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 348 ++-----------
 lib/pipeline/rte_swx_pipeline_internal.h | 616 +++++++++++++++++++++++
 2 files changed, 660 insertions(+), 304 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index a06dc8d348..8956b6de27 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3044,10 +3044,8 @@ instr_alu_add_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add\n", p->thread_id);
-
-	/* Structs. */
-	ALU(t, ip, +);
+	/* Structs. */
+	__instr_alu_add_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3059,10 +3057,8 @@ instr_alu_add_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, +);
+	__instr_alu_add_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3074,10 +3070,8 @@ instr_alu_add_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, +);
+	__instr_alu_add_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3089,10 +3083,8 @@ instr_alu_add_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, +);
+	__instr_alu_add_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3104,10 +3096,8 @@ instr_alu_add_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, +);
+	__instr_alu_add_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3119,10 +3109,8 @@ instr_alu_add_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, +);
+	__instr_alu_add_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3134,10 +3122,8 @@ instr_alu_sub_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, -);
+	__instr_alu_sub_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3149,10 +3135,8 @@ instr_alu_sub_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, -);
+	__instr_alu_sub_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3164,10 +3148,8 @@ instr_alu_sub_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, -);
+	__instr_alu_sub_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3179,10 +3161,8 @@ instr_alu_sub_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, -);
+	__instr_alu_sub_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3194,10 +3174,8 @@ instr_alu_sub_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, -);
+	__instr_alu_sub_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3209,10 +3187,8 @@ instr_alu_sub_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, -);
+	__instr_alu_sub_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3224,10 +3200,8 @@ instr_alu_shl_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, <<);
+	__instr_alu_shl_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3239,10 +3213,8 @@ instr_alu_shl_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, <<);
+	__instr_alu_shl_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3254,10 +3226,8 @@ instr_alu_shl_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, <<);
+	__instr_alu_shl_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3269,10 +3239,8 @@ instr_alu_shl_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, <<);
+	__instr_alu_shl_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3284,10 +3252,8 @@ instr_alu_shl_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, <<);
+	__instr_alu_shl_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3299,10 +3265,8 @@ instr_alu_shl_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, <<);
+	__instr_alu_shl_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3314,10 +3278,8 @@ instr_alu_shr_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, >>);
+	__instr_alu_shr_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3329,10 +3291,8 @@ instr_alu_shr_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, >>);
+	__instr_alu_shr_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3344,10 +3304,8 @@ instr_alu_shr_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, >>);
+	__instr_alu_shr_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3359,10 +3317,8 @@ instr_alu_shr_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, >>);
+	__instr_alu_shr_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3374,10 +3330,8 @@ instr_alu_shr_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, >>);
+	__instr_alu_shr_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3389,10 +3343,8 @@ instr_alu_shr_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, >>);
+	__instr_alu_shr_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3404,10 +3356,8 @@ instr_alu_and_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, &);
+	__instr_alu_and_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3419,10 +3369,8 @@ instr_alu_and_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, &);
+	__instr_alu_and_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3434,10 +3382,8 @@ instr_alu_and_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, &);
+	__instr_alu_and_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3449,10 +3395,8 @@ instr_alu_and_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, &);
+	__instr_alu_and_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3464,10 +3408,8 @@ instr_alu_and_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, &);
+	__instr_alu_and_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3479,10 +3421,8 @@ instr_alu_or_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, |);
+	__instr_alu_or_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3494,10 +3434,8 @@ instr_alu_or_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, |);
+	__instr_alu_or_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3509,10 +3447,8 @@ instr_alu_or_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, |);
+	__instr_alu_or_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3524,10 +3460,8 @@ instr_alu_or_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, |);
+	__instr_alu_or_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3539,10 +3473,8 @@ instr_alu_or_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, |);
+	__instr_alu_or_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3554,10 +3486,8 @@ instr_alu_xor_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, ^);
+	__instr_alu_xor_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3569,10 +3499,8 @@ instr_alu_xor_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, ^);
+	__instr_alu_xor_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3584,10 +3512,8 @@ instr_alu_xor_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, ^);
+	__instr_alu_xor_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3599,10 +3525,8 @@ instr_alu_xor_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, ^);
+	__instr_alu_xor_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3614,10 +3538,8 @@ instr_alu_xor_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, ^);
+	__instr_alu_xor_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3628,55 +3550,9 @@ instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr, dst;
-	uint64_t *src64_ptr, src64, src64_mask, src;
-	uint64_t r;
-
-	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-	dst = *dst16_ptr;
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
-	src64 = *src64_ptr;
-	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
-	src = src64 & src64_mask;
-
-	r = dst;
-	r = ~r & 0xFFFF;
-
-	/* The first input (r) is a 16-bit number. The second and the third
-	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
-	 * three numbers (output r) is a 34-bit number.
-	 */
-	r += (src >> 32) + (src & 0xFFFFFFFF);
-
-	/* The first input is a 16-bit number. The second input is an 18-bit
-	 * number. In the worst case scenario, the sum of the two numbers is a
-	 * 19-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
-	 * therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_ckadd_field_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3687,67 +3563,9 @@ instr_alu_cksub_field_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr, dst;
-	uint64_t *src64_ptr, src64, src64_mask, src;
-	uint64_t r;
-
-	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-	dst = *dst16_ptr;
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
-	src64 = *src64_ptr;
-	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
-	src = src64 & src64_mask;
-
-	r = dst;
-	r = ~r & 0xFFFF;
-
-	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
-	 * the following sequence of operations in 2's complement arithmetic:
-	 *    a '- b = (a - b) % 0xFFFF.
-	 *
-	 * In order to prevent an underflow for the below subtraction, in which
-	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
-	 * minuend), we first add a multiple of the 0xFFFF modulus to the
-	 * minuend. The number we add to the minuend needs to be a 34-bit number
-	 * or higher, so for readability reasons we picked the 36-bit multiple.
-	 * We are effectively turning the 16-bit minuend into a 36-bit number:
-	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
-	 */
-	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */
-
-	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
-	 * result (the output r) is a 36-bit number.
-	 */
-	r -= (src >> 32) + (src & 0xFFFFFFFF);
-
-	/* The first input is a 16-bit number. The second input is a 20-bit
-	 * number. Their sum is a 21-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_cksub_field_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3758,47 +3576,9 @@ instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
-	uint32_t *src32_ptr;
-	uint64_t r0, r1;
-
-	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src32_ptr = (uint32_t *)&src_struct[0];
-
-	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
-	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
-	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
-	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
-	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
-
-	/* The first input is a 16-bit number. The second input is a 19-bit
-	 * number. Their sum is a 20-bit number.
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	r0 = ~r0 & 0xFFFF;
-	r0 = r0 ? r0 : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r0;
+	__instr_alu_ckadd_struct20_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3809,49 +3589,9 @@ instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
-	uint32_t *src32_ptr;
-	uint64_t r = 0;
-	uint32_t i;
-
-	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src32_ptr = (uint32_t *)&src_struct[0];
-
-	/* The max number of 32-bit words in a 256-byte header is 8 = 2^3.
-	 * Therefore, in the worst case scenario, a 35-bit number is added to a
-	 * 16-bit number (the input r), so the output r is 36-bit number.
-	 */
-	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
-		r += *src32_ptr;
-
-	/* The first input is a 16-bit number. The second input is a 20-bit
-	 * number. Their sum is a 21-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1000E).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_ckadd_struct_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index ec8e342a5d..7c4a2c05ef 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2211,4 +2211,620 @@ __instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct
 	__instr_dma_ht_many_exec(p, t, ip, 8);
 }
 
+/*
+ * alu.
+ */
+static inline void
+__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add\n", p->thread_id);
+
+	ALU(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, +);
+}
+
+static inline void
+__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub\n", p->thread_id);
+
+	ALU(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, -);
+}
+
+static inline void
+__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl\n", p->thread_id);
+
+	ALU(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr\n", p->thread_id);
+
+	ALU(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);
+
+	/* Structs. */
+	ALU_MI(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, >>);
+}
+
+static inline void
+__instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and\n", p->thread_id);
+
+	ALU(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (i)\n", p->thread_id);
+
+	ALU_I(t, ip, &);
+}
+
+static inline void
+__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or\n", p->thread_id);
+
+	ALU(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		      struct thread *t,
+		      const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (i)\n", p->thread_id);
+
+	ALU_I(t, ip, |);
+}
+
+static inline void
+__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor\n", p->thread_id);
+
+	ALU(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (i)\n", p->thread_id);
+
+	ALU_I(t, ip, ^);
+}
+
+static inline void
+__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
+			     struct thread *t,
+			     const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr, dst;
+	uint64_t *src64_ptr, src64, src64_mask, src;
+	uint64_t r;
+
+	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
+	src64 = *src64_ptr;
+	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
+	src = src64 & src64_mask;
+
+	r = dst;
+	r = ~r & 0xFFFF;
+
+	/* The first input (r) is a 16-bit number. The second and the third
+	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
+	 * three numbers (output r) is a 34-bit number.
+	 */
+	r += (src >> 32) + (src & 0xFFFFFFFF);
+
+	/* The first input is a 16-bit number. The second input is an 18-bit
+	 * number. In the worst case scenario, the sum of the two numbers is a
+	 * 19-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
+	 * therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
+static inline void
+__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
+			     struct thread *t,
+			     const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr, dst;
+	uint64_t *src64_ptr, src64, src64_mask, src;
+	uint64_t r;
+
+	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
+	src64 = *src64_ptr;
+	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
+	src = src64 & src64_mask;
+
+	r = dst;
+	r = ~r & 0xFFFF;
+
+	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
+	 * the following sequence of operations in 2's complement arithmetic:
+	 *    a '- b = (a - b) % 0xFFFF.
+	 *
+	 * In order to prevent an underflow for the below subtraction, in which
+	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
+	 * minuend), we first add a multiple of the 0xFFFF modulus to the
+	 * minuend. The number we add to the minuend needs to be a 34-bit number
+	 * or higher, so for readability reasons we picked the 36-bit multiple.
+	 * We are effectively turning the 16-bit minuend into a 36-bit number:
+	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
+	 */
+	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */
+
+	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
+	 * result (the output r) is a 36-bit number.
+	 */
+	r -= (src >> 32) + (src & 0xFFFFFFFF);
+
+	/* The first input is a 16-bit number. The second input is a 20-bit
+	 * number. Their sum is a 21-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
+static inline void
+__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
+				struct thread *t,
+				const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr;
+	uint32_t *src32_ptr;
+	uint64_t r0, r1;
+
+	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src32_ptr = (uint32_t *)&src_struct[0];
+
+	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
+	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
+	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
+	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
+	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
+
+	/* The first input is a 16-bit number. The second input is a 19-bit
+	 * number. Their sum is a 20-bit number.
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	r0 = ~r0 & 0xFFFF;
+	r0 = r0 ? r0 : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r0;
+}
+
+static inline void
+__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
+			      struct thread *t,
+			      const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr;
+	uint32_t *src32_ptr;
+	uint64_t r = 0;
+	uint32_t i;
+
+	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src32_ptr = (uint32_t *)&src_struct[0];
+
+	/* The max number of 32-bit words in a 256-byte header is 8 = 2^3.
+	 * Therefore, in the worst case scenario, a 35-bit number is added to a
+	 * 16-bit number (the input r), so the output r is a 36-bit number.
+	 */
+	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
+		r += *src32_ptr;
+
+	/* The first input is a 16-bit number. The second input is a 20-bit
+	 * number. Their sum is a 21-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 13/24] pipeline: create inline functions for register instructions
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (10 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
                   ` (11 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 320 ++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 475 +++++++++++++++++++++++
 2 files changed, 502 insertions(+), 293 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 8956b6de27..c7117bb6da 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3901,134 +3901,14 @@ instr_regadd_translate(struct rte_swx_pipeline *p,
 	return 0;
 }
 
-static inline uint64_t *
-instr_regarray_regarray(struct rte_swx_pipeline *p, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-	return r->regarray;
-}
-
-static inline uint64_t
-instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
-	uint64_t idx = idx64 & idx64_mask & r->size_mask;
-
-	return idx;
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline uint64_t
-instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;
-
-	return idx;
-}
-
-#else
-
-#define instr_regarray_idx_nbo instr_regarray_idx_hbo
-
-#endif
-
-static inline uint64_t
-instr_regarray_idx_imm(struct rte_swx_pipeline *p, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-
-	uint64_t idx = ip->regarray.idx_val & r->size_mask;
-
-	return idx;
-}
-
-static inline uint64_t
-instr_regarray_src_hbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
-	uint64_t src = src64 & src64_mask;
-
-	return src;
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline uint64_t
-instr_regarray_src_nbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);
-
-	return src;
-}
-
-#else
-
-#define instr_regarray_src_nbo instr_regarray_src_hbo
-
-#endif
-
-static inline void
-instr_regarray_dst_hbo_src_hbo_set(struct thread *t, struct instruction *ip, uint64_t src)
-{
-	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
-	uint64_t dst64 = *dst64_ptr;
-	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
-
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
-
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline void
-instr_regarray_dst_nbo_src_hbo_set(struct thread *t, struct instruction *ip, uint64_t src)
-{
-	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
-	uint64_t dst64 = *dst64_ptr;
-	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
-
-	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
-}
-
-#else
-
-#define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
-
-#endif
-
 static inline void
 instr_regprefetch_rh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	rte_prefetch0(&regarray[idx]);
+	__instr_regprefetch_rh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4039,14 +3919,9 @@ instr_regprefetch_rm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	rte_prefetch0(&regarray[idx]);
+	__instr_regprefetch_rm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4057,14 +3932,9 @@ instr_regprefetch_ri_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	rte_prefetch0(&regarray[idx]);
+	__instr_regprefetch_ri_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4075,14 +3945,9 @@ instr_regrd_hrh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_hrh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4093,14 +3958,9 @@ instr_regrd_hrm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_hrm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4111,14 +3971,9 @@ instr_regrd_mrh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_mrh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4129,12 +3984,9 @@ instr_regrd_mrm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_mrm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4145,14 +3997,9 @@ instr_regrd_hri_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_hri_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4163,14 +4010,9 @@ instr_regrd_mri_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_mri_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4181,15 +4023,9 @@ instr_regwr_rhh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rhh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4200,15 +4036,9 @@ instr_regwr_rhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4219,15 +4049,9 @@ instr_regwr_rmh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rmh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4238,15 +4062,9 @@ instr_regwr_rmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4257,15 +4075,9 @@ instr_regwr_rhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] = src;
+	__instr_regwr_rhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4276,15 +4088,9 @@ instr_regwr_rmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] = src;
+	__instr_regwr_rmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4295,15 +4101,9 @@ instr_regwr_rih_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rih_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4314,15 +4114,9 @@ instr_regwr_rim_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rim_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4333,15 +4127,9 @@ instr_regwr_rii_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] = src;
+	__instr_regwr_rii_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4352,15 +4140,9 @@ instr_regadd_rhh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rhh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4371,15 +4153,9 @@ instr_regadd_rhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4390,15 +4166,9 @@ instr_regadd_rmh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rmh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4409,15 +4179,9 @@ instr_regadd_rmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4428,15 +4192,9 @@ instr_regadd_rhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] += src;
+	__instr_regadd_rhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4447,15 +4205,9 @@ instr_regadd_rmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] += src;
+	__instr_regadd_rmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4466,15 +4218,9 @@ instr_regadd_rih_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rih_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4485,15 +4231,9 @@ instr_regadd_rim_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rim_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4504,15 +4244,9 @@ instr_regadd_rii_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] += src;
+	__instr_regadd_rii_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 7c4a2c05ef..2526c2f4c7 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2827,4 +2827,479 @@ __instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
 	*dst16_ptr = (uint16_t)r;
 }
 
+/*
+ * Register array.
+ */
+static inline uint64_t *
+instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
+{
+	/* Runtime entry is selected by the instruction's regarray_id. */
+	return p->regarray_runtime[ip->regarray.regarray_id].regarray;
+}
+
+static inline uint64_t
+instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct regarray_runtime *rt = &p->regarray_runtime[ip->regarray.regarray_id];
+	uint8_t *base = t->structs[ip->regarray.idx.struct_id];
+	uint64_t word, keep;
+
+	/* Keep the low n_bits of the 64-bit load, then apply the array's size_mask. */
+	word = *(uint64_t *)&base[ip->regarray.idx.offset];
+	keep = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
+
+	return word & keep & rt->size_mask;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint64_t
+instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct regarray_runtime *rt = &p->regarray_runtime[ip->regarray.regarray_id];
+	uint8_t *base = t->structs[ip->regarray.idx.struct_id];
+	uint64_t word = *(uint64_t *)&base[ip->regarray.idx.offset];
+
+	/* Convert the 64-bit load with ntoh64(), shift right so that only the
+	 * top n_bits remain, then apply the array's size_mask.
+	 */
+	return (ntoh64(word) >> (64 - ip->regarray.idx.n_bits)) & rt->size_mask;
+}
+
+#else
+
+#define instr_regarray_idx_nbo instr_regarray_idx_hbo
+
+#endif
+
+static inline uint64_t
+instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
+{
+	/* Immediate index: taken from the instruction itself (idx_val) and
+	 * masked with the size_mask of the selected register array.
+	 */
+	return ip->regarray.idx_val &
+		p->regarray_runtime[ip->regarray.regarray_id].size_mask;
+}
+
+static inline uint64_t
+instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *base = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t word, keep;
+
+	/* Only the low n_bits of the 64-bit load belong to the operand. */
+	word = *(uint64_t *)&base[ip->regarray.dstsrc.offset];
+	keep = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
+	return word & keep;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint64_t
+instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *base = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t word;
+
+	/* Convert the 64-bit load with ntoh64(); keep the top n_bits of the result. */
+	word = *(uint64_t *)&base[ip->regarray.dstsrc.offset];
+	return ntoh64(word) >> (64 - ip->regarray.dstsrc.n_bits);
+}
+
+#else
+
+#define instr_regarray_src_nbo instr_regarray_src_hbo
+
+#endif
+
+static inline void
+instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
+{
+	uint8_t *base = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t *slot = (uint64_t *)&base[ip->regarray.dstsrc.offset];
+	uint64_t keep = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
+	uint64_t old = *slot;
+
+	/* Replace the low n_bits of the destination word with those of src. */
+	*slot = (old & ~keep) | (src & keep);
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline void
+instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
+{
+	uint8_t *base = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t *slot = (uint64_t *)&base[ip->regarray.dstsrc.offset];
+	uint64_t keep = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
+	uint64_t nbo = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
+
+	/* Store the hton64()-converted value into the low n_bits of the word. */
+	*slot = (*slot & ~keep) | (nbo & keep);
+}
+
+#else
+
+#define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
+
+#endif
+
+static inline void
+__instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
+			    struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
+
+	/* Prefetch r[h]: the index operand is read through the NBO helper. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	rte_prefetch0(&regs[pos]);
+}
+
+static inline void
+__instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
+			    struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
+
+	/* Prefetch r[m]: the index operand is read through the HBO helper. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	rte_prefetch0(&regs[pos]);
+}
+
+static inline void
+__instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
+			    struct thread *t __rte_unused, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
+
+	/* Prefetch r[i]: the index is the instruction's immediate value. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	rte_prefetch0(&regs[pos]);
+}
+
+static inline void
+__instr_regrd_hrh_exec(struct rte_swx_pipeline *p,
+		       struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
+
+	/* h = r[h]: NBO index operand; value written back through the NBO setter. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	instr_regarray_dst_nbo_src_hbo_set(t, ip, regs[pos]);
+}
+
+static inline void
+__instr_regrd_hrm_exec(struct rte_swx_pipeline *p,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
+
+	/* h = r[m]: HBO index operand; value written back through the NBO setter. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	instr_regarray_dst_nbo_src_hbo_set(t, ip, regs[pos]);
+}
+
+static inline void
+__instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
+	/* m = r[h]: NBO index operand; value written back through the HBO setter. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	instr_regarray_dst_hbo_src_hbo_set(t, ip, regs[pos]);
+}
+
+static inline void
+__instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);
+	/* m = r[m]: HBO index operand; value written back through the HBO setter. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	instr_regarray_dst_hbo_src_hbo_set(t, ip, regs[pos]);
+}
+
+static inline void
+__instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
+	/* h = r[i]: immediate index; value written back through the NBO setter. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	instr_regarray_dst_nbo_src_hbo_set(t, ip, regs[pos]);
+}
+
+static inline void
+__instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
+	/* m = r[i]: immediate index; value written back through the HBO setter. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	instr_regarray_dst_hbo_src_hbo_set(t, ip, regs[pos]);
+}
+
+static inline void
+__instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
+
+	/* r[h] = h: NBO index operand, NBO source operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] = instr_regarray_src_nbo(t, ip);
+}
+
+static inline void
+__instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
+
+	/* r[h] = m: NBO index operand, HBO source operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] = instr_regarray_src_hbo(t, ip);
+}
+
+static inline void
+__instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
+
+	/* r[m] = h: HBO index operand, NBO source operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] = instr_regarray_src_nbo(t, ip);
+}
+
+static inline void
+__instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
+
+	/* r[m] = m: HBO index operand, HBO source operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] = instr_regarray_src_hbo(t, ip);
+}
+
+static inline void
+__instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
+
+	/* r[h] = i: NBO index operand, immediate source (dstsrc_val). */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] = ip->regarray.dstsrc_val;
+}
+
+static inline void
+__instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
+
+	/* r[m] = i: HBO index operand, immediate source (dstsrc_val). */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] = ip->regarray.dstsrc_val;
+}
+
+static inline void
+__instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
+
+	/* r[i] = h: immediate index, NBO source operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] = instr_regarray_src_nbo(t, ip);
+}
+
+static inline void
+__instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
+
+	/* r[i] = m: immediate index, HBO source operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] = instr_regarray_src_hbo(t, ip);
+}
+
+static inline void
+__instr_regwr_rii_exec(struct rte_swx_pipeline *p,
+		       struct thread *t __rte_unused,
+		       const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
+
+	/* r[i] = i: immediate index, immediate source (dstsrc_val). */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] = ip->regarray.dstsrc_val;
+}
+
+static inline void
+__instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
+
+	/* r[h] += h: NBO index operand, NBO addend operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] += instr_regarray_src_nbo(t, ip);
+}
+
+static inline void
+__instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
+
+	/* r[h] += m: NBO index operand, HBO addend operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] += instr_regarray_src_hbo(t, ip);
+}
+
+static inline void
+__instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
+
+	/* r[m] += h: HBO index operand, NBO addend operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] += instr_regarray_src_nbo(t, ip);
+}
+
+static inline void
+__instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
+
+	/* r[m] += m: HBO index operand, HBO addend operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] += instr_regarray_src_hbo(t, ip);
+}
+
+static inline void
+__instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
+
+	/* r[h] += i: NBO index operand, immediate addend (dstsrc_val). */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] += ip->regarray.dstsrc_val;
+}
+
+static inline void
+__instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
+
+	/* r[m] += i: HBO index operand, immediate addend (dstsrc_val). */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] += ip->regarray.dstsrc_val;
+}
+
+static inline void
+__instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
+
+	/* r[i] += h: immediate index, NBO addend operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] += instr_regarray_src_nbo(t, ip);
+}
+
+static inline void
+__instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
+
+	/* r[i] += m: immediate index, HBO addend operand. */
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] += instr_regarray_src_hbo(t, ip);
+}
+
+static inline void
+__instr_regadd_rii_exec(struct rte_swx_pipeline *p,
+			struct thread *t __rte_unused, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
+
+	/* r[i] += i: immediate index, immediate addend (dstsrc_val). */
+	idx = instr_regarray_idx_imm(p, ip);
+	src = ip->regarray.dstsrc_val;
+	regarray = instr_regarray_regarray(p, ip);
+	regarray[idx] += src;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 14/24] pipeline: create inline functions for meter instructions
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (11 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
                   ` (10 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 457 +------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 541 +++++++++++++++++++++++
 2 files changed, 558 insertions(+), 440 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index c7117bb6da..8b64c57652 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -4470,119 +4470,14 @@ instr_meter_translate(struct rte_swx_pipeline *p,
 	CHECK(0, EINVAL);
 }
 
-static inline struct meter *
-instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
-	uint64_t idx = idx64 & idx64_mask & r->size_mask;
-
-	return &r->metarray[idx];
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline struct meter *
-instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
-
-	return &r->metarray[idx];
-}
-
-#else
-
-#define instr_meter_idx_nbo instr_meter_idx_hbo
-
-#endif
-
-static inline struct meter *
-instr_meter_idx_imm(struct rte_swx_pipeline *p, struct instruction *ip)
-{
-	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
-
-	uint64_t idx =  ip->meter.idx_val & r->size_mask;
-
-	return &r->metarray[idx];
-}
-
-static inline uint32_t
-instr_meter_length_hbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
-	uint64_t src = src64 & src64_mask;
-
-	return (uint32_t)src;
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline uint32_t
-instr_meter_length_nbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
-
-	return (uint32_t)src;
-}
-
-#else
-
-#define instr_meter_length_nbo instr_meter_length_hbo
-
-#endif
-
-static inline enum rte_color
-instr_meter_color_in_hbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
-	uint64_t src = src64 & src64_mask;
-
-	return (enum rte_color)src;
-}
-
-static inline void
-instr_meter_color_out_hbo_set(struct thread *t, struct instruction *ip, enum rte_color color_out)
-{
-	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
-	uint64_t dst64 = *dst64_ptr;
-	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
-
-	uint64_t src = (uint64_t)color_out;
-
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
-}
-
 static inline void
 instr_metprefetch_h_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-
-	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m);
+	__instr_metprefetch_h_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4593,13 +4488,9 @@ instr_metprefetch_m_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-
-	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m);
+	__instr_metprefetch_m_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4610,13 +4501,9 @@ instr_metprefetch_i_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-
-	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m);
+	__instr_metprefetch_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4627,35 +4514,9 @@ instr_meter_hhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4666,35 +4527,9 @@ instr_meter_hhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4705,73 +4540,22 @@ instr_meter_hmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
 }
+
 static inline void
 instr_meter_hmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4782,35 +4566,9 @@ instr_meter_mhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4821,35 +4579,9 @@ instr_meter_mhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4860,35 +4592,9 @@ instr_meter_mmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4899,35 +4605,9 @@ instr_meter_mmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4938,35 +4618,9 @@ instr_meter_ihm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_ihm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4977,35 +4631,9 @@ instr_meter_ihi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_ihi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -5016,73 +4644,22 @@ instr_meter_imm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_imm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
 }
+
 static inline void
 instr_meter_imi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_imi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 2526c2f4c7..791adfb471 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -3302,4 +3302,545 @@ __instr_regadd_rii_exec(struct rte_swx_pipeline *p,
 	regarray[idx] += src;
 }
 
+/*
+ * metarray.
+ */
+static inline struct meter *
+instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
+
+	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
+	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
+	uint64_t idx64 = *idx64_ptr;
+	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
+	uint64_t idx = idx64 & idx64_mask & r->size_mask;
+
+	return &r->metarray[idx];
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline struct meter *
+instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
+
+	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
+	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
+	uint64_t idx64 = *idx64_ptr;
+	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
+
+	return &r->metarray[idx];
+}
+
+#else
+
+#define instr_meter_idx_nbo instr_meter_idx_hbo
+
+#endif
+
+static inline struct meter *
+instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
+{
+	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
+
+	uint64_t idx =  ip->meter.idx_val & r->size_mask;
+
+	return &r->metarray[idx];
+}
+
+static inline uint32_t
+instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
+	uint64_t src = src64 & src64_mask;
+
+	return (uint32_t)src;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint32_t
+instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
+
+	return (uint32_t)src;
+}
+
+#else
+
+#define instr_meter_length_nbo instr_meter_length_hbo
+
+#endif
+
+static inline enum rte_color
+instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
+	uint64_t src = src64 & src64_mask;
+
+	return (enum rte_color)src;
+}
+
+static inline void
+instr_meter_color_out_hbo_set(struct thread *t,
+			      const struct instruction *ip,
+			      enum rte_color color_out)
+{
+	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
+	uint64_t dst64 = *dst64_ptr;
+	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
+
+	uint64_t src = (uint64_t)color_out;
+
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
+}
+
+static inline void
+__instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	struct meter *m;
+
+	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m);
+}
+
+static inline void
+__instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	struct meter *m;
+
+	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m);
+}
+
+static inline void
+__instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
+			   struct thread *t __rte_unused,
+			   const struct instruction *ip)
+{
+	struct meter *m;
+
+	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m);
+}
+
+static inline void
+__instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 15/24] pipeline: create inline functions for instruction operands
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (12 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
                   ` (9 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline_internal.h | 29 ++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 791adfb471..efd136196f 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -928,6 +928,35 @@ struct thread {
 #define HEADER_VALID(thread, header_id) \
 	MASK64_BIT_GET((thread)->valid_headers, header_id)
 
+static inline uint64_t
+instr_operand_hbo(struct thread *t, const struct instr_operand *x)
+{
+	uint8_t *x_struct = t->structs[x->struct_id];
+	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
+	uint64_t x64 = *x64_ptr;
+	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
+
+	return x64 & x64_mask;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint64_t
+instr_operand_nbo(struct thread *t, const struct instr_operand *x)
+{
+	uint8_t *x_struct = t->structs[x->struct_id];
+	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
+	uint64_t x64 = *x64_ptr;
+
+	return ntoh64(x64) >> (64 - x->n_bits);
+}
+
+#else
+
+#define instr_operand_nbo instr_operand_hbo
+
+#endif
+
 #define ALU(thread, ip, operator)  \
 {                                                                              \
 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 16/24] pipeline: enable persistent instruction meta-data
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (13 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 17/24] pipeline: introduce action functions Cristian Dumitrescu
                   ` (8 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Save the instruction meta-data for later use instead of freeing it up
once the instruction translation is completed.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 9 ++++++---
 lib/pipeline/rte_swx_pipeline_internal.h | 2 ++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 8b64c57652..4099e364f5 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -6456,13 +6456,14 @@ instruction_config(struct rte_swx_pipeline *p,
 
 	if (a) {
 		a->instructions = instr;
+		a->instruction_data = data;
 		a->n_instructions = n_instructions;
 	} else {
 		p->instructions = instr;
+		p->instruction_data = data;
 		p->n_instructions = n_instructions;
 	}
 
-	free(data);
 	return 0;
 
 error:
@@ -6811,8 +6812,8 @@ action_build(struct rte_swx_pipeline *p)
 {
 	struct action *action;
 
-	p->action_instructions = calloc(p->n_actions,
-					sizeof(struct instruction *));
+	/* p->action_instructions. */
+	p->action_instructions = calloc(p->n_actions, sizeof(struct instruction *));
 	CHECK(p->action_instructions, ENOMEM);
 
 	TAILQ_FOREACH(action, &p->actions, node)
@@ -6841,6 +6842,7 @@ action_free(struct rte_swx_pipeline *p)
 			break;
 
 		TAILQ_REMOVE(&p->actions, action, node);
+		free(action->instruction_data);
 		free(action->instructions);
 		free(action);
 	}
@@ -8777,6 +8779,7 @@ rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 	if (!p)
 		return;
 
+	free(p->instruction_data);
 	free(p->instructions);
 
 	metarray_free(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index efd136196f..7a02d6cb5f 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -693,6 +693,7 @@ struct action {
 	struct struct_type *st;
 	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
 	struct instruction *instructions;
+	struct instruction_data *instruction_data;
 	uint32_t n_instructions;
 	uint32_t id;
 };
@@ -1388,6 +1389,7 @@ struct rte_swx_pipeline {
 	struct regarray_runtime *regarray_runtime;
 	struct metarray_runtime *metarray_runtime;
 	struct instruction *instructions;
+	struct instruction_data *instruction_data;
 	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
 
 	uint32_t n_structs;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 17/24] pipeline: introduce action functions
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (14 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
                   ` (7 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

For better performance, the option to run a single function per action
is now provided, which requires a single function call per action that
can be better optimized by the C compiler, as opposed to one function
call per instruction. Special table lookup instructions are added to
support this feature.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 127 +++++++++++++++++++++++
 lib/pipeline/rte_swx_pipeline_internal.h |   6 ++
 2 files changed, 133 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 4099e364f5..0d02548137 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2097,6 +2097,62 @@ instr_table_exec(struct rte_swx_pipeline *p)
 	thread_ip_action_call(p, t, action_id);
 }
 
+static inline void
+instr_table_af_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+	uint32_t table_id = ip->table.table_id;
+	struct rte_swx_table_state *ts = &t->table_state[table_id];
+	struct table_runtime *table = &t->tables[table_id];
+	struct table_statistics *stats = &p->table_stats[table_id];
+	uint64_t action_id, n_pkts_hit, n_pkts_action;
+	uint8_t *action_data;
+	action_func_t action_func;
+	int done, hit;
+
+	/* Table. */
+	done = table->func(ts->obj,
+			   table->mailbox,
+			   table->key,
+			   &action_id,
+			   &action_data,
+			   &hit);
+	if (!done) {
+		/* Thread. */
+		TRACE("[Thread %2u] table %u (not finalized)\n",
+		      p->thread_id,
+		      table_id);
+
+		thread_yield(p);
+		return;
+	}
+
+	action_id = hit ? action_id : ts->default_action_id;
+	action_data = hit ? action_data : ts->default_action_data;
+	action_func = p->action_funcs[action_id];
+	n_pkts_hit = stats->n_pkts_hit[hit];
+	n_pkts_action = stats->n_pkts_action[action_id];
+
+	TRACE("[Thread %2u] table %u (%s, action %u)\n",
+	      p->thread_id,
+	      table_id,
+	      hit ? "hit" : "miss",
+	      (uint32_t)action_id);
+
+	t->action_id = action_id;
+	t->structs[0] = action_data;
+	t->hit = hit;
+	stats->n_pkts_hit[hit] = n_pkts_hit + 1;
+	stats->n_pkts_action[action_id] = n_pkts_action + 1;
+
+	/* Thread. */
+	thread_ip_inc(p);
+
+	/* Action. */
+	action_func(p);
+}
+
 static inline void
 instr_selector_exec(struct rte_swx_pipeline *p)
 {
@@ -2193,6 +2249,68 @@ instr_learner_exec(struct rte_swx_pipeline *p)
 	thread_ip_action_call(p, t, action_id);
 }
 
+static inline void
+instr_learner_af_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+	uint32_t learner_id = ip->table.table_id;
+	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
+		p->n_selectors + learner_id];
+	struct learner_runtime *l = &t->learners[learner_id];
+	struct learner_statistics *stats = &p->learner_stats[learner_id];
+	uint64_t action_id, n_pkts_hit, n_pkts_action, time;
+	uint8_t *action_data;
+	action_func_t action_func;
+	int done, hit;
+
+	/* Table. */
+	time = rte_get_tsc_cycles();
+
+	done = rte_swx_table_learner_lookup(ts->obj,
+					    l->mailbox,
+					    time,
+					    l->key,
+					    &action_id,
+					    &action_data,
+					    &hit);
+	if (!done) {
+		/* Thread. */
+		TRACE("[Thread %2u] learner %u (not finalized)\n",
+		      p->thread_id,
+		      learner_id);
+
+		thread_yield(p);
+		return;
+	}
+
+	action_id = hit ? action_id : ts->default_action_id;
+	action_data = hit ? action_data : ts->default_action_data;
+	action_func = p->action_funcs[action_id];
+	n_pkts_hit = stats->n_pkts_hit[hit];
+	n_pkts_action = stats->n_pkts_action[action_id];
+
+	TRACE("[Thread %2u] learner %u (%s, action %u)\n",
+	      p->thread_id,
+	      learner_id,
+	      hit ? "hit" : "miss",
+	      (uint32_t)action_id);
+
+	t->action_id = action_id;
+	t->structs[0] = action_data;
+	t->hit = hit;
+	t->learner_id = learner_id;
+	t->time = time;
+	stats->n_pkts_hit[hit] = n_pkts_hit + 1;
+	stats->n_pkts_action[action_id] = n_pkts_action + 1;
+
+	/* Thread. */
+	thread_ip_action_call(p, t, action_id);
+
+	/* Action */
+	action_func(p);
+}
+
 /*
  * learn.
  */
@@ -6618,8 +6736,10 @@ static instr_exec_t instruction_table[] = {
 	[INSTR_METER_IMI] = instr_meter_imi_exec,
 
 	[INSTR_TABLE] = instr_table_exec,
+	[INSTR_TABLE_AF] = instr_table_af_exec,
 	[INSTR_SELECTOR] = instr_selector_exec,
 	[INSTR_LEARNER] = instr_learner_exec,
+	[INSTR_LEARNER_AF] = instr_learner_af_exec,
 	[INSTR_LEARNER_LEARN] = instr_learn_exec,
 	[INSTR_LEARNER_FORGET] = instr_forget_exec,
 	[INSTR_EXTERN_OBJ] = instr_extern_obj_exec,
@@ -6819,12 +6939,19 @@ action_build(struct rte_swx_pipeline *p)
 	TAILQ_FOREACH(action, &p->actions, node)
 		p->action_instructions[action->id] = action->instructions;
 
+	/* p->action_funcs. */
+	p->action_funcs = calloc(p->n_actions, sizeof(action_func_t));
+	CHECK(p->action_funcs, ENOMEM);
+
 	return 0;
 }
 
 static void
 action_build_free(struct rte_swx_pipeline *p)
 {
+	free(p->action_funcs);
+	p->action_funcs = NULL;
+
 	free(p->action_instructions);
 	p->action_instructions = NULL;
 }
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 7a02d6cb5f..3578a10501 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -443,8 +443,10 @@ enum instruction_type {
 
 	/* table TABLE */
 	INSTR_TABLE,
+	INSTR_TABLE_AF,
 	INSTR_SELECTOR,
 	INSTR_LEARNER,
+	INSTR_LEARNER_AF,
 
 	/* learn LEARNER ACTION_NAME */
 	INSTR_LEARNER_LEARN,
@@ -687,6 +689,9 @@ struct instruction_data {
 /*
  * Action.
  */
+typedef void
+(*action_func_t)(struct rte_swx_pipeline *p);
+
 struct action {
 	TAILQ_ENTRY(action) node;
 	char name[RTE_SWX_NAME_SIZE];
@@ -1382,6 +1387,7 @@ struct rte_swx_pipeline {
 	struct port_in_runtime *in;
 	struct port_out_runtime *out;
 	struct instruction **action_instructions;
+	action_func_t *action_funcs;
 	struct rte_swx_table_state *table_state;
 	struct table_statistics *table_stats;
 	struct selector_statistics *selector_stats;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 18/24] pipeline: introduce custom instructions
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (15 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 17/24] pipeline: introduce action functions Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
                   ` (6 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

For better performance, the option to create custom instructions when
the program is translated and add them on-the-fly to the pipeline is
now provided. Multiple regular instructions can now be consolidated
into a single C function optimized by the C compiler directly.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 6 +++++-
 lib/pipeline/rte_swx_pipeline_internal.h | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 0d02548137..598009c024 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -6592,7 +6592,11 @@ instruction_config(struct rte_swx_pipeline *p,
 
 typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
 
-static instr_exec_t instruction_table[] = {
+#ifndef RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX
+#define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 256
+#endif
+
+static instr_exec_t instruction_table[RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX] = {
 	[INSTR_RX] = instr_rx_exec,
 	[INSTR_TX] = instr_tx_exec,
 	[INSTR_TX_I] = instr_tx_i_exec,
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 3578a10501..4ad6dd42dd 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -541,6 +541,9 @@ enum instruction_type {
 	 * Return from action
 	 */
 	INSTR_RETURN,
+
+	/* Start of custom instructions. */
+	INSTR_CUSTOM_0,
 };
 
 struct instr_operand {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 19/24] pipeline: introduce pipeline compilation
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (16 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
                   ` (5 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Lay the foundation to generate C code for the pipeline: C functions
for actions and custom instructions are generated, built as a shared
object library, and loaded into the pipeline.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 44 +++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 598009c024..4c0e3043ec 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -8953,6 +8953,9 @@ rte_swx_pipeline_instructions_config(struct rte_swx_pipeline *p,
 	return 0;
 }
 
+static int
+pipeline_compile(struct rte_swx_pipeline *p);
+
 int
 rte_swx_pipeline_build(struct rte_swx_pipeline *p)
 {
@@ -9018,6 +9021,9 @@ rte_swx_pipeline_build(struct rte_swx_pipeline *p)
 		goto error;
 
 	p->build_done = 1;
+
+	pipeline_compile(p);
+
 	return 0;
 
 error:
@@ -9760,3 +9766,41 @@ rte_swx_ctl_meter_stats_read(struct rte_swx_pipeline *p,
 
 	return 0;
 }
+
+/*
+ * Pipeline compilation.
+ */
+static int
+pipeline_codegen(struct rte_swx_pipeline *p)
+{
+	FILE *f = NULL;
+
+	if (!p)
+		return -EINVAL;
+
+	/* Create the .c file. */
+	f = fopen("/tmp/pipeline.c", "w");
+	if (!f)
+		return -EIO;
+
+	/* Include the .h file. */
+	fprintf(f, "#include \"rte_swx_pipeline_internal.h\"\n");
+
+	/* Close the .c file. */
+	fclose(f);
+
+	return 0;
+}
+
+static int
+pipeline_compile(struct rte_swx_pipeline *p)
+{
+	int status = 0;
+
+	/* Code generation. */
+	status = pipeline_codegen(p);
+	if (status)
+		return status;
+
+	return status;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 20/24] pipeline: export pipeline instructions to file
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (17 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
@ 2021-09-10 12:29 ` Cristian Dumitrescu
  2021-09-10 12:30 ` [dpdk-dev] [PATCH 21/24] pipeline: generate action functions Cristian Dumitrescu
                   ` (4 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:29 UTC (permalink / raw)
  To: dev

Export the array of translated instructions to a C file. There is one
such array per action and one for the pipeline.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 1097 +++++++++++++++++++++++++++++++
 1 file changed, 1097 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 4c0e3043ec..0153c70b5f 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -9770,9 +9770,1097 @@ rte_swx_ctl_meter_stats_read(struct rte_swx_pipeline *p,
 /*
  * Pipeline compilation.
  */
+static const char *
+instr_type_to_name(struct instruction *instr)
+{
+	switch (instr->type) {
+		case INSTR_RX: return "INSTR_RX";
+
+		case INSTR_TX: return "INSTR_TX";
+		case INSTR_TX_I: return "INSTR_TX_I";
+
+		case INSTR_HDR_EXTRACT: return "INSTR_HDR_EXTRACT";
+		case INSTR_HDR_EXTRACT2: return "INSTR_HDR_EXTRACT2";
+		case INSTR_HDR_EXTRACT3: return "INSTR_HDR_EXTRACT3";
+		case INSTR_HDR_EXTRACT4: return "INSTR_HDR_EXTRACT4";
+		case INSTR_HDR_EXTRACT5: return "INSTR_HDR_EXTRACT5";
+		case INSTR_HDR_EXTRACT6: return "INSTR_HDR_EXTRACT6";
+		case INSTR_HDR_EXTRACT7: return "INSTR_HDR_EXTRACT7";
+		case INSTR_HDR_EXTRACT8: return "INSTR_HDR_EXTRACT8";
+
+		case INSTR_HDR_EXTRACT_M: return "INSTR_HDR_EXTRACT_M";
+
+		case INSTR_HDR_LOOKAHEAD: return "INSTR_HDR_LOOKAHEAD";
+
+		case INSTR_HDR_EMIT: return "INSTR_HDR_EMIT";
+		case INSTR_HDR_EMIT_TX: return "INSTR_HDR_EMIT_TX";
+		case INSTR_HDR_EMIT2_TX: return "INSTR_HDR_EMIT2_TX";
+		case INSTR_HDR_EMIT3_TX: return "INSTR_HDR_EMIT3_TX";
+		case INSTR_HDR_EMIT4_TX: return "INSTR_HDR_EMIT4_TX";
+		case INSTR_HDR_EMIT5_TX: return "INSTR_HDR_EMIT5_TX";
+		case INSTR_HDR_EMIT6_TX: return "INSTR_HDR_EMIT6_TX";
+		case INSTR_HDR_EMIT7_TX: return "INSTR_HDR_EMIT7_TX";
+		case INSTR_HDR_EMIT8_TX: return "INSTR_HDR_EMIT8_TX";
+
+		case INSTR_HDR_VALIDATE: return "INSTR_HDR_VALIDATE";
+		case INSTR_HDR_INVALIDATE: return "INSTR_HDR_INVALIDATE";
+
+		case INSTR_MOV: return "INSTR_MOV";
+		case INSTR_MOV_MH: return "INSTR_MOV_MH";
+		case INSTR_MOV_HM: return "INSTR_MOV_HM";
+		case INSTR_MOV_HH: return "INSTR_MOV_HH";
+		case INSTR_MOV_I: return "INSTR_MOV_I";
+
+		case INSTR_DMA_HT: return "INSTR_DMA_HT";
+		case INSTR_DMA_HT2: return "INSTR_DMA_HT2";
+		case INSTR_DMA_HT3: return "INSTR_DMA_HT3";
+		case INSTR_DMA_HT4: return "INSTR_DMA_HT4";
+		case INSTR_DMA_HT5: return "INSTR_DMA_HT5";
+		case INSTR_DMA_HT6: return "INSTR_DMA_HT6";
+		case INSTR_DMA_HT7: return "INSTR_DMA_HT7";
+		case INSTR_DMA_HT8: return "INSTR_DMA_HT8";
+
+		case INSTR_ALU_ADD: return "INSTR_ALU_ADD";
+		case INSTR_ALU_ADD_MH: return "INSTR_ALU_ADD_MH";
+		case INSTR_ALU_ADD_HM: return "INSTR_ALU_ADD_HM";
+		case INSTR_ALU_ADD_HH: return "INSTR_ALU_ADD_HH";
+		case INSTR_ALU_ADD_MI: return "INSTR_ALU_ADD_MI";
+		case INSTR_ALU_ADD_HI: return "INSTR_ALU_ADD_HI";
+
+		case INSTR_ALU_SUB: return "INSTR_ALU_SUB";
+		case INSTR_ALU_SUB_MH: return "INSTR_ALU_SUB_MH";
+		case INSTR_ALU_SUB_HM: return "INSTR_ALU_SUB_HM";
+		case INSTR_ALU_SUB_HH: return "INSTR_ALU_SUB_HH";
+		case INSTR_ALU_SUB_MI: return "INSTR_ALU_SUB_MI";
+		case INSTR_ALU_SUB_HI: return "INSTR_ALU_SUB_HI";
+
+		case INSTR_ALU_CKADD_FIELD: return "INSTR_ALU_CKADD_FIELD";
+		case INSTR_ALU_CKADD_STRUCT20: return "INSTR_ALU_CKADD_STRUCT20";
+		case INSTR_ALU_CKADD_STRUCT: return "INSTR_ALU_CKADD_STRUCT";
+		case INSTR_ALU_CKSUB_FIELD: return "INSTR_ALU_CKSUB_FIELD";
+
+		case INSTR_ALU_AND: return "INSTR_ALU_AND";
+		case INSTR_ALU_AND_MH: return "INSTR_ALU_AND_MH";
+		case INSTR_ALU_AND_HM: return "INSTR_ALU_AND_HM";
+		case INSTR_ALU_AND_HH: return "INSTR_ALU_AND_HH";
+		case INSTR_ALU_AND_I: return "INSTR_ALU_AND_I";
+
+		case INSTR_ALU_OR: return "INSTR_ALU_OR";
+		case INSTR_ALU_OR_MH: return "INSTR_ALU_OR_MH";
+		case INSTR_ALU_OR_HM: return "INSTR_ALU_OR_HM";
+		case INSTR_ALU_OR_HH: return "INSTR_ALU_OR_HH";
+		case INSTR_ALU_OR_I: return "INSTR_ALU_OR_I";
+
+		case INSTR_ALU_XOR: return "INSTR_ALU_XOR";
+		case INSTR_ALU_XOR_MH: return "INSTR_ALU_XOR_MH";
+		case INSTR_ALU_XOR_HM: return "INSTR_ALU_XOR_HM";
+		case INSTR_ALU_XOR_HH: return "INSTR_ALU_XOR_HH";
+		case INSTR_ALU_XOR_I: return "INSTR_ALU_XOR_I";
+
+		case INSTR_ALU_SHL: return "INSTR_ALU_SHL";
+		case INSTR_ALU_SHL_MH: return "INSTR_ALU_SHL_MH";
+		case INSTR_ALU_SHL_HM: return "INSTR_ALU_SHL_HM";
+		case INSTR_ALU_SHL_HH: return "INSTR_ALU_SHL_HH";
+		case INSTR_ALU_SHL_MI: return "INSTR_ALU_SHL_MI";
+		case INSTR_ALU_SHL_HI: return "INSTR_ALU_SHL_HI";
+
+		case INSTR_ALU_SHR: return "INSTR_ALU_SHR";
+		case INSTR_ALU_SHR_MH: return "INSTR_ALU_SHR_MH";
+		case INSTR_ALU_SHR_HM: return "INSTR_ALU_SHR_HM";
+		case INSTR_ALU_SHR_HH: return "INSTR_ALU_SHR_HH";
+		case INSTR_ALU_SHR_MI: return "INSTR_ALU_SHR_MI";
+		case INSTR_ALU_SHR_HI: return "INSTR_ALU_SHR_HI";
+
+		case INSTR_REGPREFETCH_RH: return "INSTR_REGPREFETCH_RH";
+		case INSTR_REGPREFETCH_RM: return "INSTR_REGPREFETCH_RM";
+		case INSTR_REGPREFETCH_RI: return "INSTR_REGPREFETCH_RI";
+
+		case INSTR_REGRD_HRH: return "INSTR_REGRD_HRH";
+		case INSTR_REGRD_HRM: return "INSTR_REGRD_HRM";
+		case INSTR_REGRD_HRI: return "INSTR_REGRD_HRI";
+		case INSTR_REGRD_MRH: return "INSTR_REGRD_MRH";
+		case INSTR_REGRD_MRM: return "INSTR_REGRD_MRM";
+		case INSTR_REGRD_MRI: return "INSTR_REGRD_MRI";
+
+		case INSTR_REGWR_RHH: return "INSTR_REGWR_RHH";
+		case INSTR_REGWR_RHM: return "INSTR_REGWR_RHM";
+		case INSTR_REGWR_RHI: return "INSTR_REGWR_RHI";
+		case INSTR_REGWR_RMH: return "INSTR_REGWR_RMH";
+		case INSTR_REGWR_RMM: return "INSTR_REGWR_RMM";
+		case INSTR_REGWR_RMI: return "INSTR_REGWR_RMI";
+		case INSTR_REGWR_RIH: return "INSTR_REGWR_RIH";
+		case INSTR_REGWR_RIM: return "INSTR_REGWR_RIM";
+		case INSTR_REGWR_RII: return "INSTR_REGWR_RII";
+
+		case INSTR_REGADD_RHH: return "INSTR_REGADD_RHH";
+		case INSTR_REGADD_RHM: return "INSTR_REGADD_RHM";
+		case INSTR_REGADD_RHI: return "INSTR_REGADD_RHI";
+		case INSTR_REGADD_RMH: return "INSTR_REGADD_RMH";
+		case INSTR_REGADD_RMM: return "INSTR_REGADD_RMM";
+		case INSTR_REGADD_RMI: return "INSTR_REGADD_RMI";
+		case INSTR_REGADD_RIH: return "INSTR_REGADD_RIH";
+		case INSTR_REGADD_RIM: return "INSTR_REGADD_RIM";
+		case INSTR_REGADD_RII: return "INSTR_REGADD_RII";
+
+		case INSTR_METPREFETCH_H: return "INSTR_METPREFETCH_H";
+		case INSTR_METPREFETCH_M: return "INSTR_METPREFETCH_M";
+		case INSTR_METPREFETCH_I: return "INSTR_METPREFETCH_I";
+
+		case INSTR_METER_HHM: return "INSTR_METER_HHM";
+		case INSTR_METER_HHI: return "INSTR_METER_HHI";
+		case INSTR_METER_HMM: return "INSTR_METER_HMM";
+		case INSTR_METER_HMI: return "INSTR_METER_HMI";
+		case INSTR_METER_MHM: return "INSTR_METER_MHM";
+		case INSTR_METER_MHI: return "INSTR_METER_MHI";
+		case INSTR_METER_MMM: return "INSTR_METER_MMM";
+		case INSTR_METER_MMI: return "INSTR_METER_MMI";
+		case INSTR_METER_IHM: return "INSTR_METER_IHM";
+		case INSTR_METER_IHI: return "INSTR_METER_IHI";
+		case INSTR_METER_IMM: return "INSTR_METER_IMM";
+		case INSTR_METER_IMI: return "INSTR_METER_IMI";
+
+		case INSTR_TABLE: return "INSTR_TABLE";
+		case INSTR_TABLE_AF: return "INSTR_TABLE_AF";
+		case INSTR_SELECTOR: return "INSTR_SELECTOR";
+		case INSTR_LEARNER: return "INSTR_LEARNER";
+		case INSTR_LEARNER_AF: return "INSTR_LEARNER_AF";
+
+		case INSTR_LEARNER_LEARN: return "INSTR_LEARNER_LEARN";
+		case INSTR_LEARNER_FORGET: return "INSTR_LEARNER_FORGET";
+
+		case INSTR_EXTERN_OBJ: return "INSTR_EXTERN_OBJ";
+		case INSTR_EXTERN_FUNC: return "INSTR_EXTERN_FUNC";
+
+		case INSTR_JMP: return "INSTR_JMP";
+		case INSTR_JMP_VALID: return "INSTR_JMP_VALID";
+		case INSTR_JMP_INVALID: return "INSTR_JMP_INVALID";
+		case INSTR_JMP_HIT: return "INSTR_JMP_HIT";
+		case INSTR_JMP_MISS: return "INSTR_JMP_MISS";
+		case INSTR_JMP_ACTION_HIT: return "INSTR_JMP_ACTION_HIT";
+		case INSTR_JMP_ACTION_MISS: return "INSTR_JMP_ACTION_MISS";
+		case INSTR_JMP_EQ: return "INSTR_JMP_EQ";
+		case INSTR_JMP_EQ_MH: return "INSTR_JMP_EQ_MH";
+		case INSTR_JMP_EQ_HM: return "INSTR_JMP_EQ_HM";
+		case INSTR_JMP_EQ_HH: return "INSTR_JMP_EQ_HH";
+		case INSTR_JMP_EQ_I: return "INSTR_JMP_EQ_I";
+		case INSTR_JMP_NEQ: return "INSTR_JMP_NEQ";
+		case INSTR_JMP_NEQ_MH: return "INSTR_JMP_NEQ_MH";
+		case INSTR_JMP_NEQ_HM: return "INSTR_JMP_NEQ_HM";
+		case INSTR_JMP_NEQ_HH: return "INSTR_JMP_NEQ_HH";
+		case INSTR_JMP_NEQ_I: return "INSTR_JMP_NEQ_I";
+		case INSTR_JMP_LT: return "INSTR_JMP_LT";
+		case INSTR_JMP_LT_MH: return "INSTR_JMP_LT_MH";
+		case INSTR_JMP_LT_HM: return "INSTR_JMP_LT_HM";
+		case INSTR_JMP_LT_HH: return "INSTR_JMP_LT_HH";
+		case INSTR_JMP_LT_MI: return "INSTR_JMP_LT_MI";
+		case INSTR_JMP_LT_HI: return "INSTR_JMP_LT_HI";
+		case INSTR_JMP_GT: return "INSTR_JMP_GT";
+		case INSTR_JMP_GT_MH: return "INSTR_JMP_GT_MH";
+		case INSTR_JMP_GT_HM: return "INSTR_JMP_GT_HM";
+		case INSTR_JMP_GT_HH: return "INSTR_JMP_GT_HH";
+		case INSTR_JMP_GT_MI: return "INSTR_JMP_GT_MI";
+		case INSTR_JMP_GT_HI: return "INSTR_JMP_GT_HI";
+
+		case INSTR_RETURN: return "INSTR_RETURN";
+
+		default: return "INSTR_UNKNOWN";
+	}
+}
+
+typedef void
+(*instruction_export_t)(struct instruction *, FILE *);
+
+static void
+instr_io_export(struct instruction *instr, FILE *f)
+{
+	uint32_t n_io = 0, n_io_imm = 0, n_hdrs = 0, i;
+
+	/* n_io, n_io_imm, n_hdrs. */
+	if (instr->type == INSTR_RX ||
+	    instr->type == INSTR_TX ||
+	    instr->type == INSTR_HDR_EXTRACT_M ||
+	    (instr->type >= INSTR_HDR_EMIT_TX && instr->type <= INSTR_HDR_EMIT8_TX))
+		n_io = 1;
+
+	if (instr->type == INSTR_TX_I)
+		n_io_imm = 1;
+
+	if (instr->type >= INSTR_HDR_EXTRACT && instr->type <= INSTR_HDR_EXTRACT8)
+		n_hdrs = 1 + (instr->type - INSTR_HDR_EXTRACT);
+
+	if (instr->type == INSTR_HDR_EXTRACT_M ||
+	    instr->type == INSTR_HDR_LOOKAHEAD ||
+	    instr->type == INSTR_HDR_EMIT)
+		n_hdrs = 1;
+
+	if (instr->type >= INSTR_HDR_EMIT_TX && instr->type <= INSTR_HDR_EMIT8_TX)
+		n_hdrs = 1 + (instr->type - INSTR_HDR_EMIT_TX);
+
+	/* instr. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n",
+		instr_type_to_name(instr));
+
+	/* instr.io. */
+	fprintf(f,
+		"\t\t.io = {\n");
+
+	/* instr.io.io. */
+	if (n_io)
+		fprintf(f,
+			"\t\t\t.io = {\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t},\n",
+			instr->io.io.offset,
+			instr->io.io.n_bits);
+
+	if (n_io_imm)
+		fprintf(f,
+			"\t\t\t.io = {\n"
+			"\t\t\t\t.val = %u,\n"
+			"\t\t\t},\n",
+			instr->io.io.val);
+
+	/* instr.io.hdr. */
+	if (n_hdrs) {
+		fprintf(f,
+			"\t\t.hdr = {\n");
+
+		/* instr.io.hdr.header_id. */
+		fprintf(f,
+			"\t\t\t.header_id = {");
+
+		for (i = 0; i < n_hdrs; i++)
+			fprintf(f,
+				"%u, ",
+				instr->io.hdr.header_id[i]);
+
+		fprintf(f,
+			"},\n");
+
+		/* instr.io.hdr.struct_id. */
+		fprintf(f,
+			"\t\t\t.struct_id = {");
+
+		for (i = 0; i < n_hdrs; i++)
+			fprintf(f,
+				"%u, ",
+				instr->io.hdr.struct_id[i]);
+
+		fprintf(f,
+			"},\n");
+
+		/* instr.io.hdr.n_bytes. */
+		fprintf(f,
+			"\t\t\t.n_bytes = {");
+
+		for (i = 0; i < n_hdrs; i++)
+			fprintf(f,
+				"%u, ",
+				instr->io.hdr.n_bytes[i]);
+
+		fprintf(f,
+			"},\n");
+
+		/* instr.io.hdr - closing curly brace. */
+		fprintf(f,
+			"\t\t\t}\n,");
+	}
+
+	/* instr.io - closing curly brace. */
+	fprintf(f,
+		"\t\t},\n");
+
+	/* instr - closing curly brace. */
+	fprintf(f,
+		"\t},\n");
+}
+
+static void
+instr_hdr_validate_export(struct instruction *instr, FILE *f)
+{
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.valid = {\n"
+		"\t\t\t.header_id = %u,\n"
+		"\t\t},\n"
+		"\t},\n",
+		instr_type_to_name(instr),
+		instr->valid.header_id);
+}
+
+static void
+instr_mov_export(struct instruction *instr, FILE *f)
+{
+	if (instr->type != INSTR_MOV_I)
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.mov = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.src = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->mov.dst.struct_id,
+			instr->mov.dst.n_bits,
+			instr->mov.dst.offset,
+			instr->mov.src.struct_id,
+			instr->mov.src.n_bits,
+			instr->mov.src.offset);
+	else
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.mov = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t}\n,"
+			"\t\t\t.src_val = %" PRIu64 ",\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->mov.dst.struct_id,
+			instr->mov.dst.n_bits,
+			instr->mov.dst.offset,
+			instr->mov.src_val);
+}
+
+static void
+instr_dma_ht_export(struct instruction *instr, FILE *f)
+{
+	uint32_t n_dma = 0, i;
+
+	/* n_dma. */
+	n_dma = 1 + (instr->type - INSTR_DMA_HT);
+
+	/* instr. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n",
+		instr_type_to_name(instr));
+
+	/* instr.dma. */
+	fprintf(f,
+		"\t\t.dma = {\n");
+
+	/* instr.dma.dst. */
+	fprintf(f,
+		"\t\t\t.dst = {\n");
+
+	/* instr.dma.dst.header_id. */
+	fprintf(f,
+		"\t\t\t\t.header_id = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.dst.header_id[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma.dst.struct_id. */
+	fprintf(f,
+		"\t\t\t\t.struct_id = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.dst.struct_id[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma.dst - closing curly brace. */
+	fprintf(f,
+		"\t\t\t},\n");
+
+	/* instr.dma.src. */
+	fprintf(f,
+		"\t\t\t.src = {\n");
+
+	/* instr.dma.src.offset. */
+	fprintf(f,
+		"\t\t\t\t.offset = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.src.offset[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma.src - closing curly brace. */
+	fprintf(f,
+		"\t\t\t},\n");
+
+	/* instr.dma.n_bytes. */
+	fprintf(f,
+		"\t\t\t.n_bytes = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.n_bytes[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma - closing curly brace. */
+	fprintf(f,
+		"\t\t},\n");
+
+	/* instr - closing curly brace. */
+	fprintf(f,
+		"\t},\n");
+}
+
+static void
+instr_alu_export(struct instruction *instr, FILE *f)
+{
+	int imm = 0;
+
+	if (instr->type == INSTR_ALU_ADD_MI ||
+	    instr->type == INSTR_ALU_ADD_HI ||
+	    instr->type == INSTR_ALU_SUB_MI ||
+	    instr->type == INSTR_ALU_SUB_HI ||
+	    instr->type == INSTR_ALU_SHL_MI ||
+	    instr->type == INSTR_ALU_SHL_HI ||
+	    instr->type == INSTR_ALU_SHR_MI ||
+	    instr->type == INSTR_ALU_SHR_HI ||
+	    instr->type == INSTR_ALU_AND_I ||
+	    instr->type == INSTR_ALU_OR_I ||
+	    instr->type == INSTR_ALU_XOR_I)
+	    imm = 1;
+
+	if (!imm)
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.alu = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.src = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->alu.dst.struct_id,
+			instr->alu.dst.n_bits,
+			instr->alu.dst.offset,
+			instr->alu.src.struct_id,
+			instr->alu.src.n_bits,
+			instr->alu.src.offset);
+	else
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.alu = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t}\n,"
+			"\t\t\t.src_val = %" PRIu64 ",\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->alu.dst.struct_id,
+			instr->alu.dst.n_bits,
+			instr->alu.dst.offset,
+			instr->alu.src_val);
+}
+
+static void
+instr_reg_export(struct instruction *instr __rte_unused, FILE *f __rte_unused)
+{
+	int prefetch  = 0, idx_imm = 0, src_imm = 0;
+
+	if (instr->type == INSTR_REGPREFETCH_RH ||
+	    instr->type == INSTR_REGPREFETCH_RM ||
+	    instr->type == INSTR_REGPREFETCH_RI)
+		prefetch = 1;
+
+	/* index is the 3rd operand for the regrd instruction and the 2nd
+	 * operand for the regwr and regadd instructions.
+	 */
+	if (instr->type == INSTR_REGPREFETCH_RI ||
+	    instr->type == INSTR_REGRD_HRI ||
+	    instr->type == INSTR_REGRD_MRI ||
+	    instr->type == INSTR_REGWR_RIH ||
+	    instr->type == INSTR_REGWR_RIM ||
+	    instr->type == INSTR_REGWR_RII ||
+	    instr->type == INSTR_REGADD_RIH ||
+	    instr->type == INSTR_REGADD_RIM ||
+	    instr->type == INSTR_REGADD_RII)
+		idx_imm = 1;
+
+	/* src is the 3rd operand for the regwr and regadd instructions. */
+	if (instr->type == INSTR_REGWR_RHI ||
+	    instr->type == INSTR_REGWR_RMI ||
+	    instr->type == INSTR_REGWR_RII ||
+	    instr->type == INSTR_REGADD_RHI ||
+	    instr->type == INSTR_REGADD_RMI ||
+	    instr->type == INSTR_REGADD_RII)
+		src_imm = 1;
+
+	/* instr.regarray.regarray_id. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.regarray = {\n"
+		"\t\t\t.regarray_id = %u,\n",
+		instr_type_to_name(instr),
+		instr->regarray.regarray_id);
+
+	/* instr.regarray.idx / instr.regarray.idx_val. */
+	if (!idx_imm)
+		fprintf(f,
+			"\t\t\t\t.idx = {\n"
+			"\t\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t\t.offset = %u,\n"
+			"\t\t\t\t},\n",
+			instr->regarray.idx.struct_id,
+			instr->regarray.idx.n_bits,
+			instr->regarray.idx.offset);
+	else
+		fprintf(f,
+			"\t\t\t\t.idx_val = %u,\n",
+			instr->regarray.idx_val);
+
+	/* instr.regarray.dstsrc / instr.regarray.dstsrc_val. */
+	if (!prefetch) {
+		if (!src_imm)
+			fprintf(f,
+				"\t\t\t\t.dstsrc = {\n"
+				"\t\t\t\t\t.struct_id = %u,\n"
+				"\t\t\t\t\t.n_bits = %u,\n"
+				"\t\t\t\t\t.offset = %u,\n"
+				"\t\t\t\t},\n",
+				instr->regarray.dstsrc.struct_id,
+				instr->regarray.dstsrc.n_bits,
+				instr->regarray.dstsrc.offset);
+		else
+			fprintf(f,
+				"\t\t\t\t.dstsrc_val = %" PRIu64 ",\n",
+				instr->regarray.dstsrc_val);
+	}
+
+	/* instr.regarray and instr - closing curly braces. */
+	fprintf(f,
+		"\t\t},\n"
+		"\t},\n");
+
+	return;
+}
+
+static void
+instr_meter_export(struct instruction *instr __rte_unused, FILE *f __rte_unused)
+{
+	int prefetch  = 0, idx_imm = 0, color_in_imm = 0;
+
+	if (instr->type == INSTR_METPREFETCH_H ||
+	    instr->type == INSTR_METPREFETCH_M ||
+	    instr->type == INSTR_METPREFETCH_I)
+		prefetch = 1;
+
+	/* idx_imm. */
+	if (instr->type == INSTR_METPREFETCH_I ||
+	    instr->type == INSTR_METER_IHM ||
+	    instr->type == INSTR_METER_IHI ||
+	    instr->type == INSTR_METER_IMM ||
+	    instr->type == INSTR_METER_IMI)
+		idx_imm = 1;
+
+	/* color_in_imm. */
+	if (instr->type == INSTR_METER_HHI ||
+	    instr->type == INSTR_METER_HMI ||
+	    instr->type == INSTR_METER_MHI ||
+	    instr->type == INSTR_METER_MMI ||
+	    instr->type == INSTR_METER_IHI ||
+	    instr->type == INSTR_METER_IMI)
+		color_in_imm = 1;
+
+	/* instr.meter.metarray_id. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.meter = {\n"
+		"\t\t\t.metarray_id = %u,\n",
+		instr_type_to_name(instr),
+		instr->meter.metarray_id);
+
+	/* instr.meter.idx / instr.meter.idx_val. */
+	if (!idx_imm)
+		fprintf(f,
+			"\t\t\t.idx = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->meter.idx.struct_id,
+			instr->meter.idx.n_bits,
+			instr->meter.idx.offset);
+	else
+		fprintf(f,
+			"\t\t\t.idx_val = %u,\n",
+			instr->meter.idx_val);
+
+	if (!prefetch) {
+		/* instr.meter.length. */
+		fprintf(f,
+			"\t\t\t.length = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->meter.length.struct_id,
+			instr->meter.length.n_bits,
+			instr->meter.length.offset);
+
+		/* instr.meter.color_in / instr.meter.color_in_val. */
+		if (!color_in_imm)
+			fprintf(f,
+				"\t\t\t.color_in = {\n"
+				"\t\t\t\t.struct_id = %u,\n"
+				"\t\t\t\t.n_bits = %u,\n"
+				"\t\t\t\t.offset = %u,\n"
+				"\t\t\t},\n",
+				instr->meter.color_in.struct_id,
+				instr->meter.color_in.n_bits,
+				instr->meter.color_in.offset);
+		else
+			fprintf(f,
+				"\t\t\t.color_in_val = %u,\n",
+				(uint32_t)instr->meter.color_in_val);
+
+		/* instr.meter.color_out. */
+		fprintf(f,
+			"\t\t\t.color_out = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->meter.color_out.struct_id,
+			instr->meter.color_out.n_bits,
+			instr->meter.color_out.offset);
+	}
+
+	/* instr.meter and instr - closing curly braces. */
+	fprintf(f,
+		"\t\t},\n"
+		"\t},\n");
+
+	return;
+}
+
+/*
+ * Write the C initializer of a table-type instruction to the file: the
+ * instruction type name, followed by the ID of the table it operates on.
+ */
+static void
+instr_table_export(struct instruction *instr, FILE *f)
+{
+	/* Opening curly brace and instruction type. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n",
+		instr_type_to_name(instr));
+
+	/* instr.table. */
+	fprintf(f,
+		"\t\t.table = {\n"
+		"\t\t\t.table_id = %u,\n"
+		"\t\t},\n",
+		instr->table.table_id);
+
+	/* Closing curly brace. */
+	fprintf(f, "\t},\n");
+}
+
+/*
+ * Write the C initializer of a learn instruction to the file: the
+ * instruction type name, followed by the ID of the action to be learned.
+ *
+ * The .action_id field is emitted at three tabs of indentation, the same
+ * depth used for nested fields by the other instruction exporters.
+ */
+static void
+instr_learn_export(struct instruction *instr, FILE *f)
+{
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.learn = {\n"
+		"\t\t\t.action_id = %u,\n"
+		"\t\t},\n"
+		"\t},\n",
+		instr_type_to_name(instr),
+		instr->learn.action_id);
+}
+
+/*
+ * Write the C initializer of a forget instruction to the file. This
+ * instruction has no operands, so only its type name is exported.
+ */
+static void
+instr_forget_export(struct instruction *instr, FILE *f)
+{
+	fprintf(f, "\t{\n");
+	fprintf(f, "\t\t.type = %s,\n", instr_type_to_name(instr));
+	fprintf(f, "\t},\n");
+}
+
+/*
+ * Write the C initializer of an extern instruction to the file.
+ *
+ * INSTR_EXTERN_OBJ exports the extern object ID and the member function
+ * ID; any other instruction type routed here exports the extern function
+ * ID only.
+ */
+static void
+instr_extern_export(struct instruction *instr, FILE *f)
+{
+	/* Extern function. */
+	if (instr->type != INSTR_EXTERN_OBJ) {
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.ext_func = {\n"
+			"\t\t\t.ext_func_id = %u,\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->ext_func.ext_func_id);
+		return;
+	}
+
+	/* Extern object. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.ext_obj = {\n"
+		"\t\t\t.ext_obj_id = %u,\n"
+		"\t\t\t.func_id = %u,\n"
+		"\t\t},\n"
+		"\t},\n",
+		instr_type_to_name(instr),
+		instr->ext_obj.ext_obj_id,
+		instr->ext_obj.func_id);
+}
+
+/*
+ * Write the C initializer of a jump instruction to the file.
+ *
+ * The jump target (.ip) is always exported as NULL. The remaining fields
+ * depend on the jump flavor:
+ * - header valid/invalid: the header ID;
+ * - action hit/miss: the action ID;
+ * - operand-to-operand comparisons: operands a and b;
+ * - operand-to-immediate comparisons: operand a and the immediate b_val.
+ * Any other jump type exports no additional field.
+ */
+static void
+instr_jmp_export(struct instruction *instr, FILE *f)
+{
+	/* Opening curly braces, instruction type and jump target. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.jmp = {\n"
+		"\t\t\t.ip = NULL,\n",
+		instr_type_to_name(instr));
+
+	switch (instr->type) {
+	case INSTR_JMP_VALID:
+	case INSTR_JMP_INVALID:
+		/* instr.jmp.header_id. */
+		fprintf(f,
+			"\t\t\t.header_id = %u,\n",
+			instr->jmp.header_id);
+		break;
+
+	case INSTR_JMP_ACTION_HIT:
+	case INSTR_JMP_ACTION_MISS:
+		/* instr.jmp.action_id. */
+		fprintf(f,
+			"\t\t\t.action_id = %u,\n",
+			instr->jmp.action_id);
+		break;
+
+	case INSTR_JMP_EQ:
+	case INSTR_JMP_EQ_MH:
+	case INSTR_JMP_EQ_HM:
+	case INSTR_JMP_EQ_HH:
+	case INSTR_JMP_NEQ:
+	case INSTR_JMP_NEQ_MH:
+	case INSTR_JMP_NEQ_HM:
+	case INSTR_JMP_NEQ_HH:
+	case INSTR_JMP_LT:
+	case INSTR_JMP_LT_MH:
+	case INSTR_JMP_LT_HM:
+	case INSTR_JMP_LT_HH:
+	case INSTR_JMP_GT:
+	case INSTR_JMP_GT_MH:
+	case INSTR_JMP_GT_HM:
+	case INSTR_JMP_GT_HH:
+		/* instr.jmp.a and instr.jmp.b. */
+		fprintf(f,
+			"\t\t\t.a = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.b = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->jmp.a.struct_id,
+			instr->jmp.a.n_bits,
+			instr->jmp.a.offset,
+			instr->jmp.b.struct_id,
+			instr->jmp.b.n_bits,
+			instr->jmp.b.offset);
+		break;
+
+	case INSTR_JMP_EQ_I:
+	case INSTR_JMP_NEQ_I:
+	case INSTR_JMP_LT_MI:
+	case INSTR_JMP_LT_HI:
+	case INSTR_JMP_GT_MI:
+	case INSTR_JMP_GT_HI:
+		/* instr.jmp.a and instr.jmp.b_val. */
+		fprintf(f,
+			"\t\t\t.a = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.b_val = %" PRIu64 ",\n",
+			instr->jmp.a.struct_id,
+			instr->jmp.a.n_bits,
+			instr->jmp.a.offset,
+			instr->jmp.b_val);
+		break;
+
+	default:
+		break;
+	}
+
+	/* instr.jmp and instr - closing curly braces. */
+	fprintf(f,
+		"\t\t},\n"
+		"\t},\n");
+}
+
+/*
+ * Write the C initializer of a return instruction to the file. This
+ * instruction has no operands, so only its type name is exported.
+ */
+static void
+instr_return_export(struct instruction *instr, FILE *f)
+{
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t},\n",
+		instr_type_to_name(instr));
+}
+
+/*
+ * Export function look-up table, indexed by instruction type. Each entry is
+ * the function that writes the C initializer for instructions of that type.
+ * The table is read-only; instruction types without an explicit entry are
+ * left as NULL by the designated initializers.
+ */
+static const instruction_export_t export_table[] = {
+	[INSTR_RX] = instr_io_export,
+
+	[INSTR_TX] = instr_io_export,
+	[INSTR_TX_I] = instr_io_export,
+
+	[INSTR_HDR_EXTRACT] = instr_io_export,
+	[INSTR_HDR_EXTRACT2] = instr_io_export,
+	[INSTR_HDR_EXTRACT3] = instr_io_export,
+	[INSTR_HDR_EXTRACT4] = instr_io_export,
+	[INSTR_HDR_EXTRACT5] = instr_io_export,
+	[INSTR_HDR_EXTRACT6] = instr_io_export,
+	[INSTR_HDR_EXTRACT7] = instr_io_export,
+	[INSTR_HDR_EXTRACT8] = instr_io_export,
+
+	[INSTR_HDR_EXTRACT_M] = instr_io_export,
+
+	[INSTR_HDR_LOOKAHEAD] = instr_io_export,
+
+	[INSTR_HDR_EMIT] = instr_io_export,
+	[INSTR_HDR_EMIT_TX] = instr_io_export,
+	[INSTR_HDR_EMIT2_TX] = instr_io_export,
+	[INSTR_HDR_EMIT3_TX] = instr_io_export,
+	[INSTR_HDR_EMIT4_TX] = instr_io_export,
+	[INSTR_HDR_EMIT5_TX] = instr_io_export,
+	[INSTR_HDR_EMIT6_TX] = instr_io_export,
+	[INSTR_HDR_EMIT7_TX] = instr_io_export,
+	[INSTR_HDR_EMIT8_TX] = instr_io_export,
+
+	[INSTR_HDR_VALIDATE] = instr_hdr_validate_export,
+	[INSTR_HDR_INVALIDATE] = instr_hdr_validate_export,
+
+	[INSTR_MOV] = instr_mov_export,
+	[INSTR_MOV_MH] = instr_mov_export,
+	[INSTR_MOV_HM] = instr_mov_export,
+	[INSTR_MOV_HH] = instr_mov_export,
+	[INSTR_MOV_I] = instr_mov_export,
+
+	[INSTR_DMA_HT]  = instr_dma_ht_export,
+	[INSTR_DMA_HT2] = instr_dma_ht_export,
+	[INSTR_DMA_HT3] = instr_dma_ht_export,
+	[INSTR_DMA_HT4] = instr_dma_ht_export,
+	[INSTR_DMA_HT5] = instr_dma_ht_export,
+	[INSTR_DMA_HT6] = instr_dma_ht_export,
+	[INSTR_DMA_HT7] = instr_dma_ht_export,
+	[INSTR_DMA_HT8] = instr_dma_ht_export,
+
+	[INSTR_ALU_ADD] = instr_alu_export,
+	[INSTR_ALU_ADD_MH] = instr_alu_export,
+	[INSTR_ALU_ADD_HM] = instr_alu_export,
+	[INSTR_ALU_ADD_HH] = instr_alu_export,
+	[INSTR_ALU_ADD_MI] = instr_alu_export,
+	[INSTR_ALU_ADD_HI] = instr_alu_export,
+
+	[INSTR_ALU_SUB] = instr_alu_export,
+	[INSTR_ALU_SUB_MH] = instr_alu_export,
+	[INSTR_ALU_SUB_HM] = instr_alu_export,
+	[INSTR_ALU_SUB_HH] = instr_alu_export,
+	[INSTR_ALU_SUB_MI] = instr_alu_export,
+	[INSTR_ALU_SUB_HI] = instr_alu_export,
+
+	[INSTR_ALU_CKADD_FIELD] = instr_alu_export,
+	[INSTR_ALU_CKADD_STRUCT] = instr_alu_export,
+	[INSTR_ALU_CKADD_STRUCT20] = instr_alu_export,
+	[INSTR_ALU_CKSUB_FIELD] = instr_alu_export,
+
+	[INSTR_ALU_AND] = instr_alu_export,
+	[INSTR_ALU_AND_MH] = instr_alu_export,
+	[INSTR_ALU_AND_HM] = instr_alu_export,
+	[INSTR_ALU_AND_HH] = instr_alu_export,
+	[INSTR_ALU_AND_I] = instr_alu_export,
+
+	[INSTR_ALU_OR] = instr_alu_export,
+	[INSTR_ALU_OR_MH] = instr_alu_export,
+	[INSTR_ALU_OR_HM] = instr_alu_export,
+	[INSTR_ALU_OR_HH] = instr_alu_export,
+	[INSTR_ALU_OR_I] = instr_alu_export,
+
+	[INSTR_ALU_XOR] = instr_alu_export,
+	[INSTR_ALU_XOR_MH] = instr_alu_export,
+	[INSTR_ALU_XOR_HM] = instr_alu_export,
+	[INSTR_ALU_XOR_HH] = instr_alu_export,
+	[INSTR_ALU_XOR_I] = instr_alu_export,
+
+	[INSTR_ALU_SHL] = instr_alu_export,
+	[INSTR_ALU_SHL_MH] = instr_alu_export,
+	[INSTR_ALU_SHL_HM] = instr_alu_export,
+	[INSTR_ALU_SHL_HH] = instr_alu_export,
+	[INSTR_ALU_SHL_MI] = instr_alu_export,
+	[INSTR_ALU_SHL_HI] = instr_alu_export,
+
+	[INSTR_ALU_SHR] = instr_alu_export,
+	[INSTR_ALU_SHR_MH] = instr_alu_export,
+	[INSTR_ALU_SHR_HM] = instr_alu_export,
+	[INSTR_ALU_SHR_HH] = instr_alu_export,
+	[INSTR_ALU_SHR_MI] = instr_alu_export,
+	[INSTR_ALU_SHR_HI] = instr_alu_export,
+
+	[INSTR_REGPREFETCH_RH] = instr_reg_export,
+	[INSTR_REGPREFETCH_RM] = instr_reg_export,
+	[INSTR_REGPREFETCH_RI] = instr_reg_export,
+
+	[INSTR_REGRD_HRH] = instr_reg_export,
+	[INSTR_REGRD_HRM] = instr_reg_export,
+	[INSTR_REGRD_MRH] = instr_reg_export,
+	[INSTR_REGRD_MRM] = instr_reg_export,
+	[INSTR_REGRD_HRI] = instr_reg_export,
+	[INSTR_REGRD_MRI] = instr_reg_export,
+
+	[INSTR_REGWR_RHH] = instr_reg_export,
+	[INSTR_REGWR_RHM] = instr_reg_export,
+	[INSTR_REGWR_RMH] = instr_reg_export,
+	[INSTR_REGWR_RMM] = instr_reg_export,
+	[INSTR_REGWR_RHI] = instr_reg_export,
+	[INSTR_REGWR_RMI] = instr_reg_export,
+	[INSTR_REGWR_RIH] = instr_reg_export,
+	[INSTR_REGWR_RIM] = instr_reg_export,
+	[INSTR_REGWR_RII] = instr_reg_export,
+
+	[INSTR_REGADD_RHH] = instr_reg_export,
+	[INSTR_REGADD_RHM] = instr_reg_export,
+	[INSTR_REGADD_RMH] = instr_reg_export,
+	[INSTR_REGADD_RMM] = instr_reg_export,
+	[INSTR_REGADD_RHI] = instr_reg_export,
+	[INSTR_REGADD_RMI] = instr_reg_export,
+	[INSTR_REGADD_RIH] = instr_reg_export,
+	[INSTR_REGADD_RIM] = instr_reg_export,
+	[INSTR_REGADD_RII] = instr_reg_export,
+
+	[INSTR_METPREFETCH_H] = instr_meter_export,
+	[INSTR_METPREFETCH_M] = instr_meter_export,
+	[INSTR_METPREFETCH_I] = instr_meter_export,
+
+	[INSTR_METER_HHM] = instr_meter_export,
+	[INSTR_METER_HHI] = instr_meter_export,
+	[INSTR_METER_HMM] = instr_meter_export,
+	[INSTR_METER_HMI] = instr_meter_export,
+	[INSTR_METER_MHM] = instr_meter_export,
+	[INSTR_METER_MHI] = instr_meter_export,
+	[INSTR_METER_MMM] = instr_meter_export,
+	[INSTR_METER_MMI] = instr_meter_export,
+	[INSTR_METER_IHM] = instr_meter_export,
+	[INSTR_METER_IHI] = instr_meter_export,
+	[INSTR_METER_IMM] = instr_meter_export,
+	[INSTR_METER_IMI] = instr_meter_export,
+
+	[INSTR_TABLE] = instr_table_export,
+	[INSTR_TABLE_AF] = instr_table_export,
+	[INSTR_SELECTOR] = instr_table_export,
+	[INSTR_LEARNER] = instr_table_export,
+	[INSTR_LEARNER_AF] = instr_table_export,
+
+	[INSTR_LEARNER_LEARN] = instr_learn_export,
+	[INSTR_LEARNER_FORGET] = instr_forget_export,
+
+	[INSTR_EXTERN_OBJ] = instr_extern_export,
+	[INSTR_EXTERN_FUNC] = instr_extern_export,
+
+	[INSTR_JMP] = instr_jmp_export,
+	[INSTR_JMP_VALID] = instr_jmp_export,
+	[INSTR_JMP_INVALID] = instr_jmp_export,
+	[INSTR_JMP_HIT] = instr_jmp_export,
+	[INSTR_JMP_MISS] = instr_jmp_export,
+	[INSTR_JMP_ACTION_HIT] = instr_jmp_export,
+	[INSTR_JMP_ACTION_MISS] = instr_jmp_export,
+
+	[INSTR_JMP_EQ] = instr_jmp_export,
+	[INSTR_JMP_EQ_MH] = instr_jmp_export,
+	[INSTR_JMP_EQ_HM] = instr_jmp_export,
+	[INSTR_JMP_EQ_HH] = instr_jmp_export,
+	[INSTR_JMP_EQ_I] = instr_jmp_export,
+
+	[INSTR_JMP_NEQ] = instr_jmp_export,
+	[INSTR_JMP_NEQ_MH] = instr_jmp_export,
+	[INSTR_JMP_NEQ_HM] = instr_jmp_export,
+	[INSTR_JMP_NEQ_HH] = instr_jmp_export,
+	[INSTR_JMP_NEQ_I] = instr_jmp_export,
+
+	[INSTR_JMP_LT] = instr_jmp_export,
+	[INSTR_JMP_LT_MH] = instr_jmp_export,
+	[INSTR_JMP_LT_HM] = instr_jmp_export,
+	[INSTR_JMP_LT_HH] = instr_jmp_export,
+	[INSTR_JMP_LT_MI] = instr_jmp_export,
+	[INSTR_JMP_LT_HI] = instr_jmp_export,
+
+	[INSTR_JMP_GT] = instr_jmp_export,
+	[INSTR_JMP_GT_MH] = instr_jmp_export,
+	[INSTR_JMP_GT_HM] = instr_jmp_export,
+	[INSTR_JMP_GT_HH] = instr_jmp_export,
+	[INSTR_JMP_GT_MI] = instr_jmp_export,
+	[INSTR_JMP_GT_HI] = instr_jmp_export,
+
+	[INSTR_RETURN] = instr_return_export,
+};
+
+/*
+ * Generate the instruction data of an action: a static const array named
+ * action_<name>_instructions holding one initializer per action instruction,
+ * each written by the export function registered for its instruction type.
+ */
+static void
+action_data_codegen(struct action *a, FILE *f)
+{
+	uint32_t pos;
+
+	/* Array declaration. */
+	fprintf(f,
+		"static const struct instruction action_%s_instructions[] = {\n",
+		a->name);
+
+	/* Array elements, in instruction order. */
+	for (pos = 0; pos < a->n_instructions; pos++) {
+		struct instruction *instr = &a->instructions[pos];
+
+		export_table[instr->type](instr, f);
+	}
+
+	/* Array end. */
+	fprintf(f, "};\n");
+}
+
 static int
 pipeline_codegen(struct rte_swx_pipeline *p)
 {
+	struct action *a;
 	FILE *f = NULL;
 
 	if (!p)
@@ -9786,6 +10874,15 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 	/* Include the .h file. */
 	fprintf(f, "#include \"rte_swx_pipeline_internal.h\"\n");
 
+	/* Add the code for each action. */
+	TAILQ_FOREACH(a, &p->actions, node) {
+		fprintf(f, "/**\n * Action %s\n */\n\n", a->name);
+
+		action_data_codegen(a, f);
+
+		fprintf(f, "\n");
+	}
+
 	/* Close the .c file. */
 	fclose(f);
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 21/24] pipeline: generate action functions
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (18 preceding siblings ...)
  2021-09-10 12:29 ` [dpdk-dev] [PATCH 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
@ 2021-09-10 12:30 ` Cristian Dumitrescu
  2021-09-10 12:30 ` [dpdk-dev] [PATCH 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
                   ` (3 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:30 UTC (permalink / raw)
  To: dev

Generate a C function for each action. For most instructions, the
associated inline function is called directly. Special care is taken
for TX, jump and return instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 662 ++++++++++++++++++++++++++++++++
 1 file changed, 662 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 0153c70b5f..019dbafbf3 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1376,6 +1376,26 @@ instruction_is_tx(enum instruction_type type)
 	}
 }
 
+/*
+ * Check whether the instruction transmits the packet: either a plain TX
+ * instruction or one of the header emit instructions fused with TX.
+ * Returns 1 when it does, 0 otherwise.
+ */
+static int
+instruction_does_tx(struct instruction *instr)
+{
+	/* Plain TX. */
+	if (instr->type == INSTR_TX || instr->type == INSTR_TX_I)
+		return 1;
+
+	/* Header emit fused with TX. */
+	return instr->type == INSTR_HDR_EMIT_TX ||
+	       instr->type == INSTR_HDR_EMIT2_TX ||
+	       instr->type == INSTR_HDR_EMIT3_TX ||
+	       instr->type == INSTR_HDR_EMIT4_TX ||
+	       instr->type == INSTR_HDR_EMIT5_TX ||
+	       instr->type == INSTR_HDR_EMIT6_TX ||
+	       instr->type == INSTR_HDR_EMIT7_TX ||
+	       instr->type == INSTR_HDR_EMIT8_TX;
+}
+
 static int
 instruction_is_jmp(struct instruction *instr)
 {
@@ -10857,6 +10877,644 @@ action_data_codegen(struct action *a, FILE *f)
 	fprintf(f, "};\n");
 }
 
+/*
+ * Map the instruction type to the name of the function that the generated
+ * code calls to execute it. Returns NULL for every instruction type that has
+ * no entry in the table below (RX, table/selector/learner lookup, extern,
+ * jump and return instructions, as well as any unknown type).
+ */
+static const char *
+instr_type_to_func(struct instruction *instr)
+{
+	/* Entries not listed here are implicitly NULL. */
+	static const char * const func_names[] = {
+		[INSTR_TX] = "__instr_tx_exec",
+		[INSTR_TX_I] = "__instr_tx_i_exec",
+
+		[INSTR_HDR_EXTRACT] = "__instr_hdr_extract_exec",
+		[INSTR_HDR_EXTRACT2] = "__instr_hdr_extract2_exec",
+		[INSTR_HDR_EXTRACT3] = "__instr_hdr_extract3_exec",
+		[INSTR_HDR_EXTRACT4] = "__instr_hdr_extract4_exec",
+		[INSTR_HDR_EXTRACT5] = "__instr_hdr_extract5_exec",
+		[INSTR_HDR_EXTRACT6] = "__instr_hdr_extract6_exec",
+		[INSTR_HDR_EXTRACT7] = "__instr_hdr_extract7_exec",
+		[INSTR_HDR_EXTRACT8] = "__instr_hdr_extract8_exec",
+
+		[INSTR_HDR_EXTRACT_M] = "__instr_hdr_extract_m_exec",
+
+		[INSTR_HDR_LOOKAHEAD] = "__instr_hdr_lookahead_exec",
+
+		[INSTR_HDR_EMIT] = "__instr_hdr_emit_exec",
+		[INSTR_HDR_EMIT_TX] = "__instr_hdr_emit_tx_exec",
+		[INSTR_HDR_EMIT2_TX] = "__instr_hdr_emit2_tx_exec",
+		[INSTR_HDR_EMIT3_TX] = "__instr_hdr_emit3_tx_exec",
+		[INSTR_HDR_EMIT4_TX] = "__instr_hdr_emit4_tx_exec",
+		[INSTR_HDR_EMIT5_TX] = "__instr_hdr_emit5_tx_exec",
+		[INSTR_HDR_EMIT6_TX] = "__instr_hdr_emit6_tx_exec",
+		[INSTR_HDR_EMIT7_TX] = "__instr_hdr_emit7_tx_exec",
+		[INSTR_HDR_EMIT8_TX] = "__instr_hdr_emit8_tx_exec",
+
+		[INSTR_HDR_VALIDATE] = "__instr_hdr_validate_exec",
+		[INSTR_HDR_INVALIDATE] = "__instr_hdr_invalidate_exec",
+
+		[INSTR_MOV] = "__instr_mov_exec",
+		[INSTR_MOV_MH] = "__instr_mov_mh_exec",
+		[INSTR_MOV_HM] = "__instr_mov_hm_exec",
+		[INSTR_MOV_HH] = "__instr_mov_hh_exec",
+		[INSTR_MOV_I] = "__instr_mov_i_exec",
+
+		[INSTR_DMA_HT] = "__instr_dma_ht_exec",
+		[INSTR_DMA_HT2] = "__instr_dma_ht2_exec",
+		[INSTR_DMA_HT3] = "__instr_dma_ht3_exec",
+		[INSTR_DMA_HT4] = "__instr_dma_ht4_exec",
+		[INSTR_DMA_HT5] = "__instr_dma_ht5_exec",
+		[INSTR_DMA_HT6] = "__instr_dma_ht6_exec",
+		[INSTR_DMA_HT7] = "__instr_dma_ht7_exec",
+		[INSTR_DMA_HT8] = "__instr_dma_ht8_exec",
+
+		[INSTR_ALU_ADD] = "__instr_alu_add_exec",
+		[INSTR_ALU_ADD_MH] = "__instr_alu_add_mh_exec",
+		[INSTR_ALU_ADD_HM] = "__instr_alu_add_hm_exec",
+		[INSTR_ALU_ADD_HH] = "__instr_alu_add_hh_exec",
+		[INSTR_ALU_ADD_MI] = "__instr_alu_add_mi_exec",
+		[INSTR_ALU_ADD_HI] = "__instr_alu_add_hi_exec",
+
+		[INSTR_ALU_SUB] = "__instr_alu_sub_exec",
+		[INSTR_ALU_SUB_MH] = "__instr_alu_sub_mh_exec",
+		[INSTR_ALU_SUB_HM] = "__instr_alu_sub_hm_exec",
+		[INSTR_ALU_SUB_HH] = "__instr_alu_sub_hh_exec",
+		[INSTR_ALU_SUB_MI] = "__instr_alu_sub_mi_exec",
+		[INSTR_ALU_SUB_HI] = "__instr_alu_sub_hi_exec",
+
+		[INSTR_ALU_CKADD_FIELD] = "__instr_alu_ckadd_field_exec",
+		[INSTR_ALU_CKADD_STRUCT20] = "__instr_alu_ckadd_struct20_exec",
+		[INSTR_ALU_CKADD_STRUCT] = "__instr_alu_ckadd_struct_exec",
+		[INSTR_ALU_CKSUB_FIELD] = "__instr_alu_cksub_field_exec",
+
+		[INSTR_ALU_AND] = "__instr_alu_and_exec",
+		[INSTR_ALU_AND_MH] = "__instr_alu_and_mh_exec",
+		[INSTR_ALU_AND_HM] = "__instr_alu_and_hm_exec",
+		[INSTR_ALU_AND_HH] = "__instr_alu_and_hh_exec",
+		[INSTR_ALU_AND_I] = "__instr_alu_and_i_exec",
+
+		[INSTR_ALU_OR] = "__instr_alu_or_exec",
+		[INSTR_ALU_OR_MH] = "__instr_alu_or_mh_exec",
+		[INSTR_ALU_OR_HM] = "__instr_alu_or_hm_exec",
+		[INSTR_ALU_OR_HH] = "__instr_alu_or_hh_exec",
+		[INSTR_ALU_OR_I] = "__instr_alu_or_i_exec",
+
+		[INSTR_ALU_XOR] = "__instr_alu_xor_exec",
+		[INSTR_ALU_XOR_MH] = "__instr_alu_xor_mh_exec",
+		[INSTR_ALU_XOR_HM] = "__instr_alu_xor_hm_exec",
+		[INSTR_ALU_XOR_HH] = "__instr_alu_xor_hh_exec",
+		[INSTR_ALU_XOR_I] = "__instr_alu_xor_i_exec",
+
+		[INSTR_ALU_SHL] = "__instr_alu_shl_exec",
+		[INSTR_ALU_SHL_MH] = "__instr_alu_shl_mh_exec",
+		[INSTR_ALU_SHL_HM] = "__instr_alu_shl_hm_exec",
+		[INSTR_ALU_SHL_HH] = "__instr_alu_shl_hh_exec",
+		[INSTR_ALU_SHL_MI] = "__instr_alu_shl_mi_exec",
+		[INSTR_ALU_SHL_HI] = "__instr_alu_shl_hi_exec",
+
+		[INSTR_ALU_SHR] = "__instr_alu_shr_exec",
+		[INSTR_ALU_SHR_MH] = "__instr_alu_shr_mh_exec",
+		[INSTR_ALU_SHR_HM] = "__instr_alu_shr_hm_exec",
+		[INSTR_ALU_SHR_HH] = "__instr_alu_shr_hh_exec",
+		[INSTR_ALU_SHR_MI] = "__instr_alu_shr_mi_exec",
+		[INSTR_ALU_SHR_HI] = "__instr_alu_shr_hi_exec",
+
+		[INSTR_REGPREFETCH_RH] = "__instr_regprefetch_rh_exec",
+		[INSTR_REGPREFETCH_RM] = "__instr_regprefetch_rm_exec",
+		[INSTR_REGPREFETCH_RI] = "__instr_regprefetch_ri_exec",
+
+		[INSTR_REGRD_HRH] = "__instr_regrd_hrh_exec",
+		[INSTR_REGRD_HRM] = "__instr_regrd_hrm_exec",
+		[INSTR_REGRD_HRI] = "__instr_regrd_hri_exec",
+		[INSTR_REGRD_MRH] = "__instr_regrd_mrh_exec",
+		[INSTR_REGRD_MRM] = "__instr_regrd_mrm_exec",
+		[INSTR_REGRD_MRI] = "__instr_regrd_mri_exec",
+
+		[INSTR_REGWR_RHH] = "__instr_regwr_rhh_exec",
+		[INSTR_REGWR_RHM] = "__instr_regwr_rhm_exec",
+		[INSTR_REGWR_RHI] = "__instr_regwr_rhi_exec",
+		[INSTR_REGWR_RMH] = "__instr_regwr_rmh_exec",
+		[INSTR_REGWR_RMM] = "__instr_regwr_rmm_exec",
+		[INSTR_REGWR_RMI] = "__instr_regwr_rmi_exec",
+		[INSTR_REGWR_RIH] = "__instr_regwr_rih_exec",
+		[INSTR_REGWR_RIM] = "__instr_regwr_rim_exec",
+		[INSTR_REGWR_RII] = "__instr_regwr_rii_exec",
+
+		[INSTR_REGADD_RHH] = "__instr_regadd_rhh_exec",
+		[INSTR_REGADD_RHM] = "__instr_regadd_rhm_exec",
+		[INSTR_REGADD_RHI] = "__instr_regadd_rhi_exec",
+		[INSTR_REGADD_RMH] = "__instr_regadd_rmh_exec",
+		[INSTR_REGADD_RMM] = "__instr_regadd_rmm_exec",
+		[INSTR_REGADD_RMI] = "__instr_regadd_rmi_exec",
+		[INSTR_REGADD_RIH] = "__instr_regadd_rih_exec",
+		[INSTR_REGADD_RIM] = "__instr_regadd_rim_exec",
+		[INSTR_REGADD_RII] = "__instr_regadd_rii_exec",
+
+		[INSTR_METPREFETCH_H] = "__instr_metprefetch_h_exec",
+		[INSTR_METPREFETCH_M] = "__instr_metprefetch_m_exec",
+		[INSTR_METPREFETCH_I] = "__instr_metprefetch_i_exec",
+
+		[INSTR_METER_HHM] = "__instr_meter_hhm_exec",
+		[INSTR_METER_HHI] = "__instr_meter_hhi_exec",
+		[INSTR_METER_HMM] = "__instr_meter_hmm_exec",
+		[INSTR_METER_HMI] = "__instr_meter_hmi_exec",
+		[INSTR_METER_MHM] = "__instr_meter_mhm_exec",
+		[INSTR_METER_MHI] = "__instr_meter_mhi_exec",
+		[INSTR_METER_MMM] = "__instr_meter_mmm_exec",
+		[INSTR_METER_MMI] = "__instr_meter_mmi_exec",
+		[INSTR_METER_IHM] = "__instr_meter_ihm_exec",
+		[INSTR_METER_IHI] = "__instr_meter_ihi_exec",
+		[INSTR_METER_IMM] = "__instr_meter_imm_exec",
+		[INSTR_METER_IMI] = "__instr_meter_imi_exec",
+
+		[INSTR_LEARNER_LEARN] = "__instr_learn_exec",
+		[INSTR_LEARNER_FORGET] = "__instr_forget_exec",
+	};
+	size_t type = (size_t)instr->type;
+
+	/* Instruction types beyond the last table entry have no function. */
+	if (type >= sizeof(func_names) / sizeof(func_names[0]))
+		return NULL;
+
+	return func_names[type];
+}
+
+/*
+ * Generate the code for an action instruction that transmits the packet:
+ * a call to the instruction function, followed by calls to
+ * thread_ip_reset() and instr_rx_exec() and by a return statement.
+ */
+static void
+action_instr_does_tx_codegen(struct action *a,
+			     uint32_t instr_pos,
+			     struct instruction *instr,
+			     FILE *f)
+{
+	/* Instruction function call. */
+	fprintf(f,
+		"%s(p, t, &action_%s_instructions[%u]);\n",
+		instr_type_to_func(instr),
+		a->name,
+		instr_pos);
+
+	/* Instruction pointer reset, RX and return from the action. */
+	fprintf(f,
+		"\tthread_ip_reset(p, t);\n"
+		"\tinstr_rx_exec(p);\n"
+		"\treturn;\n");
+}
+
+/*
+ * Generate the code for an extern object instruction of an action: a loop
+ * that calls __instr_extern_obj_exec() for as long as it returns zero.
+ */
+static void
+action_instr_extern_obj_codegen(struct action *a, uint32_t instr_pos, FILE *f)
+{
+	fprintf(f,
+		"while (!__instr_extern_obj_exec(p, t, &action_%s_instructions[%u]));\n",
+		a->name, instr_pos);
+}
+
+/*
+ * Generate the code for an extern function instruction of an action: a loop
+ * that calls __instr_extern_func_exec() for as long as it returns zero.
+ */
+static void
+action_instr_extern_func_codegen(struct action *a, uint32_t instr_pos, FILE *f)
+{
+	fprintf(f,
+		"while (!__instr_extern_func_exec(p, t, &action_%s_instructions[%u]));\n",
+		a->name, instr_pos);
+}
+
+/*
+ * Generate a conditional goto that compares operand jmp.a against operand
+ * jmp.b. The a_bo and b_bo strings ("hbo" or "nbo") select the generated
+ * operand read function for each side; op is the C comparison operator.
+ */
+static void
+action_jmp_cmp_codegen(struct action *a,
+		       uint32_t instr_pos,
+		       const char *label,
+		       const char *a_bo,
+		       const char *op,
+		       const char *b_bo,
+		       FILE *f)
+{
+	fprintf(f,
+		"if (instr_operand_%s(t, &action_%s_instructions[%u].jmp.a) %s "
+		"instr_operand_%s(t, &action_%s_instructions[%u].jmp.b))\n"
+		"\t\tgoto %s;\n",
+		a_bo, a->name, instr_pos, op,
+		b_bo, a->name, instr_pos,
+		label);
+}
+
+/*
+ * Generate a conditional goto that compares operand jmp.a against the
+ * immediate value jmp.b_val. The a_bo string ("hbo" or "nbo") selects the
+ * generated operand read function; op is the C comparison operator.
+ */
+static void
+action_jmp_cmp_imm_codegen(struct action *a,
+			   uint32_t instr_pos,
+			   const char *label,
+			   const char *a_bo,
+			   const char *op,
+			   FILE *f)
+{
+	fprintf(f,
+		"if (instr_operand_%s(t, &action_%s_instructions[%u].jmp.a) %s "
+		"action_%s_instructions[%u].jmp.b_val)\n"
+		"\t\tgoto %s;\n",
+		a_bo, a->name, instr_pos, op,
+		a->name, instr_pos,
+		label);
+}
+
+/*
+ * Generate the code for a jump instruction of an action: a goto to the jump
+ * label of the instruction, guarded by the jump condition for every jump
+ * type except the unconditional INSTR_JMP. Nothing is generated for an
+ * instruction type that is not handled below.
+ */
+static void
+action_instr_jmp_codegen(struct action *a,
+			 uint32_t instr_pos,
+			 struct instruction *instr,
+			 struct instruction_data *data,
+			 FILE *f)
+{
+	const char *label = data->jmp_label;
+
+	switch (instr->type) {
+	/* Unconditional jump. */
+	case INSTR_JMP:
+		fprintf(f, "goto %s;\n", label);
+		break;
+
+	/* Jump on header validity. */
+	case INSTR_JMP_VALID:
+		fprintf(f,
+			"if (HEADER_VALID(t, action_%s_instructions[%u].jmp.header_id))\n"
+			"\t\tgoto %s;\n",
+			a->name, instr_pos, label);
+		break;
+
+	case INSTR_JMP_INVALID:
+		fprintf(f,
+			"if (!HEADER_VALID(t, action_%s_instructions[%u].jmp.header_id))\n"
+			"\t\tgoto %s;\n",
+			a->name, instr_pos, label);
+		break;
+
+	/* Jump on the thread hit flag. */
+	case INSTR_JMP_HIT:
+		fprintf(f,
+			"if (t->hit)\n"
+			"\t\tgoto %s;\n",
+			label);
+		break;
+
+	case INSTR_JMP_MISS:
+		fprintf(f,
+			"if (!t->hit)\n"
+			"\t\tgoto %s;\n",
+			label);
+		break;
+
+	/* Jump on the thread action ID. */
+	case INSTR_JMP_ACTION_HIT:
+		fprintf(f,
+			"if (t->action_id == action_%s_instructions[%u].jmp.action_id)\n"
+			"\t\tgoto %s;\n",
+			a->name, instr_pos, label);
+		break;
+
+	case INSTR_JMP_ACTION_MISS:
+		fprintf(f,
+			"if (t->action_id != action_%s_instructions[%u].jmp.action_id)\n"
+			"\t\tgoto %s;\n",
+			a->name, instr_pos, label);
+		break;
+
+	/* Jump if equal. */
+	case INSTR_JMP_EQ:
+		action_jmp_cmp_codegen(a, instr_pos, label, "hbo", "==", "hbo", f);
+		break;
+
+	case INSTR_JMP_EQ_MH:
+		action_jmp_cmp_codegen(a, instr_pos, label, "hbo", "==", "nbo", f);
+		break;
+
+	case INSTR_JMP_EQ_HM:
+		action_jmp_cmp_codegen(a, instr_pos, label, "nbo", "==", "hbo", f);
+		break;
+
+	case INSTR_JMP_EQ_HH:
+		action_jmp_cmp_codegen(a, instr_pos, label, "nbo", "==", "nbo", f);
+		break;
+
+	case INSTR_JMP_EQ_I:
+		action_jmp_cmp_imm_codegen(a, instr_pos, label, "hbo", "==", f);
+		break;
+
+	/* Jump if not equal. */
+	case INSTR_JMP_NEQ:
+		action_jmp_cmp_codegen(a, instr_pos, label, "hbo", "!=", "hbo", f);
+		break;
+
+	case INSTR_JMP_NEQ_MH:
+		action_jmp_cmp_codegen(a, instr_pos, label, "hbo", "!=", "nbo", f);
+		break;
+
+	case INSTR_JMP_NEQ_HM:
+		action_jmp_cmp_codegen(a, instr_pos, label, "nbo", "!=", "hbo", f);
+		break;
+
+	case INSTR_JMP_NEQ_HH:
+		action_jmp_cmp_codegen(a, instr_pos, label, "nbo", "!=", "nbo", f);
+		break;
+
+	case INSTR_JMP_NEQ_I:
+		action_jmp_cmp_imm_codegen(a, instr_pos, label, "hbo", "!=", f);
+		break;
+
+	/* Jump if less than. */
+	case INSTR_JMP_LT:
+		action_jmp_cmp_codegen(a, instr_pos, label, "hbo", "<", "hbo", f);
+		break;
+
+	case INSTR_JMP_LT_MH:
+		action_jmp_cmp_codegen(a, instr_pos, label, "hbo", "<", "nbo", f);
+		break;
+
+	case INSTR_JMP_LT_HM:
+		action_jmp_cmp_codegen(a, instr_pos, label, "nbo", "<", "hbo", f);
+		break;
+
+	case INSTR_JMP_LT_HH:
+		action_jmp_cmp_codegen(a, instr_pos, label, "nbo", "<", "nbo", f);
+		break;
+
+	case INSTR_JMP_LT_MI:
+		action_jmp_cmp_imm_codegen(a, instr_pos, label, "hbo", "<", f);
+		break;
+
+	case INSTR_JMP_LT_HI:
+		action_jmp_cmp_imm_codegen(a, instr_pos, label, "nbo", "<", f);
+		break;
+
+	/* Jump if greater than. */
+	case INSTR_JMP_GT:
+		action_jmp_cmp_codegen(a, instr_pos, label, "hbo", ">", "hbo", f);
+		break;
+
+	case INSTR_JMP_GT_MH:
+		action_jmp_cmp_codegen(a, instr_pos, label, "hbo", ">", "nbo", f);
+		break;
+
+	case INSTR_JMP_GT_HM:
+		action_jmp_cmp_codegen(a, instr_pos, label, "nbo", ">", "hbo", f);
+		break;
+
+	case INSTR_JMP_GT_HH:
+		action_jmp_cmp_codegen(a, instr_pos, label, "nbo", ">", "nbo", f);
+		break;
+
+	case INSTR_JMP_GT_MI:
+		action_jmp_cmp_imm_codegen(a, instr_pos, label, "hbo", ">", f);
+		break;
+
+	case INSTR_JMP_GT_HI:
+		action_jmp_cmp_imm_codegen(a, instr_pos, label, "nbo", ">", f);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Generate the code for a return instruction of an action: a plain return
+ * statement.
+ */
+static void
+action_instr_return_codegen(FILE *f)
+{
+	fputs("return;\n", f);
+}
+
+/*
+ * Generate the C function action_<name>_run() for an action, with one
+ * statement per action instruction. TX, extern, jump and return
+ * instructions get dedicated code; every other instruction becomes a direct
+ * call to the function mapped to its instruction type.
+ */
+static void
+action_instr_codegen(struct action *a, FILE *f)
+{
+	uint32_t pos;
+
+	/* Function header and the local thread pointer. */
+	fprintf(f,
+		"void\n"
+		"action_%s_run(struct rte_swx_pipeline *p)\n"
+		"{\n"
+		"\tstruct thread *t = &p->threads[p->thread_id];\n"
+		"\n",
+		a->name);
+
+	for (pos = 0; pos < a->n_instructions; pos++) {
+		struct instruction *instr = &a->instructions[pos];
+		struct instruction_data *data = &a->instruction_data[pos];
+
+		/* New line, started by the label when present, else a tab. */
+		fprintf(f, "\n");
+		if (data->label[0])
+			fprintf(f, "%s : ", data->label);
+		else
+			fprintf(f, "\t");
+
+		if (instruction_does_tx(instr))
+			/* TX instruction type. */
+			action_instr_does_tx_codegen(a, pos, instr, f);
+		else if (instr->type == INSTR_EXTERN_OBJ)
+			/* Extern object instruction type. */
+			action_instr_extern_obj_codegen(a, pos, f);
+		else if (instr->type == INSTR_EXTERN_FUNC)
+			/* Extern function instruction type. */
+			action_instr_extern_func_codegen(a, pos, f);
+		else if (instruction_is_jmp(instr))
+			/* Jump instruction type. */
+			action_instr_jmp_codegen(a, pos, instr, data, f);
+		else if (instr->type == INSTR_RETURN)
+			/* Return instruction type. */
+			action_instr_return_codegen(f);
+		else
+			/* Any other instruction type. */
+			fprintf(f,
+				"%s(p, t, &action_%s_instructions[%u]);\n",
+				instr_type_to_func(instr),
+				a->name,
+				pos);
+	}
+
+	/* Function end. */
+	fprintf(f, "}\n\n");
+}
+
 static int
 pipeline_codegen(struct rte_swx_pipeline *p)
 {
@@ -10881,6 +11539,10 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 		action_data_codegen(a, f);
 
 		fprintf(f, "\n");
+
+		action_instr_codegen(a, f);
+
+		fprintf(f, "\n");
 	}
 
 	/* Close the .c file. */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 22/24] pipeline: generate custom instruction functions
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (19 preceding siblings ...)
  2021-09-10 12:30 ` [dpdk-dev] [PATCH 21/24] pipeline: generate action functions Cristian Dumitrescu
@ 2021-09-10 12:30 ` Cristian Dumitrescu
  2021-09-10 12:30 ` [dpdk-dev] [PATCH 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
                   ` (2 subsequent siblings)
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:30 UTC (permalink / raw)
  To: dev

Generate a C function for each custom instruction, which essentially
consolidates multiple regular instructions into a single function call.
The pipeline program is split into groups of instructions, and a
custom instruction is generated for each group that has more than one
instruction. Special care is taken for the instructions that can do
thread yield (RX, extern) and for those that can change the instruction
pointer (TX, near/far jump).

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 649 +++++++++++++++++++++++++++++++-
 1 file changed, 643 insertions(+), 6 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 019dbafbf3..0a4ac06467 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1436,6 +1436,24 @@ instruction_is_jmp(struct instruction *instr)
 	}
 }
 
+static int
+instruction_does_thread_yield(struct instruction *instr)
+{
+	switch (instr->type) {
+	case INSTR_RX:
+	case INSTR_TABLE:
+	case INSTR_TABLE_AF:
+	case INSTR_SELECTOR:
+	case INSTR_LEARNER:
+	case INSTR_LEARNER_AF:
+	case INSTR_EXTERN_OBJ:
+	case INSTR_EXTERN_FUNC:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
 static struct field *
 action_field_parse(struct action *action, const char *name);
 
@@ -11515,15 +11533,621 @@ action_instr_codegen(struct action *a, FILE *f)
 	fprintf(f, "}\n\n");
 }
 
+struct instruction_group {
+	TAILQ_ENTRY(instruction_group) node;
+	/* Group ID; re-assigned in list order at the end of instruction_group_list_create(). */
+	uint32_t group_id;
+	/* Index (into the pipeline instruction array) of the first instruction of the group. */
+	uint32_t first_instr_id;
+	/* Index of the last instruction of the group (inclusive). */
+	uint32_t last_instr_id;
+	/* pipeline_func_<group_id> symbol; only looked up for groups with several instructions. */
+	instr_exec_t func;
+};
+
+TAILQ_HEAD(instruction_group_list, instruction_group);
+
+static struct instruction_group *
+instruction_group_list_group_find(struct instruction_group_list *igl, uint32_t instruction_id)
+{
+	struct instruction_group *g;
+
+	TAILQ_FOREACH(g, igl, node)
+		if ((g->first_instr_id <= instruction_id) && (instruction_id <= g->last_instr_id))
+			return g;
+
+	return NULL;
+}
+
+static void
+instruction_group_list_free(struct instruction_group_list *igl)
+{
+	if (!igl)
+		return;
+
+	for ( ; ; ) {
+		struct instruction_group *g;
+
+		g = TAILQ_FIRST(igl);
+		if (!g)
+			break;
+
+		TAILQ_REMOVE(igl, g, node);
+		free(g);
+	}
+
+	free(igl);
+}
+
+static struct instruction_group_list *
+instruction_group_list_create(struct rte_swx_pipeline *p)
+{
+	struct instruction_group_list *igl = NULL;
+	struct instruction_group *g = NULL;
+	uint32_t n_groups = 0, i;
+
+	if (!p || !p->instructions || !p->instruction_data || !p->n_instructions)
+		goto error;
+
+	/* List init. */
+	igl = calloc(1, sizeof(struct instruction_group_list));
+	if (!igl)
+		goto error;
+
+	TAILQ_INIT(igl);
+
+	/* Allocate the first group. */
+	g = calloc(1, sizeof(struct instruction_group));
+	if (!g)
+		goto error;
+
+	/* Iteration 1: Separate the instructions into groups based on the thread yield
+	 * instructions. Do not worry about the jump instructions at this point.
+	 */
+	for (i = 0; i < p->n_instructions; i++) {
+		struct instruction *instr = &p->instructions[i];
+
+		/* Check for thread yield instructions. */
+		if (!instruction_does_thread_yield(instr))
+			continue;
+
+		/* If the current group contains at least one instruction, then finalize it (with
+		 * the previous instruction), add it to the list and allocate a new group (that
+		 * starts with the current instruction).
+		 */
+		if (i - g->first_instr_id) {
+			/* Finalize the group. */
+			g->last_instr_id = i - 1;
+
+			/* Add the group to the list. Advance the number of groups. */
+			TAILQ_INSERT_TAIL(igl, g, node);
+			n_groups++;
+
+			/* Allocate a new group. */
+			g = calloc(1, sizeof(struct instruction_group));
+			if (!g)
+				goto error;
+
+			/* Initialize the new group. */
+			g->group_id = n_groups;
+			g->first_instr_id = i;
+		}
+
+		/* Finalize the current group (with the current instruction, therefore this group
+		 * contains just the current thread yield instruction), add it to the list and
+		 * allocate a new group (that starts with the next instruction).
+		 */
+
+		/* Finalize the group. */
+		g->last_instr_id = i;
+
+		/* Add the group to the list. Advance the number of groups. */
+		TAILQ_INSERT_TAIL(igl, g, node);
+		n_groups++;
+
+		/* Allocate a new group. */
+		g = calloc(1, sizeof(struct instruction_group));
+		if (!g)
+			goto error;
+
+		/* Initialize the new group. */
+		g->group_id = n_groups;
+		g->first_instr_id = i + 1;
+	}
+
+	/* Handle the last group: add it to the list when non-empty, otherwise discard it. */
+	if (i - g->first_instr_id) {
+		/* Finalize the group. */
+		g->last_instr_id = i - 1;
+
+		/* Add the group to the list. Advance the number of groups. */
+		TAILQ_INSERT_TAIL(igl, g, node);
+		n_groups++;
+	} else
+		free(g);
+
+	g = NULL;
+
+	/* Iteration 2: Handle jumps. If the current group contains an instruction which represents
+	 * the destination of a jump instruction located in a different group ("far jump"), then the
+	 * current group has to be split, so that the instruction representing the far jump
+	 * destination is at the start of its group.
+	 */
+	for ( ; ; ) {
+		int is_modified = 0;
+
+		for (i = 0; i < p->n_instructions; i++) {
+			struct instruction_data *data = &p->instruction_data[i];
+			struct instruction_group *g;
+			uint32_t j;
+
+			/* Continue when the current instruction is not a jump destination. */
+			if (!data->n_users)
+				continue;
+
+			g = instruction_group_list_group_find(igl, i);
+			if (!g)
+				goto error;
+
+			/* Find out all the jump instructions with this destination. */
+			for (j = 0; j < p->n_instructions; j++) {
+				struct instruction *jmp_instr = &p->instructions[j];
+				struct instruction_data *jmp_data = &p->instruction_data[j];
+				struct instruction_group *jmp_g, *new_g;
+
+				/* Continue when not a jump instruction. Even when jump instruction,
+				 * continue when the jump destination is not this instruction.
+				 */
+				if (!instruction_is_jmp(jmp_instr) ||
+				    strcmp(jmp_data->jmp_label, data->label))
+					continue;
+
+				jmp_g = instruction_group_list_group_find(igl, j);
+				if (!jmp_g)
+					goto error;
+
+				/* Continue when both the jump instruction and the jump destination
+				 * instruction are in the same group. Even when in different groups,
+				 * still continue if the jump destination instruction is already the
+				 * first instruction of its group.
+				 */
+				if ((jmp_g->group_id == g->group_id) || (g->first_instr_id == i))
+					continue;
+
+				/* Split the group of the current jump destination instruction to
+				 * make this instruction the first instruction of a new group.
+				 */
+				new_g = calloc(1, sizeof(struct instruction_group));
+				if (!new_g)
+					goto error;
+
+				new_g->group_id = n_groups;
+				new_g->first_instr_id = i;
+				new_g->last_instr_id = g->last_instr_id;
+
+				g->last_instr_id = i - 1;
+
+				TAILQ_INSERT_AFTER(igl, g, new_g, node);
+				n_groups++;
+				is_modified = 1;
+
+				/* The decision to split this group (to make the current instruction
+				 * the first instruction of a new group) is already taken and fully
+				 * implemented, so no need to search for more reasons to do it.
+				 */
+				break;
+			}
+		}
+
+		/* Re-evaluate everything, as at least one group got split, so some jumps that were
+		 * previously considered local (i.e. the jump destination is in the same group as
+		 * the jump instruction) can now be "far jumps" (i.e. the jump destination is in a
+		 * different group than the jump instruction). Worst case scenario: each instruction
+		 * that is a jump destination ends up as the first instruction of its group.
+		 */
+		if (!is_modified)
+			break;
+	}
+
+	/* Re-assign the group IDs to be in incremental order. */
+	i = 0;
+	TAILQ_FOREACH(g, igl, node) {
+		g->group_id = i;
+
+		i++;
+	}
+
+	return igl;
+
+error:
+	instruction_group_list_free(igl);
+
+	free(g);
+
+	return NULL;
+}
+
+static void
+pipeline_instr_does_tx_codegen(struct rte_swx_pipeline *p __rte_unused,
+			       uint32_t instr_pos,
+			       struct instruction *instr,
+			       FILE *f)
+{
+	fprintf(f,
+		"%s(p, t, &pipeline_instructions[%u]);\n"
+		"\tthread_ip_reset(p, t);\n"
+		"\tinstr_rx_exec(p);\n"
+		"\treturn;\n",
+		instr_type_to_func(instr),
+		instr_pos);
+}
+
+static int
+pipeline_instr_jmp_codegen(struct rte_swx_pipeline *p,
+			   struct instruction_group_list *igl,
+			   uint32_t jmp_instr_id,
+			   struct instruction *jmp_instr,
+			   struct instruction_data *jmp_data,
+			   FILE *f)
+{
+	struct instruction_group *jmp_g, *g;
+	struct instruction_data *data;
+	uint32_t instr_id;
+
+	switch (jmp_instr->type) {
+	case INSTR_JMP:
+		break;
+
+	case INSTR_JMP_VALID:
+		fprintf(f,
+			"if (HEADER_VALID(t, pipeline_instructions[%u].jmp.header_id))",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_INVALID:
+		fprintf(f,
+			"if (!HEADER_VALID(t, pipeline_instructions[%u].jmp.header_id))",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_HIT:
+		fprintf(f,
+			"if (t->hit)\n");
+		break;
+
+	case INSTR_JMP_MISS:
+		fprintf(f,
+			"if (!t->hit)\n");
+		break;
+
+	case INSTR_JMP_ACTION_HIT:
+		fprintf(f,
+			"if (t->action_id == pipeline_instructions[%u].jmp.action_id)",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_ACTION_MISS:
+		fprintf(f,
+			"if (t->action_id != pipeline_instructions[%u].jmp.action_id)",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ_MH:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ_HM:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ_HH:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ_I:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ_MH:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ_HM:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ_HH:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ_I:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_MH:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_HM:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_HH:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_MI:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_HI:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_MH:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_HM:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_HH:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_MI:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_HI:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	default:
+		break;
+	}
+
+	/* Find the instruction group of the jump instruction. */
+	jmp_g = instruction_group_list_group_find(igl, jmp_instr_id);
+	if (!jmp_g)
+		return -EINVAL;
+
+	/* Find the instruction group of the jump destination instruction. */
+	data = label_find(p->instruction_data, p->n_instructions, jmp_data->jmp_label);
+	if (!data)
+		return -EINVAL;
+
+	instr_id = data - p->instruction_data;
+
+	g = instruction_group_list_group_find(igl, instr_id);
+	if (!g)
+		return -EINVAL;
+
+	/* Code generation for "near" jump (same instruction group) or "far" jump (different
+	 * instruction group).
+	 */
+	if (g->group_id == jmp_g->group_id)
+		fprintf(f,
+			"\n\t\tgoto %s;\n",
+			jmp_data->jmp_label);
+	else
+		fprintf(f,
+			" {\n"
+			"\t\tthread_ip_set(t, &p->instructions[%u]);\n"
+			"\t\treturn;\n"
+			"\t}\n\n",
+			g->group_id);
+
+	return 0;
+}
+
+static void
+instruction_group_list_codegen(struct instruction_group_list *igl, struct rte_swx_pipeline *p, FILE *f)
+{
+	struct instruction_group *g;
+	uint32_t i;
+	int is_required = 0;
+
+	/* Code generation is required only when some group has more than one instruction. */
+	TAILQ_FOREACH(g, igl, node)
+		if (g->first_instr_id < g->last_instr_id)
+			is_required = 1;
+
+	if (!is_required)
+		return;
+
+	/* Generate the code for the pipeline instruction array. */
+	fprintf(f,
+		"static const struct instruction pipeline_instructions[] = {\n");
+
+	for (i = 0; i < p->n_instructions; i++) {
+		struct instruction *instr = &p->instructions[i];
+		instruction_export_t func = export_table[instr->type];
+
+		func(instr, f);
+	}
+
+	fprintf(f, "};\n\n");
+
+	/* Generate the code for the pipeline functions: one function for each instruction group
+	 * that contains more than one instruction.
+	 */
+	TAILQ_FOREACH(g, igl, node) {
+		struct instruction *last_instr;
+		uint32_t j;
+
+		/* Skip if group contains a single instruction. */
+		if (g->last_instr_id == g->first_instr_id)
+			continue;
+
+		/* Generate new pipeline function. */
+		fprintf(f,
+			"void\n"
+			"pipeline_func_%u(struct rte_swx_pipeline *p)\n"
+			"{\n"
+			"\tstruct thread *t = &p->threads[p->thread_id];\n"
+			"\n",
+			g->group_id);
+
+		/* Generate the code for each pipeline instruction. */
+		for (j = g->first_instr_id; j <= g->last_instr_id; j++) {
+			struct instruction *instr = &p->instructions[j];
+			struct instruction_data *data = &p->instruction_data[j];
+
+			/* Label, if present. */
+			if (data->label[0])
+				fprintf(f, "\n%s : ", data->label);
+			else
+				fprintf(f, "\n\t");
+
+			/* TX instruction type. */
+			if (instruction_does_tx(instr)) {
+				pipeline_instr_does_tx_codegen(p, j, instr, f);
+				continue;
+			}
+
+			/* Jump instruction type. NOTE(review): return value is ignored. */
+			if (instruction_is_jmp(instr)) {
+				pipeline_instr_jmp_codegen(p, igl, j, instr, data, f);
+				continue;
+			}
+
+			/* Any other instruction type. */
+			fprintf(f,
+				"%s(p, t, &pipeline_instructions[%u]);\n",
+				instr_type_to_func(instr),
+				j);
+		}
+
+		/* Finalize the generated pipeline function. For some instructions such as TX,
+		 * emit-many-and-TX and unconditional jump, the next instruction has already been
+		 * decided unconditionally and the instruction pointer of the current thread set
+		 * accordingly; for all the other instructions, the instruction pointer must be
+		 * incremented now.
+		 */
+		last_instr = &p->instructions[g->last_instr_id];
+
+		if (!instruction_does_tx(last_instr) && (last_instr->type != INSTR_JMP))
+			fprintf(f,
+				"thread_ip_inc(p);\n");
+
+		fprintf(f,
+			"}\n"
+			"\n");
+	}
+}
+
 static int
-pipeline_codegen(struct rte_swx_pipeline *p)
+pipeline_codegen(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 {
 	struct action *a;
 	FILE *f = NULL;
 
-	if (!p)
-		return -EINVAL;
-
 	/* Create the .c file. */
 	f = fopen("/tmp/pipeline.c", "w");
 	if (!f)
@@ -11545,6 +12169,9 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 		fprintf(f, "\n");
 	}
 
+	/* Add the pipeline code. */
+	instruction_group_list_codegen(igl, p, f);
+
 	/* Close the .c file. */
 	fclose(f);
 
@@ -11554,12 +12181,22 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 static int
 pipeline_compile(struct rte_swx_pipeline *p)
 {
+	struct instruction_group_list *igl = NULL;
 	int status = 0;
 
+	igl = instruction_group_list_create(p);
+	if (!igl) {
+		status = -ENOMEM;
+		goto free;
+	}
+
 	/* Code generation. */
-	status = pipeline_codegen(p);
+	status = pipeline_codegen(p, igl);
 	if (status)
-		return status;
+		goto free;
+
+free:
+	instruction_group_list_free(igl);
 
 	return status;
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 23/24] pipeline: build shared object for pipeline
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (20 preceding siblings ...)
  2021-09-10 12:30 ` [dpdk-dev] [PATCH 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
@ 2021-09-10 12:30 ` Cristian Dumitrescu
  2021-09-10 12:30 ` [dpdk-dev] [PATCH 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:30 UTC (permalink / raw)
  To: dev; +Cc: Cunming Liang

Build the generated C file into a shared object library.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 131 +++++++++++++++++++++++
 lib/pipeline/rte_swx_pipeline_internal.h |   1 +
 2 files changed, 132 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 0a4ac06467..4be43c94a9 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -5,6 +5,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <arpa/inet.h>
+#include <dlfcn.h>
 
 #include "rte_swx_pipeline_internal.h"
 
@@ -8945,9 +8946,13 @@ rte_swx_pipeline_config(struct rte_swx_pipeline **p, int numa_node)
 void
 rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 {
+	void *lib;
+
 	if (!p)
 		return;
 
+	lib = p->lib;
+
 	free(p->instruction_data);
 	free(p->instructions);
 
@@ -8967,6 +8972,9 @@ rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 	struct_free(p);
 
 	free(p);
+
+	if (lib)
+		dlclose(lib);
 }
 
 int
@@ -12178,6 +12186,124 @@ pipeline_codegen(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 	return 0;
 }
 
+#ifndef RTE_SWX_PIPELINE_CMD_MAX_SIZE
+#define RTE_SWX_PIPELINE_CMD_MAX_SIZE 4096
+#endif
+
+static int
+pipeline_libload(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
+{
+	struct action *a;
+	struct instruction_group *g;
+	char *dir_in, *buffer = NULL;
+	const char *dir_out;
+	int status = 0;
+
+	/* Input directory comes from RTE_INSTALL_DIR; fail with -EINVAL when it is not set. */
+	dir_in = getenv("RTE_INSTALL_DIR");
+	if (!dir_in) {
+		status = -EINVAL;
+		goto free;
+	}
+
+	dir_out = "/tmp";
+
+	/* Memory allocation for the command buffer. */
+	buffer = malloc(RTE_SWX_PIPELINE_CMD_MAX_SIZE);
+	if (!buffer) {
+		status = -ENOMEM;
+		goto free;
+	}
+
+	snprintf(buffer,
+		 RTE_SWX_PIPELINE_CMD_MAX_SIZE,
+		 "gcc -c -O3 -fpic -Wno-deprecated-declarations -o %s/pipeline.o %s/pipeline.c "
+		 "-I %s/lib/pipeline "
+		 "-I %s/lib/eal/include "
+		 "-I %s/lib/eal/x86/include "
+		 "-I %s/lib/eal/include/generic "
+		 "-I %s/lib/meter "
+		 "-I %s/lib/port "
+		 "-I %s/lib/table "
+		 "-I %s/lib/pipeline "
+		 "-I %s/config "
+		 "-I %s/build "
+		 "-I %s/lib/eal/linux/include "
+		 ">%s/pipeline.log 2>&1 "
+		 "&& "
+		 "gcc -shared %s/pipeline.o -o %s/libpipeline.so "
+		 ">>%s/pipeline.log 2>&1",
+		 dir_out,
+		 dir_out,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_out,
+		 dir_out,
+		 dir_out,
+		 dir_out);
+
+	/* Build the shared object library. NOTE(review): snprintf() truncation is unchecked. */
+	status = system(buffer);
+	if (status)
+		goto free;
+
+	/* Open library. */
+	snprintf(buffer,
+		 RTE_SWX_PIPELINE_CMD_MAX_SIZE,
+		 "%s/libpipeline.so",
+		 dir_out);
+
+	p->lib = dlopen(buffer, RTLD_LAZY);
+	if (!p->lib) {
+		status = -EIO;
+		goto free;
+	}
+
+	/* Get the action function symbols. */
+	TAILQ_FOREACH(a, &p->actions, node) {
+		snprintf(buffer, RTE_SWX_PIPELINE_CMD_MAX_SIZE, "action_%s_run", a->name);
+
+		p->action_funcs[a->id] = dlsym(p->lib, buffer);
+		if (!p->action_funcs[a->id]) {
+			status = -EINVAL;
+			goto free;
+		}
+	}
+
+	/* Get the pipeline function symbols (groups with more than one instruction only). */
+	TAILQ_FOREACH(g, igl, node) {
+		if (g->first_instr_id == g->last_instr_id)
+			continue;
+
+		snprintf(buffer, RTE_SWX_PIPELINE_CMD_MAX_SIZE, "pipeline_func_%u", g->group_id);
+
+		g->func = dlsym(p->lib, buffer);
+		if (!g->func) {
+			status = -EINVAL;
+			goto free;
+		}
+	}
+
+free:
+	if (status && p->lib) {
+		dlclose(p->lib);
+		p->lib = NULL;
+	}
+
+	free(buffer);
+
+	return status;
+}
+
 static int
 pipeline_compile(struct rte_swx_pipeline *p)
 {
@@ -12195,6 +12321,11 @@ pipeline_compile(struct rte_swx_pipeline *p)
 	if (status)
 		goto free;
 
+	/* Build and load the shared object library. */
+	status = pipeline_libload(p, igl);
+	if (status)
+		goto free;
+
 free:
 	instruction_group_list_free(igl);
 
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 4ad6dd42dd..a210df4856 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1400,6 +1400,7 @@ struct rte_swx_pipeline {
 	struct instruction *instructions;
 	struct instruction_data *instruction_data;
 	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
+	void *lib;
 
 	uint32_t n_structs;
 	uint32_t n_ports_in;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH 24/24] pipeline: enable pipeline compilation
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (21 preceding siblings ...)
  2021-09-10 12:30 ` [dpdk-dev] [PATCH 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
@ 2021-09-10 12:30 ` Cristian Dumitrescu
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
  23 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 12:30 UTC (permalink / raw)
  To: dev

Commit the pipeline changes when the compilation process is
successful: change the table lookup instructions to execute the action
function for each action, and replace the regular pipeline instructions
with the custom instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 55 +++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 4be43c94a9..88b8687278 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -12304,6 +12304,58 @@ pipeline_libload(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 	return status;
 }
 
+static void
+pipeline_adjust(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
+{
+	struct instruction_group *g;
+	uint32_t i;
+
+	/* Pipeline table and learner instructions: switch to the *_AF instruction types. */
+	for (i = 0; i < p->n_instructions; i++) {
+		struct instruction *instr = &p->instructions[i];
+
+		if (instr->type == INSTR_TABLE)
+			instr->type = INSTR_TABLE_AF;
+
+		if (instr->type == INSTR_LEARNER)
+			instr->type = INSTR_LEARNER_AF;
+	}
+
+	/* Pipeline custom instructions: one for each group with more than one instruction. */
+	i = 0;
+	TAILQ_FOREACH(g, igl, node) {
+		struct instruction *instr = &p->instructions[g->first_instr_id];
+		uint32_t j;
+
+		if (g->first_instr_id == g->last_instr_id)
+			continue;
+
+		/* Install a new custom instruction. NOTE(review): no upper bound check on i. */
+		instruction_table[INSTR_CUSTOM_0 + i] = g->func;
+
+		/* First instruction of the group: change its type to the new custom instruction. */
+		instr->type = INSTR_CUSTOM_0 + i;
+
+		/* All the subsequent instructions of the group: invalidate. */
+		for (j = g->first_instr_id + 1; j <= g->last_instr_id; j++) {
+			struct instruction_data *data = &p->instruction_data[j];
+
+			data->invalid = 1;
+		}
+
+		i++;
+	}
+
+	/* Remove the invalidated instructions. */
+	p->n_instructions = instr_compact(p->instructions, p->instruction_data, p->n_instructions);
+
+	/* Resolve the jump destination for any "standalone" jump instructions (i.e. those jump
+	 * instructions that are the only instruction within their group, so they were left
+	 * unmodified).
+	 */
+	instr_jmp_resolve(p->instructions, p->instruction_data, p->n_instructions);
+}
+
 static int
 pipeline_compile(struct rte_swx_pipeline *p)
 {
@@ -12326,6 +12378,9 @@ pipeline_compile(struct rte_swx_pipeline *p)
 	if (status)
 		goto free;
 
+	/* Adjust instructions. */
+	pipeline_adjust(p, igl);
+
 free:
 	instruction_group_list_free(igl);
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file
  2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                   ` (22 preceding siblings ...)
  2021-09-10 12:30 ` [dpdk-dev] [PATCH 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
@ 2021-09-10 13:36 ` Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
                     ` (24 more replies)
  23 siblings, 25 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Start to consolidate the data structures and inline functions required
by the pipeline instructions into an internal header file.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
Depends-on: series-18297 ("[V4,1/4] table: add support learner tables")

 lib/pipeline/meson.build                 |    4 +
 lib/pipeline/rte_swx_pipeline.c          | 1373 +--------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 1383 ++++++++++++++++++++++
 3 files changed, 1388 insertions(+), 1372 deletions(-)
 create mode 100644 lib/pipeline/rte_swx_pipeline_internal.h

diff --git a/lib/pipeline/meson.build b/lib/pipeline/meson.build
index 9132bb517a..ec009631bf 100644
--- a/lib/pipeline/meson.build
+++ b/lib/pipeline/meson.build
@@ -18,3 +18,7 @@ headers = files(
         'rte_swx_ctl.h',
 )
 deps += ['port', 'table', 'meter', 'sched', 'cryptodev']
+
+indirect_headers += files(
+        'rte_swx_pipeline_internal.h',
+)
diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index f89a134a52..ae9b2056db 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2,24 +2,11 @@
  * Copyright(c) 2020 Intel Corporation
  */
 #include <stdlib.h>
-#include <string.h>
 #include <stdio.h>
 #include <errno.h>
-#include <inttypes.h>
-#include <sys/queue.h>
 #include <arpa/inet.h>
 
-#include <rte_common.h>
-#include <rte_prefetch.h>
-#include <rte_byteorder.h>
-#include <rte_cycles.h>
-#include <rte_meter.h>
-
-#include <rte_swx_table_selector.h>
-#include <rte_swx_table_learner.h>
-
-#include "rte_swx_pipeline.h"
-#include "rte_swx_ctl.h"
+#include "rte_swx_pipeline_internal.h"
 
 #define CHECK(condition, err_code)                                             \
 do {                                                                           \
@@ -40,22 +27,9 @@ do {                                                                           \
 	       RTE_SWX_INSTRUCTION_SIZE),                                      \
 	      err_code)
 
-#ifndef TRACE_LEVEL
-#define TRACE_LEVEL 0
-#endif
-
-#if TRACE_LEVEL
-#define TRACE(...) printf(__VA_ARGS__)
-#else
-#define TRACE(...)
-#endif
-
 /*
  * Environment.
  */
-#define ntoh64(x) rte_be_to_cpu_64(x)
-#define hton64(x) rte_cpu_to_be_64(x)
-
 #ifndef RTE_SWX_PIPELINE_HUGE_PAGES_DISABLE
 
 #include <rte_malloc.h>
@@ -103,1351 +77,6 @@ env_free(void *start, size_t size)
 
 #endif
 
-/*
- * Struct.
- */
-struct field {
-	char name[RTE_SWX_NAME_SIZE];
-	uint32_t n_bits;
-	uint32_t offset;
-	int var_size;
-};
-
-struct struct_type {
-	TAILQ_ENTRY(struct_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct field *fields;
-	uint32_t n_fields;
-	uint32_t n_bits;
-	uint32_t n_bits_min;
-	int var_size;
-};
-
-TAILQ_HEAD(struct_type_tailq, struct_type);
-
-/*
- * Input port.
- */
-struct port_in_type {
-	TAILQ_ENTRY(port_in_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct rte_swx_port_in_ops ops;
-};
-
-TAILQ_HEAD(port_in_type_tailq, port_in_type);
-
-struct port_in {
-	TAILQ_ENTRY(port_in) node;
-	struct port_in_type *type;
-	void *obj;
-	uint32_t id;
-};
-
-TAILQ_HEAD(port_in_tailq, port_in);
-
-struct port_in_runtime {
-	rte_swx_port_in_pkt_rx_t pkt_rx;
-	void *obj;
-};
-
-/*
- * Output port.
- */
-struct port_out_type {
-	TAILQ_ENTRY(port_out_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct rte_swx_port_out_ops ops;
-};
-
-TAILQ_HEAD(port_out_type_tailq, port_out_type);
-
-struct port_out {
-	TAILQ_ENTRY(port_out) node;
-	struct port_out_type *type;
-	void *obj;
-	uint32_t id;
-};
-
-TAILQ_HEAD(port_out_tailq, port_out);
-
-struct port_out_runtime {
-	rte_swx_port_out_pkt_tx_t pkt_tx;
-	rte_swx_port_out_flush_t flush;
-	void *obj;
-};
-
-/*
- * Extern object.
- */
-struct extern_type_member_func {
-	TAILQ_ENTRY(extern_type_member_func) node;
-	char name[RTE_SWX_NAME_SIZE];
-	rte_swx_extern_type_member_func_t func;
-	uint32_t id;
-};
-
-TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);
-
-struct extern_type {
-	TAILQ_ENTRY(extern_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *mailbox_struct_type;
-	rte_swx_extern_type_constructor_t constructor;
-	rte_swx_extern_type_destructor_t destructor;
-	struct extern_type_member_func_tailq funcs;
-	uint32_t n_funcs;
-};
-
-TAILQ_HEAD(extern_type_tailq, extern_type);
-
-struct extern_obj {
-	TAILQ_ENTRY(extern_obj) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct extern_type *type;
-	void *obj;
-	uint32_t struct_id;
-	uint32_t id;
-};
-
-TAILQ_HEAD(extern_obj_tailq, extern_obj);
-
-#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
-#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
-#endif
-
-struct extern_obj_runtime {
-	void *obj;
-	uint8_t *mailbox;
-	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
-};
-
-/*
- * Extern function.
- */
-struct extern_func {
-	TAILQ_ENTRY(extern_func) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *mailbox_struct_type;
-	rte_swx_extern_func_t func;
-	uint32_t struct_id;
-	uint32_t id;
-};
-
-TAILQ_HEAD(extern_func_tailq, extern_func);
-
-struct extern_func_runtime {
-	uint8_t *mailbox;
-	rte_swx_extern_func_t func;
-};
-
-/*
- * Header.
- */
-struct header {
-	TAILQ_ENTRY(header) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *st;
-	uint32_t struct_id;
-	uint32_t id;
-};
-
-TAILQ_HEAD(header_tailq, header);
-
-struct header_runtime {
-	uint8_t *ptr0;
-	uint32_t n_bytes;
-};
-
-struct header_out_runtime {
-	uint8_t *ptr0;
-	uint8_t *ptr;
-	uint32_t n_bytes;
-};
-
-/*
- * Instruction.
- */
-
-/* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
- * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
- * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
- * when transferred to packet meta-data and in NBO when transferred to packet
- * headers.
- */
-
-/* Notation conventions:
- *    -Header field: H = h.header.field (dst/src)
- *    -Meta-data field: M = m.field (dst/src)
- *    -Extern object mailbox field: E = e.field (dst/src)
- *    -Extern function mailbox field: F = f.field (dst/src)
- *    -Table action data field: T = t.field (src only)
- *    -Immediate value: I = 32-bit unsigned value (src only)
- */
-
-enum instruction_type {
-	/* rx m.port_in */
-	INSTR_RX,
-
-	/* tx port_out
-	 * port_out = MI
-	 */
-	INSTR_TX,   /* port_out = M */
-	INSTR_TX_I, /* port_out = I */
-
-	/* extract h.header */
-	INSTR_HDR_EXTRACT,
-	INSTR_HDR_EXTRACT2,
-	INSTR_HDR_EXTRACT3,
-	INSTR_HDR_EXTRACT4,
-	INSTR_HDR_EXTRACT5,
-	INSTR_HDR_EXTRACT6,
-	INSTR_HDR_EXTRACT7,
-	INSTR_HDR_EXTRACT8,
-
-	/* extract h.header m.last_field_size */
-	INSTR_HDR_EXTRACT_M,
-
-	/* lookahead h.header */
-	INSTR_HDR_LOOKAHEAD,
-
-	/* emit h.header */
-	INSTR_HDR_EMIT,
-	INSTR_HDR_EMIT_TX,
-	INSTR_HDR_EMIT2_TX,
-	INSTR_HDR_EMIT3_TX,
-	INSTR_HDR_EMIT4_TX,
-	INSTR_HDR_EMIT5_TX,
-	INSTR_HDR_EMIT6_TX,
-	INSTR_HDR_EMIT7_TX,
-	INSTR_HDR_EMIT8_TX,
-
-	/* validate h.header */
-	INSTR_HDR_VALIDATE,
-
-	/* invalidate h.header */
-	INSTR_HDR_INVALIDATE,
-
-	/* mov dst src
-	 * dst = src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_MOV,    /* dst = MEF, src = MEFT */
-	INSTR_MOV_MH, /* dst = MEF, src = H */
-	INSTR_MOV_HM, /* dst = H, src = MEFT */
-	INSTR_MOV_HH, /* dst = H, src = H */
-	INSTR_MOV_I,  /* dst = HMEF, src = I */
-
-	/* dma h.header t.field
-	 * memcpy(h.header, t.field, sizeof(h.header))
-	 */
-	INSTR_DMA_HT,
-	INSTR_DMA_HT2,
-	INSTR_DMA_HT3,
-	INSTR_DMA_HT4,
-	INSTR_DMA_HT5,
-	INSTR_DMA_HT6,
-	INSTR_DMA_HT7,
-	INSTR_DMA_HT8,
-
-	/* add dst src
-	 * dst += src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
-	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
-	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
-	INSTR_ALU_ADD_HH, /* dst = H, src = H */
-	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
-	INSTR_ALU_ADD_HI, /* dst = H, src = I */
-
-	/* sub dst src
-	 * dst -= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
-	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
-	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
-	INSTR_ALU_SUB_HH, /* dst = H, src = H */
-	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
-	INSTR_ALU_SUB_HI, /* dst = H, src = I */
-
-	/* ckadd dst src
-	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
-	 * dst = H, src = {H, h.header}
-	 */
-	INSTR_ALU_CKADD_FIELD,    /* src = H */
-	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */
-	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with any sizeof(header) */
-
-	/* cksub dst src
-	 * dst = dst '- src
-	 * dst = H, src = H
-	 */
-	INSTR_ALU_CKSUB_FIELD,
-
-	/* and dst src
-	 * dst &= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
-	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
-	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
-	INSTR_ALU_AND_HH, /* dst = H, src = H */
-	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */
-
-	/* or dst src
-	 * dst |= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
-	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
-	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
-	INSTR_ALU_OR_HH, /* dst = H, src = H */
-	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */
-
-	/* xor dst src
-	 * dst ^= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
-	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
-	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
-	INSTR_ALU_XOR_HH, /* dst = H, src = H */
-	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */
-
-	/* shl dst src
-	 * dst <<= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
-	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
-	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
-	INSTR_ALU_SHL_HH, /* dst = H, src = H */
-	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
-	INSTR_ALU_SHL_HI, /* dst = H, src = I */
-
-	/* shr dst src
-	 * dst >>= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
-	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
-	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
-	INSTR_ALU_SHR_HH, /* dst = H, src = H */
-	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
-	INSTR_ALU_SHR_HI, /* dst = H, src = I */
-
-	/* regprefetch REGARRAY index
-	 * prefetch REGARRAY[index]
-	 * index = HMEFTI
-	 */
-	INSTR_REGPREFETCH_RH, /* index = H */
-	INSTR_REGPREFETCH_RM, /* index = MEFT */
-	INSTR_REGPREFETCH_RI, /* index = I */
-
-	/* regrd dst REGARRAY index
-	 * dst = REGARRAY[index]
-	 * dst = HMEF, index = HMEFTI
-	 */
-	INSTR_REGRD_HRH, /* dst = H, index = H */
-	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
-	INSTR_REGRD_HRI, /* dst = H, index = I */
-	INSTR_REGRD_MRH, /* dst = MEF, index = H */
-	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
-	INSTR_REGRD_MRI, /* dst = MEF, index = I */
-
-	/* regwr REGARRAY index src
-	 * REGARRAY[index] = src
-	 * index = HMEFTI, src = HMEFTI
-	 */
-	INSTR_REGWR_RHH, /* index = H, src = H */
-	INSTR_REGWR_RHM, /* index = H, src = MEFT */
-	INSTR_REGWR_RHI, /* index = H, src = I */
-	INSTR_REGWR_RMH, /* index = MEFT, src = H */
-	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
-	INSTR_REGWR_RMI, /* index = MEFT, src = I */
-	INSTR_REGWR_RIH, /* index = I, src = H */
-	INSTR_REGWR_RIM, /* index = I, src = MEFT */
-	INSTR_REGWR_RII, /* index = I, src = I */
-
-	/* regadd REGARRAY index src
-	 * REGARRAY[index] += src
-	 * index = HMEFTI, src = HMEFTI
-	 */
-	INSTR_REGADD_RHH, /* index = H, src = H */
-	INSTR_REGADD_RHM, /* index = H, src = MEFT */
-	INSTR_REGADD_RHI, /* index = H, src = I */
-	INSTR_REGADD_RMH, /* index = MEFT, src = H */
-	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
-	INSTR_REGADD_RMI, /* index = MEFT, src = I */
-	INSTR_REGADD_RIH, /* index = I, src = H */
-	INSTR_REGADD_RIM, /* index = I, src = MEFT */
-	INSTR_REGADD_RII, /* index = I, src = I */
-
-	/* metprefetch METARRAY index
-	 * prefetch METARRAY[index]
-	 * index = HMEFTI
-	 */
-	INSTR_METPREFETCH_H, /* index = H */
-	INSTR_METPREFETCH_M, /* index = MEFT */
-	INSTR_METPREFETCH_I, /* index = I */
-
-	/* meter METARRAY index length color_in color_out
-	 * color_out = meter(METARRAY[index], length, color_in)
-	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
-	 */
-	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
-	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
-	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
-	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
-	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
-	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
-	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
-	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
-	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
-	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
-	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
-	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */
-
-	/* table TABLE */
-	INSTR_TABLE,
-	INSTR_SELECTOR,
-	INSTR_LEARNER,
-
-	/* learn LEARNER ACTION_NAME */
-	INSTR_LEARNER_LEARN,
-
-	/* forget */
-	INSTR_LEARNER_FORGET,
-
-	/* extern e.obj.func */
-	INSTR_EXTERN_OBJ,
-
-	/* extern f.func */
-	INSTR_EXTERN_FUNC,
-
-	/* jmp LABEL
-	 * Unconditional jump
-	 */
-	INSTR_JMP,
-
-	/* jmpv LABEL h.header
-	 * Jump if header is valid
-	 */
-	INSTR_JMP_VALID,
-
-	/* jmpnv LABEL h.header
-	 * Jump if header is invalid
-	 */
-	INSTR_JMP_INVALID,
-
-	/* jmph LABEL
-	 * Jump if table lookup hit
-	 */
-	INSTR_JMP_HIT,
-
-	/* jmpnh LABEL
-	 * Jump if table lookup miss
-	 */
-	INSTR_JMP_MISS,
-
-	/* jmpa LABEL ACTION
-	 * Jump if action run
-	 */
-	INSTR_JMP_ACTION_HIT,
-
-	/* jmpna LABEL ACTION
-	 * Jump if action not run
-	 */
-	INSTR_JMP_ACTION_MISS,
-
-	/* jmpeq LABEL a b
-	 * Jump if a is equal to b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
-	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
-	INSTR_JMP_EQ_HH, /* a = H, b = H */
-	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
-
-	/* jmpneq LABEL a b
-	 * Jump if a is not equal to b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
-	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
-	INSTR_JMP_NEQ_HH, /* a = H, b = H */
-	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
-
-	/* jmplt LABEL a b
-	 * Jump if a is less than b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
-	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
-	INSTR_JMP_LT_HH, /* a = H, b = H */
-	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
-	INSTR_JMP_LT_HI, /* a = H, b = I */
-
-	/* jmpgt LABEL a b
-	 * Jump if a is greater than b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
-	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
-	INSTR_JMP_GT_HH, /* a = H, b = H */
-	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
-	INSTR_JMP_GT_HI, /* a = H, b = I */
-
-	/* return
-	 * Return from action
-	 */
-	INSTR_RETURN,
-};
-
-struct instr_operand {
-	uint8_t struct_id;
-	uint8_t n_bits;
-	uint8_t offset;
-	uint8_t pad;
-};
-
-struct instr_io {
-	struct {
-		union {
-			struct {
-				uint8_t offset;
-				uint8_t n_bits;
-				uint8_t pad[2];
-			};
-
-			uint32_t val;
-		};
-	} io;
-
-	struct {
-		uint8_t header_id[8];
-		uint8_t struct_id[8];
-		uint8_t n_bytes[8];
-	} hdr;
-};
-
-struct instr_hdr_validity {
-	uint8_t header_id;
-};
-
-struct instr_table {
-	uint8_t table_id;
-};
-
-struct instr_learn {
-	uint8_t action_id;
-};
-
-struct instr_extern_obj {
-	uint8_t ext_obj_id;
-	uint8_t func_id;
-};
-
-struct instr_extern_func {
-	uint8_t ext_func_id;
-};
-
-struct instr_dst_src {
-	struct instr_operand dst;
-	union {
-		struct instr_operand src;
-		uint64_t src_val;
-	};
-};
-
-struct instr_regarray {
-	uint8_t regarray_id;
-	uint8_t pad[3];
-
-	union {
-		struct instr_operand idx;
-		uint32_t idx_val;
-	};
-
-	union {
-		struct instr_operand dstsrc;
-		uint64_t dstsrc_val;
-	};
-};
-
-struct instr_meter {
-	uint8_t metarray_id;
-	uint8_t pad[3];
-
-	union {
-		struct instr_operand idx;
-		uint32_t idx_val;
-	};
-
-	struct instr_operand length;
-
-	union {
-		struct instr_operand color_in;
-		uint32_t color_in_val;
-	};
-
-	struct instr_operand color_out;
-};
-
-struct instr_dma {
-	struct {
-		uint8_t header_id[8];
-		uint8_t struct_id[8];
-	} dst;
-
-	struct {
-		uint8_t offset[8];
-	} src;
-
-	uint16_t n_bytes[8];
-};
-
-struct instr_jmp {
-	struct instruction *ip;
-
-	union {
-		struct instr_operand a;
-		uint8_t header_id;
-		uint8_t action_id;
-	};
-
-	union {
-		struct instr_operand b;
-		uint64_t b_val;
-	};
-};
-
-struct instruction {
-	enum instruction_type type;
-	union {
-		struct instr_io io;
-		struct instr_hdr_validity valid;
-		struct instr_dst_src mov;
-		struct instr_regarray regarray;
-		struct instr_meter meter;
-		struct instr_dma dma;
-		struct instr_dst_src alu;
-		struct instr_table table;
-		struct instr_learn learn;
-		struct instr_extern_obj ext_obj;
-		struct instr_extern_func ext_func;
-		struct instr_jmp jmp;
-	};
-};
-
-struct instruction_data {
-	char label[RTE_SWX_NAME_SIZE];
-	char jmp_label[RTE_SWX_NAME_SIZE];
-	uint32_t n_users; /* user = jmp instruction to this instruction. */
-	int invalid;
-};
-
-/*
- * Action.
- */
-struct action {
-	TAILQ_ENTRY(action) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *st;
-	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
-	struct instruction *instructions;
-	uint32_t n_instructions;
-	uint32_t id;
-};
-
-TAILQ_HEAD(action_tailq, action);
-
-/*
- * Table.
- */
-struct table_type {
-	TAILQ_ENTRY(table_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	enum rte_swx_table_match_type match_type;
-	struct rte_swx_table_ops ops;
-};
-
-TAILQ_HEAD(table_type_tailq, table_type);
-
-struct match_field {
-	enum rte_swx_table_match_type match_type;
-	struct field *field;
-};
-
-struct table {
-	TAILQ_ENTRY(table) node;
-	char name[RTE_SWX_NAME_SIZE];
-	char args[RTE_SWX_NAME_SIZE];
-	struct table_type *type; /* NULL when n_fields == 0. */
-
-	/* Match. */
-	struct match_field *fields;
-	uint32_t n_fields;
-	struct header *header; /* Only valid when n_fields > 0. */
-
-	/* Action. */
-	struct action **actions;
-	struct action *default_action;
-	uint8_t *default_action_data;
-	uint32_t n_actions;
-	int default_action_is_const;
-	uint32_t action_data_size_max;
-
-	uint32_t size;
-	uint32_t id;
-};
-
-TAILQ_HEAD(table_tailq, table);
-
-struct table_runtime {
-	rte_swx_table_lookup_t func;
-	void *mailbox;
-	uint8_t **key;
-};
-
-struct table_statistics {
-	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
-	uint64_t *n_pkts_action;
-};
-
-/*
- * Selector.
- */
-struct selector {
-	TAILQ_ENTRY(selector) node;
-	char name[RTE_SWX_NAME_SIZE];
-
-	struct field *group_id_field;
-	struct field **selector_fields;
-	uint32_t n_selector_fields;
-	struct header *selector_header;
-	struct field *member_id_field;
-
-	uint32_t n_groups_max;
-	uint32_t n_members_per_group_max;
-
-	uint32_t id;
-};
-
-TAILQ_HEAD(selector_tailq, selector);
-
-struct selector_runtime {
-	void *mailbox;
-	uint8_t **group_id_buffer;
-	uint8_t **selector_buffer;
-	uint8_t **member_id_buffer;
-};
-
-struct selector_statistics {
-	uint64_t n_pkts;
-};
-
-/*
- * Learner table.
- */
-struct learner {
-	TAILQ_ENTRY(learner) node;
-	char name[RTE_SWX_NAME_SIZE];
-
-	/* Match. */
-	struct field **fields;
-	uint32_t n_fields;
-	struct header *header;
-
-	/* Action. */
-	struct action **actions;
-	struct field **action_arg;
-	struct action *default_action;
-	uint8_t *default_action_data;
-	uint32_t n_actions;
-	int default_action_is_const;
-	uint32_t action_data_size_max;
-
-	uint32_t size;
-	uint32_t timeout;
-	uint32_t id;
-};
-
-TAILQ_HEAD(learner_tailq, learner);
-
-struct learner_runtime {
-	void *mailbox;
-	uint8_t **key;
-	uint8_t **action_data;
-};
-
-struct learner_statistics {
-	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
-	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
-	uint64_t n_pkts_forget;
-	uint64_t *n_pkts_action;
-};
-
-/*
- * Register array.
- */
-struct regarray {
-	TAILQ_ENTRY(regarray) node;
-	char name[RTE_SWX_NAME_SIZE];
-	uint64_t init_val;
-	uint32_t size;
-	uint32_t id;
-};
-
-TAILQ_HEAD(regarray_tailq, regarray);
-
-struct regarray_runtime {
-	uint64_t *regarray;
-	uint32_t size_mask;
-};
-
-/*
- * Meter array.
- */
-struct meter_profile {
-	TAILQ_ENTRY(meter_profile) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct rte_meter_trtcm_params params;
-	struct rte_meter_trtcm_profile profile;
-	uint32_t n_users;
-};
-
-TAILQ_HEAD(meter_profile_tailq, meter_profile);
-
-struct metarray {
-	TAILQ_ENTRY(metarray) node;
-	char name[RTE_SWX_NAME_SIZE];
-	uint32_t size;
-	uint32_t id;
-};
-
-TAILQ_HEAD(metarray_tailq, metarray);
-
-struct meter {
-	struct rte_meter_trtcm m;
-	struct meter_profile *profile;
-	enum rte_color color_mask;
-	uint8_t pad[20];
-
-	uint64_t n_pkts[RTE_COLORS];
-	uint64_t n_bytes[RTE_COLORS];
-};
-
-struct metarray_runtime {
-	struct meter *metarray;
-	uint32_t size_mask;
-};
-
-/*
- * Pipeline.
- */
-struct thread {
-	/* Packet. */
-	struct rte_swx_pkt pkt;
-	uint8_t *ptr;
-
-	/* Structures. */
-	uint8_t **structs;
-
-	/* Packet headers. */
-	struct header_runtime *headers; /* Extracted or generated headers. */
-	struct header_out_runtime *headers_out; /* Emitted headers. */
-	uint8_t *header_storage;
-	uint8_t *header_out_storage;
-	uint64_t valid_headers;
-	uint32_t n_headers_out;
-
-	/* Packet meta-data. */
-	uint8_t *metadata;
-
-	/* Tables. */
-	struct table_runtime *tables;
-	struct selector_runtime *selectors;
-	struct learner_runtime *learners;
-	struct rte_swx_table_state *table_state;
-	uint64_t action_id;
-	int hit; /* 0 = Miss, 1 = Hit. */
-	uint32_t learner_id;
-	uint64_t time;
-
-	/* Extern objects and functions. */
-	struct extern_obj_runtime *extern_objs;
-	struct extern_func_runtime *extern_funcs;
-
-	/* Instructions. */
-	struct instruction *ip;
-	struct instruction *ret;
-};
-
-#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
-#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
-#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))
-
-#define HEADER_VALID(thread, header_id) \
-	MASK64_BIT_GET((thread)->valid_headers, header_id)
-
-#define ALU(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = dst64 & dst64_mask;                                     \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	uint64_t result = dst operator src;                                    \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define ALU_MH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = dst64 & dst64_mask;                                     \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
-									       \
-	uint64_t result = dst operator src;                                    \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
-}
-
-#define ALU_HM(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	uint64_t result = dst operator src;                                    \
-	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
-}
-
-#define ALU_HM_FAST(thread, ip, operator)  \
-{                                                                                 \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
-	uint64_t dst64 = *dst64_ptr;                                              \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
-	uint64_t dst = dst64 & dst64_mask;                                        \
-										  \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
-	uint64_t src64 = *src64_ptr;                                              \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
-	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
-										  \
-	uint64_t result = dst operator src;                                       \
-										  \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
-}
-
-#define ALU_HH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
-									       \
-	uint64_t result = dst operator src;                                    \
-	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
-}
-
-#define ALU_HH_FAST(thread, ip, operator)  \
-{                                                                                             \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
-	uint64_t dst64 = *dst64_ptr;                                                          \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
-	uint64_t dst = dst64 & dst64_mask;                                                    \
-											      \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
-	uint64_t src64 = *src64_ptr;                                                          \
-	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
-											      \
-	uint64_t result = dst operator src;                                                   \
-											      \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
-}
-
-#else
-
-#define ALU_MH ALU
-#define ALU_HM ALU
-#define ALU_HM_FAST ALU
-#define ALU_HH ALU
-#define ALU_HH_FAST ALU
-
-#endif
-
-#define ALU_I(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = dst64 & dst64_mask;                                     \
-									       \
-	uint64_t src = (ip)->alu.src_val;                                      \
-									       \
-	uint64_t result = dst operator src;                                    \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
-}
-
-#define ALU_MI ALU_I
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define ALU_HI(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
-									       \
-	uint64_t src = (ip)->alu.src_val;                                      \
-									       \
-	uint64_t result = dst operator src;                                    \
-	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
-}
-
-#else
-
-#define ALU_HI ALU_I
-
-#endif
-
-#define MOV(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define MOV_MH(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
-}
-
-#define MOV_HM(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
-	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
-}
-
-#define MOV_HH(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-									       \
-	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
-	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
-	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
-}
-
-#else
-
-#define MOV_MH MOV
-#define MOV_HM MOV
-#define MOV_HH MOV
-
-#endif
-
-#define MOV_I(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint64_t src = (ip)->mov.src_val;                                      \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
-}
-
-#define JMP_CMP(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
-	uint64_t a = a64 & a64_mask;                                           \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
-	uint64_t b = b64 & b64_mask;                                           \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define JMP_CMP_MH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
-	uint64_t a = a64 & a64_mask;                                           \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_HM(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
-	uint64_t b = b64 & b64_mask;                                           \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_HH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_HH_FAST(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#else
-
-#define JMP_CMP_MH JMP_CMP
-#define JMP_CMP_HM JMP_CMP
-#define JMP_CMP_HH JMP_CMP
-#define JMP_CMP_HH_FAST JMP_CMP
-
-#endif
-
-#define JMP_CMP_I(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
-	uint64_t a = a64 & a64_mask;                                           \
-									       \
-	uint64_t b = (ip)->jmp.b_val;                                          \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_MI JMP_CMP_I
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define JMP_CMP_HI(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
-									       \
-	uint64_t b = (ip)->jmp.b_val;                                          \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#else
-
-#define JMP_CMP_HI JMP_CMP_I
-
-#endif
-
-#define METADATA_READ(thread, offset, n_bits)                                  \
-({                                                                             \
-	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
-	uint64_t m64 = *m64_ptr;                                               \
-	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
-	(m64 & m64_mask);                                                      \
-})
-
-#define METADATA_WRITE(thread, offset, n_bits, value)                          \
-{                                                                              \
-	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
-	uint64_t m64 = *m64_ptr;                                               \
-	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
-									       \
-	uint64_t m_new = value;                                                \
-									       \
-	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
-}
-
-#ifndef RTE_SWX_PIPELINE_THREADS_MAX
-#define RTE_SWX_PIPELINE_THREADS_MAX 16
-#endif
-
-struct rte_swx_pipeline {
-	struct struct_type_tailq struct_types;
-	struct port_in_type_tailq port_in_types;
-	struct port_in_tailq ports_in;
-	struct port_out_type_tailq port_out_types;
-	struct port_out_tailq ports_out;
-	struct extern_type_tailq extern_types;
-	struct extern_obj_tailq extern_objs;
-	struct extern_func_tailq extern_funcs;
-	struct header_tailq headers;
-	struct struct_type *metadata_st;
-	uint32_t metadata_struct_id;
-	struct action_tailq actions;
-	struct table_type_tailq table_types;
-	struct table_tailq tables;
-	struct selector_tailq selectors;
-	struct learner_tailq learners;
-	struct regarray_tailq regarrays;
-	struct meter_profile_tailq meter_profiles;
-	struct metarray_tailq metarrays;
-
-	struct port_in_runtime *in;
-	struct port_out_runtime *out;
-	struct instruction **action_instructions;
-	struct rte_swx_table_state *table_state;
-	struct table_statistics *table_stats;
-	struct selector_statistics *selector_stats;
-	struct learner_statistics *learner_stats;
-	struct regarray_runtime *regarray_runtime;
-	struct metarray_runtime *metarray_runtime;
-	struct instruction *instructions;
-	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
-
-	uint32_t n_structs;
-	uint32_t n_ports_in;
-	uint32_t n_ports_out;
-	uint32_t n_extern_objs;
-	uint32_t n_extern_funcs;
-	uint32_t n_actions;
-	uint32_t n_tables;
-	uint32_t n_selectors;
-	uint32_t n_learners;
-	uint32_t n_regarrays;
-	uint32_t n_metarrays;
-	uint32_t n_headers;
-	uint32_t thread_id;
-	uint32_t port_id;
-	uint32_t n_instructions;
-	int build_done;
-	int numa_node;
-};
-
 /*
  * Struct.
  */
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
new file mode 100644
index 0000000000..5d80dd8451
--- /dev/null
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -0,0 +1,1383 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+#ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
+#define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
+
+#include <inttypes.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_meter.h>
+
+#include <rte_swx_table_selector.h>
+#include <rte_swx_table_learner.h>
+#include <rte_swx_pipeline.h>
+#include <rte_swx_ctl.h>
+
+#ifndef TRACE_LEVEL
+#define TRACE_LEVEL 0
+#endif
+
+#if TRACE_LEVEL
+#define TRACE(...) printf(__VA_ARGS__)
+#else
+#define TRACE(...)
+#endif
+
+/*
+ * Environment.
+ */
+#define ntoh64(x) rte_be_to_cpu_64(x)
+#define hton64(x) rte_cpu_to_be_64(x)
+
+/*
+ * Struct.
+ */
+struct field {
+	char name[RTE_SWX_NAME_SIZE];
+	uint32_t n_bits;
+	uint32_t offset;
+	int var_size;
+};
+
+struct struct_type {
+	TAILQ_ENTRY(struct_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct field *fields;
+	uint32_t n_fields;
+	uint32_t n_bits;
+	uint32_t n_bits_min;
+	int var_size;
+};
+
+TAILQ_HEAD(struct_type_tailq, struct_type);
+
+/*
+ * Input port.
+ */
+struct port_in_type {
+	TAILQ_ENTRY(port_in_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct rte_swx_port_in_ops ops;
+};
+
+TAILQ_HEAD(port_in_type_tailq, port_in_type);
+
+struct port_in {
+	TAILQ_ENTRY(port_in) node;
+	struct port_in_type *type;
+	void *obj;
+	uint32_t id;
+};
+
+TAILQ_HEAD(port_in_tailq, port_in);
+
+struct port_in_runtime {
+	rte_swx_port_in_pkt_rx_t pkt_rx;
+	void *obj;
+};
+
+/*
+ * Output port.
+ */
+struct port_out_type {
+	TAILQ_ENTRY(port_out_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct rte_swx_port_out_ops ops;
+};
+
+TAILQ_HEAD(port_out_type_tailq, port_out_type);
+
+struct port_out {
+	TAILQ_ENTRY(port_out) node;
+	struct port_out_type *type;
+	void *obj;
+	uint32_t id;
+};
+
+TAILQ_HEAD(port_out_tailq, port_out);
+
+struct port_out_runtime {
+	rte_swx_port_out_pkt_tx_t pkt_tx;
+	rte_swx_port_out_flush_t flush;
+	void *obj;
+};
+
+/*
+ * Extern object.
+ */
+struct extern_type_member_func {
+	TAILQ_ENTRY(extern_type_member_func) node;
+	char name[RTE_SWX_NAME_SIZE];
+	rte_swx_extern_type_member_func_t func;
+	uint32_t id;
+};
+
+TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);
+
+struct extern_type {
+	TAILQ_ENTRY(extern_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *mailbox_struct_type;
+	rte_swx_extern_type_constructor_t constructor;
+	rte_swx_extern_type_destructor_t destructor;
+	struct extern_type_member_func_tailq funcs;
+	uint32_t n_funcs;
+};
+
+TAILQ_HEAD(extern_type_tailq, extern_type);
+
+struct extern_obj {
+	TAILQ_ENTRY(extern_obj) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct extern_type *type;
+	void *obj;
+	uint32_t struct_id;
+	uint32_t id;
+};
+
+TAILQ_HEAD(extern_obj_tailq, extern_obj);
+
+#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
+#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
+#endif
+
+struct extern_obj_runtime {
+	void *obj;
+	uint8_t *mailbox;
+	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
+};
+
+/*
+ * Extern function.
+ */
+struct extern_func {
+	TAILQ_ENTRY(extern_func) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *mailbox_struct_type;
+	rte_swx_extern_func_t func;
+	uint32_t struct_id;
+	uint32_t id;
+};
+
+TAILQ_HEAD(extern_func_tailq, extern_func);
+
+struct extern_func_runtime {
+	uint8_t *mailbox;
+	rte_swx_extern_func_t func;
+};
+
+/*
+ * Header.
+ */
+struct header {
+	TAILQ_ENTRY(header) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *st;
+	uint32_t struct_id;
+	uint32_t id;
+};
+
+TAILQ_HEAD(header_tailq, header);
+
+struct header_runtime {
+	uint8_t *ptr0;
+	uint32_t n_bytes;
+};
+
+struct header_out_runtime {
+	uint8_t *ptr0;
+	uint8_t *ptr;
+	uint32_t n_bytes;
+};
+
+/*
+ * Instruction.
+ */
+
+/* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
+ * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
+ * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
+ * when transferred to packet meta-data and in NBO when transferred to packet
+ * headers.
+ */
+
+/* Notation conventions:
+ *    -Header field: H = h.header.field (dst/src)
+ *    -Meta-data field: M = m.field (dst/src)
+ *    -Extern object mailbox field: E = e.field (dst/src)
+ *    -Extern function mailbox field: F = f.field (dst/src)
+ *    -Table action data field: T = t.field (src only)
+ *    -Immediate value: I = 32-bit unsigned value (src only)
+ */
+
+enum instruction_type {
+	/* rx m.port_in */
+	INSTR_RX,
+
+	/* tx port_out
+	 * port_out = MI
+	 */
+	INSTR_TX,   /* port_out = M */
+	INSTR_TX_I, /* port_out = I */
+
+	/* extract h.header */
+	INSTR_HDR_EXTRACT,
+	INSTR_HDR_EXTRACT2,
+	INSTR_HDR_EXTRACT3,
+	INSTR_HDR_EXTRACT4,
+	INSTR_HDR_EXTRACT5,
+	INSTR_HDR_EXTRACT6,
+	INSTR_HDR_EXTRACT7,
+	INSTR_HDR_EXTRACT8,
+
+	/* extract h.header m.last_field_size */
+	INSTR_HDR_EXTRACT_M,
+
+	/* lookahead h.header */
+	INSTR_HDR_LOOKAHEAD,
+
+	/* emit h.header */
+	INSTR_HDR_EMIT,
+	INSTR_HDR_EMIT_TX,
+	INSTR_HDR_EMIT2_TX,
+	INSTR_HDR_EMIT3_TX,
+	INSTR_HDR_EMIT4_TX,
+	INSTR_HDR_EMIT5_TX,
+	INSTR_HDR_EMIT6_TX,
+	INSTR_HDR_EMIT7_TX,
+	INSTR_HDR_EMIT8_TX,
+
+	/* validate h.header */
+	INSTR_HDR_VALIDATE,
+
+	/* invalidate h.header */
+	INSTR_HDR_INVALIDATE,
+
+	/* mov dst src
+	 * dst = src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_MOV,    /* dst = MEF, src = MEFT */
+	INSTR_MOV_MH, /* dst = MEF, src = H */
+	INSTR_MOV_HM, /* dst = H, src = MEFT */
+	INSTR_MOV_HH, /* dst = H, src = H */
+	INSTR_MOV_I,  /* dst = HMEF, src = I */
+
+	/* dma h.header t.field
+	 * memcpy(h.header, t.field, sizeof(h.header))
+	 */
+	INSTR_DMA_HT,
+	INSTR_DMA_HT2,
+	INSTR_DMA_HT3,
+	INSTR_DMA_HT4,
+	INSTR_DMA_HT5,
+	INSTR_DMA_HT6,
+	INSTR_DMA_HT7,
+	INSTR_DMA_HT8,
+
+	/* add dst src
+	 * dst += src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
+	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
+	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
+	INSTR_ALU_ADD_HH, /* dst = H, src = H */
+	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
+	INSTR_ALU_ADD_HI, /* dst = H, src = I */
+
+	/* sub dst src
+	 * dst -= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
+	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
+	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
+	INSTR_ALU_SUB_HH, /* dst = H, src = H */
+	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
+	INSTR_ALU_SUB_HI, /* dst = H, src = I */
+
+	/* ckadd dst src
+	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
+	 * dst = H, src = {H, h.header}
+	 */
+	INSTR_ALU_CKADD_FIELD,    /* src = H */
+	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */
+	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with any sizeof(header) */
+
+	/* cksub dst src
+	 * dst = dst '- src
+	 * dst = H, src = H
+	 */
+	INSTR_ALU_CKSUB_FIELD,
+
+	/* and dst src
+	 * dst &= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
+	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
+	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
+	INSTR_ALU_AND_HH, /* dst = H, src = H */
+	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */
+
+	/* or dst src
+	 * dst |= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
+	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
+	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
+	INSTR_ALU_OR_HH, /* dst = H, src = H */
+	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */
+
+	/* xor dst src
+	 * dst ^= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
+	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
+	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
+	INSTR_ALU_XOR_HH, /* dst = H, src = H */
+	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */
+
+	/* shl dst src
+	 * dst <<= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
+	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
+	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
+	INSTR_ALU_SHL_HH, /* dst = H, src = H */
+	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
+	INSTR_ALU_SHL_HI, /* dst = H, src = I */
+
+	/* shr dst src
+	 * dst >>= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
+	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
+	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
+	INSTR_ALU_SHR_HH, /* dst = H, src = H */
+	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
+	INSTR_ALU_SHR_HI, /* dst = H, src = I */
+
+	/* regprefetch REGARRAY index
+	 * prefetch REGARRAY[index]
+	 * index = HMEFTI
+	 */
+	INSTR_REGPREFETCH_RH, /* index = H */
+	INSTR_REGPREFETCH_RM, /* index = MEFT */
+	INSTR_REGPREFETCH_RI, /* index = I */
+
+	/* regrd dst REGARRAY index
+	 * dst = REGARRAY[index]
+	 * dst = HMEF, index = HMEFTI
+	 */
+	INSTR_REGRD_HRH, /* dst = H, index = H */
+	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
+	INSTR_REGRD_HRI, /* dst = H, index = I */
+	INSTR_REGRD_MRH, /* dst = MEF, index = H */
+	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
+	INSTR_REGRD_MRI, /* dst = MEF, index = I */
+
+	/* regwr REGARRAY index src
+	 * REGARRAY[index] = src
+	 * index = HMEFTI, src = HMEFTI
+	 */
+	INSTR_REGWR_RHH, /* index = H, src = H */
+	INSTR_REGWR_RHM, /* index = H, src = MEFT */
+	INSTR_REGWR_RHI, /* index = H, src = I */
+	INSTR_REGWR_RMH, /* index = MEFT, src = H */
+	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
+	INSTR_REGWR_RMI, /* index = MEFT, src = I */
+	INSTR_REGWR_RIH, /* index = I, src = H */
+	INSTR_REGWR_RIM, /* index = I, src = MEFT */
+	INSTR_REGWR_RII, /* index = I, src = I */
+
+	/* regadd REGARRAY index src
+	 * REGARRAY[index] += src
+	 * index = HMEFTI, src = HMEFTI
+	 */
+	INSTR_REGADD_RHH, /* index = H, src = H */
+	INSTR_REGADD_RHM, /* index = H, src = MEFT */
+	INSTR_REGADD_RHI, /* index = H, src = I */
+	INSTR_REGADD_RMH, /* index = MEFT, src = H */
+	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
+	INSTR_REGADD_RMI, /* index = MEFT, src = I */
+	INSTR_REGADD_RIH, /* index = I, src = H */
+	INSTR_REGADD_RIM, /* index = I, src = MEFT */
+	INSTR_REGADD_RII, /* index = I, src = I */
+
+	/* metprefetch METARRAY index
+	 * prefetch METARRAY[index]
+	 * index = HMEFTI
+	 */
+	INSTR_METPREFETCH_H, /* index = H */
+	INSTR_METPREFETCH_M, /* index = MEFT */
+	INSTR_METPREFETCH_I, /* index = I */
+
+	/* meter METARRAY index length color_in color_out
+	 * color_out = meter(METARRAY[index], length, color_in)
+	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
+	 */
+	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
+	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
+	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
+	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
+	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
+	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
+	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
+	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
+	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
+	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
+	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
+	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */
+
+	/* table TABLE */
+	INSTR_TABLE,
+	INSTR_SELECTOR,
+	INSTR_LEARNER,
+
+	/* learn LEARNER ACTION_NAME */
+	INSTR_LEARNER_LEARN,
+
+	/* forget */
+	INSTR_LEARNER_FORGET,
+
+	/* extern e.obj.func */
+	INSTR_EXTERN_OBJ,
+
+	/* extern f.func */
+	INSTR_EXTERN_FUNC,
+
+	/* jmp LABEL
+	 * Unconditional jump
+	 */
+	INSTR_JMP,
+
+	/* jmpv LABEL h.header
+	 * Jump if header is valid
+	 */
+	INSTR_JMP_VALID,
+
+	/* jmpnv LABEL h.header
+	 * Jump if header is invalid
+	 */
+	INSTR_JMP_INVALID,
+
+	/* jmph LABEL
+	 * Jump if table lookup hit
+	 */
+	INSTR_JMP_HIT,
+
+	/* jmpnh LABEL
+	 * Jump if table lookup miss
+	 */
+	INSTR_JMP_MISS,
+
+	/* jmpa LABEL ACTION
+	 * Jump if action run
+	 */
+	INSTR_JMP_ACTION_HIT,
+
+	/* jmpna LABEL ACTION
+	 * Jump if action not run
+	 */
+	INSTR_JMP_ACTION_MISS,
+
+	/* jmpeq LABEL a b
+	 * Jump if a is equal to b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
+	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
+	INSTR_JMP_EQ_HH, /* a = H, b = H */
+	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
+
+	/* jmpneq LABEL a b
+	 * Jump if a is not equal to b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
+	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
+	INSTR_JMP_NEQ_HH, /* a = H, b = H */
+	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
+
+	/* jmplt LABEL a b
+	 * Jump if a is less than b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
+	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
+	INSTR_JMP_LT_HH, /* a = H, b = H */
+	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
+	INSTR_JMP_LT_HI, /* a = H, b = I */
+
+	/* jmpgt LABEL a b
+	 * Jump if a is greater than b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
+	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
+	INSTR_JMP_GT_HH, /* a = H, b = H */
+	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
+	INSTR_JMP_GT_HI, /* a = H, b = I */
+
+	/* return
+	 * Return from action
+	 */
+	INSTR_RETURN,
+};
+
+struct instr_operand {
+	uint8_t struct_id;
+	uint8_t n_bits;
+	uint8_t offset;
+	uint8_t pad;
+};
+
+struct instr_io {
+	struct {
+		union {
+			struct {
+				uint8_t offset;
+				uint8_t n_bits;
+				uint8_t pad[2];
+			};
+
+			uint32_t val;
+		};
+	} io;
+
+	struct {
+		uint8_t header_id[8];
+		uint8_t struct_id[8];
+		uint8_t n_bytes[8];
+	} hdr;
+};
+
+struct instr_hdr_validity {
+	uint8_t header_id;
+};
+
+struct instr_table {
+	uint8_t table_id;
+};
+
+struct instr_learn {
+	uint8_t action_id;
+};
+
+struct instr_extern_obj {
+	uint8_t ext_obj_id;
+	uint8_t func_id;
+};
+
+struct instr_extern_func {
+	uint8_t ext_func_id;
+};
+
+struct instr_dst_src {
+	struct instr_operand dst;
+	union {
+		struct instr_operand src;
+		uint64_t src_val;
+	};
+};
+
+struct instr_regarray {
+	uint8_t regarray_id;
+	uint8_t pad[3];
+
+	union {
+		struct instr_operand idx;
+		uint32_t idx_val;
+	};
+
+	union {
+		struct instr_operand dstsrc;
+		uint64_t dstsrc_val;
+	};
+};
+
+struct instr_meter {
+	uint8_t metarray_id;
+	uint8_t pad[3];
+
+	union {
+		struct instr_operand idx;
+		uint32_t idx_val;
+	};
+
+	struct instr_operand length;
+
+	union {
+		struct instr_operand color_in;
+		uint32_t color_in_val;
+	};
+
+	struct instr_operand color_out;
+};
+
+struct instr_dma {
+	struct {
+		uint8_t header_id[8];
+		uint8_t struct_id[8];
+	} dst;
+
+	struct {
+		uint8_t offset[8];
+	} src;
+
+	uint16_t n_bytes[8];
+};
+
+struct instr_jmp {
+	struct instruction *ip;
+
+	union {
+		struct instr_operand a;
+		uint8_t header_id;
+		uint8_t action_id;
+	};
+
+	union {
+		struct instr_operand b;
+		uint64_t b_val;
+	};
+};
+
+struct instruction {
+	enum instruction_type type;
+	union {
+		struct instr_io io;
+		struct instr_hdr_validity valid;
+		struct instr_dst_src mov;
+		struct instr_regarray regarray;
+		struct instr_meter meter;
+		struct instr_dma dma;
+		struct instr_dst_src alu;
+		struct instr_table table;
+		struct instr_learn learn;
+		struct instr_extern_obj ext_obj;
+		struct instr_extern_func ext_func;
+		struct instr_jmp jmp;
+	};
+};
+
+struct instruction_data {
+	char label[RTE_SWX_NAME_SIZE];
+	char jmp_label[RTE_SWX_NAME_SIZE];
+	uint32_t n_users; /* user = jmp instruction to this instruction. */
+	int invalid;
+};
+
+/*
+ * Action.
+ */
+struct action {
+	TAILQ_ENTRY(action) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *st;
+	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
+	struct instruction *instructions;
+	uint32_t n_instructions;
+	uint32_t id;
+};
+
+TAILQ_HEAD(action_tailq, action);
+
+/*
+ * Table.
+ */
+struct table_type {
+	TAILQ_ENTRY(table_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	enum rte_swx_table_match_type match_type;
+	struct rte_swx_table_ops ops;
+};
+
+TAILQ_HEAD(table_type_tailq, table_type);
+
+struct match_field {
+	enum rte_swx_table_match_type match_type;
+	struct field *field;
+};
+
+struct table {
+	TAILQ_ENTRY(table) node;
+	char name[RTE_SWX_NAME_SIZE];
+	char args[RTE_SWX_NAME_SIZE];
+	struct table_type *type; /* NULL when n_fields == 0. */
+
+	/* Match. */
+	struct match_field *fields;
+	uint32_t n_fields;
+	struct header *header; /* Only valid when n_fields > 0. */
+
+	/* Action. */
+	struct action **actions;
+	struct action *default_action;
+	uint8_t *default_action_data;
+	uint32_t n_actions;
+	int default_action_is_const;
+	uint32_t action_data_size_max;
+
+	uint32_t size;
+	uint32_t id;
+};
+
+TAILQ_HEAD(table_tailq, table);
+
+struct table_runtime {
+	rte_swx_table_lookup_t func;
+	void *mailbox;
+	uint8_t **key;
+};
+
+struct table_statistics {
+	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
+	uint64_t *n_pkts_action;
+};
+
+/*
+ * Selector.
+ */
+struct selector {
+	TAILQ_ENTRY(selector) node;
+	char name[RTE_SWX_NAME_SIZE];
+
+	struct field *group_id_field;
+	struct field **selector_fields;
+	uint32_t n_selector_fields;
+	struct header *selector_header;
+	struct field *member_id_field;
+
+	uint32_t n_groups_max;
+	uint32_t n_members_per_group_max;
+
+	uint32_t id;
+};
+
+TAILQ_HEAD(selector_tailq, selector);
+
+struct selector_runtime {
+	void *mailbox;
+	uint8_t **group_id_buffer;
+	uint8_t **selector_buffer;
+	uint8_t **member_id_buffer;
+};
+
+struct selector_statistics {
+	uint64_t n_pkts;
+};
+
+/*
+ * Learner table.
+ */
+struct learner {
+	TAILQ_ENTRY(learner) node;
+	char name[RTE_SWX_NAME_SIZE];
+
+	/* Match. */
+	struct field **fields;
+	uint32_t n_fields;
+	struct header *header;
+
+	/* Action. */
+	struct action **actions;
+	struct field **action_arg;
+	struct action *default_action;
+	uint8_t *default_action_data;
+	uint32_t n_actions;
+	int default_action_is_const;
+	uint32_t action_data_size_max;
+
+	uint32_t size;
+	uint32_t timeout;
+	uint32_t id;
+};
+
+TAILQ_HEAD(learner_tailq, learner);
+
+struct learner_runtime {
+	void *mailbox;
+	uint8_t **key;
+	uint8_t **action_data;
+};
+
+struct learner_statistics {
+	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
+	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
+	uint64_t n_pkts_forget;
+	uint64_t *n_pkts_action;
+};
+
+/*
+ * Register array.
+ */
+struct regarray {
+	TAILQ_ENTRY(regarray) node;
+	char name[RTE_SWX_NAME_SIZE];
+	uint64_t init_val;
+	uint32_t size;
+	uint32_t id;
+};
+
+TAILQ_HEAD(regarray_tailq, regarray);
+
+struct regarray_runtime {
+	uint64_t *regarray;
+	uint32_t size_mask;
+};
+
+/*
+ * Meter array.
+ */
+struct meter_profile {
+	TAILQ_ENTRY(meter_profile) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct rte_meter_trtcm_params params;
+	struct rte_meter_trtcm_profile profile;
+	uint32_t n_users;
+};
+
+TAILQ_HEAD(meter_profile_tailq, meter_profile);
+
+struct metarray {
+	TAILQ_ENTRY(metarray) node;
+	char name[RTE_SWX_NAME_SIZE];
+	uint32_t size;
+	uint32_t id;
+};
+
+TAILQ_HEAD(metarray_tailq, metarray);
+
+struct meter {
+	struct rte_meter_trtcm m;
+	struct meter_profile *profile;
+	enum rte_color color_mask;
+	uint8_t pad[20];
+
+	uint64_t n_pkts[RTE_COLORS];
+	uint64_t n_bytes[RTE_COLORS];
+};
+
+struct metarray_runtime {
+	struct meter *metarray;
+	uint32_t size_mask;
+};
+
+/*
+ * Pipeline.
+ */
+struct thread {
+	/* Packet. */
+	struct rte_swx_pkt pkt;
+	uint8_t *ptr;
+
+	/* Structures. */
+	uint8_t **structs;
+
+	/* Packet headers. */
+	struct header_runtime *headers; /* Extracted or generated headers. */
+	struct header_out_runtime *headers_out; /* Emitted headers. */
+	uint8_t *header_storage;
+	uint8_t *header_out_storage;
+	uint64_t valid_headers;
+	uint32_t n_headers_out;
+
+	/* Packet meta-data. */
+	uint8_t *metadata;
+
+	/* Tables. */
+	struct table_runtime *tables;
+	struct selector_runtime *selectors;
+	struct learner_runtime *learners;
+	struct rte_swx_table_state *table_state;
+	uint64_t action_id;
+	int hit; /* 0 = Miss, 1 = Hit. */
+	uint32_t learner_id;
+	uint64_t time;
+
+	/* Extern objects and functions. */
+	struct extern_obj_runtime *extern_objs;
+	struct extern_func_runtime *extern_funcs;
+
+	/* Instructions. */
+	struct instruction *ip;
+	struct instruction *ret;
+};
+
+#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
+#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
+#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))
+
+#define HEADER_VALID(thread, header_id) \
+	MASK64_BIT_GET((thread)->valid_headers, header_id)
+
+#define ALU(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = dst64 & dst64_mask;                                     \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	uint64_t result = dst operator src;                                    \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define ALU_MH(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = dst64 & dst64_mask;                                     \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
+									       \
+	uint64_t result = dst operator src;                                    \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
+}
+
+#define ALU_HM(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	uint64_t result = dst operator src;                                    \
+	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
+}
+
+#define ALU_HM_FAST(thread, ip, operator)  \
+{                                                                                 \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
+	uint64_t dst64 = *dst64_ptr;                                              \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
+	uint64_t dst = dst64 & dst64_mask;                                        \
+										  \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
+	uint64_t src64 = *src64_ptr;                                              \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
+	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
+										  \
+	uint64_t result = dst operator src;                                       \
+										  \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
+}
+
+#define ALU_HH(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
+									       \
+	uint64_t result = dst operator src;                                    \
+	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
+}
+
+#define ALU_HH_FAST(thread, ip, operator)  \
+{                                                                                             \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
+	uint64_t dst64 = *dst64_ptr;                                                          \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
+	uint64_t dst = dst64 & dst64_mask;                                                    \
+											      \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
+	uint64_t src64 = *src64_ptr;                                                          \
+	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
+											      \
+	uint64_t result = dst operator src;                                                   \
+											      \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
+}
+
+#else
+
+#define ALU_MH ALU
+#define ALU_HM ALU
+#define ALU_HM_FAST ALU
+#define ALU_HH ALU
+#define ALU_HH_FAST ALU
+
+#endif
+
+#define ALU_I(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = dst64 & dst64_mask;                                     \
+									       \
+	uint64_t src = (ip)->alu.src_val;                                      \
+									       \
+	uint64_t result = dst operator src;                                    \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
+}
+
+#define ALU_MI ALU_I
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define ALU_HI(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
+									       \
+	uint64_t src = (ip)->alu.src_val;                                      \
+									       \
+	uint64_t result = dst operator src;                                    \
+	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
+}
+
+#else
+
+#define ALU_HI ALU_I
+
+#endif
+
+#define MOV(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define MOV_MH(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
+}
+
+#define MOV_HM(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
+	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
+}
+
+#define MOV_HH(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+									       \
+	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
+	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
+	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
+}
+
+#else
+
+#define MOV_MH MOV
+#define MOV_HM MOV
+#define MOV_HH MOV
+
+#endif
+
+#define MOV_I(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint64_t src = (ip)->mov.src_val;                                      \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
+}
+
+#define JMP_CMP(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
+	uint64_t a = a64 & a64_mask;                                           \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
+	uint64_t b = b64 & b64_mask;                                           \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define JMP_CMP_MH(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
+	uint64_t a = a64 & a64_mask;                                           \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_HM(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
+	uint64_t b = b64 & b64_mask;                                           \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_HH(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_HH_FAST(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#else
+
+#define JMP_CMP_MH JMP_CMP
+#define JMP_CMP_HM JMP_CMP
+#define JMP_CMP_HH JMP_CMP
+#define JMP_CMP_HH_FAST JMP_CMP
+
+#endif
+
+#define JMP_CMP_I(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
+	uint64_t a = a64 & a64_mask;                                           \
+									       \
+	uint64_t b = (ip)->jmp.b_val;                                          \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_MI JMP_CMP_I
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define JMP_CMP_HI(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
+									       \
+	uint64_t b = (ip)->jmp.b_val;                                          \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#else
+
+#define JMP_CMP_HI JMP_CMP_I
+
+#endif
+
+#define METADATA_READ(thread, offset, n_bits)                                  \
+({                                                                             \
+	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
+	uint64_t m64 = *m64_ptr;                                               \
+	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
+	(m64 & m64_mask);                                                      \
+})
+
+#define METADATA_WRITE(thread, offset, n_bits, value)                          \
+{                                                                              \
+	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
+	uint64_t m64 = *m64_ptr;                                               \
+	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
+									       \
+	uint64_t m_new = value;                                                \
+									       \
+	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
+}
+
+#ifndef RTE_SWX_PIPELINE_THREADS_MAX
+#define RTE_SWX_PIPELINE_THREADS_MAX 16
+#endif
+
+struct rte_swx_pipeline {
+	struct struct_type_tailq struct_types;
+	struct port_in_type_tailq port_in_types;
+	struct port_in_tailq ports_in;
+	struct port_out_type_tailq port_out_types;
+	struct port_out_tailq ports_out;
+	struct extern_type_tailq extern_types;
+	struct extern_obj_tailq extern_objs;
+	struct extern_func_tailq extern_funcs;
+	struct header_tailq headers;
+	struct struct_type *metadata_st;
+	uint32_t metadata_struct_id;
+	struct action_tailq actions;
+	struct table_type_tailq table_types;
+	struct table_tailq tables;
+	struct selector_tailq selectors;
+	struct learner_tailq learners;
+	struct regarray_tailq regarrays;
+	struct meter_profile_tailq meter_profiles;
+	struct metarray_tailq metarrays;
+
+	struct port_in_runtime *in;
+	struct port_out_runtime *out;
+	struct instruction **action_instructions;
+	struct rte_swx_table_state *table_state;
+	struct table_statistics *table_stats;
+	struct selector_statistics *selector_stats;
+	struct learner_statistics *learner_stats;
+	struct regarray_runtime *regarray_runtime;
+	struct metarray_runtime *metarray_runtime;
+	struct instruction *instructions;
+	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
+
+	uint32_t n_structs;
+	uint32_t n_ports_in;
+	uint32_t n_ports_out;
+	uint32_t n_extern_objs;
+	uint32_t n_extern_funcs;
+	uint32_t n_actions;
+	uint32_t n_tables;
+	uint32_t n_selectors;
+	uint32_t n_learners;
+	uint32_t n_regarrays;
+	uint32_t n_metarrays;
+	uint32_t n_headers;
+	uint32_t thread_id;
+	uint32_t port_id;
+	uint32_t n_instructions;
+	int build_done;
+	int numa_node;
+};
+
+#endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 02/24] pipeline: move thread inline functions to header file
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
@ 2021-09-10 13:36   ` Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
                     ` (23 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Move the thread inline functions to the internal header file.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 56 ----------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 59 ++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 56 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ae9b2056db..7e01453c27 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1492,62 +1492,6 @@ struct_field_parse(struct rte_swx_pipeline *p,
 	}
 }
 
-static inline void
-pipeline_port_inc(struct rte_swx_pipeline *p)
-{
-	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
-}
-
-static inline void
-thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
-{
-	t->ip = p->instructions;
-}
-
-static inline void
-thread_ip_set(struct thread *t, struct instruction *ip)
-{
-	t->ip = ip;
-}
-
-static inline void
-thread_ip_action_call(struct rte_swx_pipeline *p,
-		      struct thread *t,
-		      uint32_t action_id)
-{
-	t->ret = t->ip + 1;
-	t->ip = p->action_instructions[action_id];
-}
-
-static inline void
-thread_ip_inc(struct rte_swx_pipeline *p);
-
-static inline void
-thread_ip_inc(struct rte_swx_pipeline *p)
-{
-	struct thread *t = &p->threads[p->thread_id];
-
-	t->ip++;
-}
-
-static inline void
-thread_ip_inc_cond(struct thread *t, int cond)
-{
-	t->ip += cond;
-}
-
-static inline void
-thread_yield(struct rte_swx_pipeline *p)
-{
-	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
-}
-
-static inline void
-thread_yield_cond(struct rte_swx_pipeline *p, int cond)
-{
-	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
-}
-
 /*
  * rx.
  */
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 5d80dd8451..682f4c86a0 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1380,4 +1380,63 @@ struct rte_swx_pipeline {
 	int numa_node;
 };
 
+/*
+ * Instruction.
+ */
+static inline void
+pipeline_port_inc(struct rte_swx_pipeline *p)
+{
+	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
+}
+
+static inline void
+thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
+{
+	t->ip = p->instructions;
+}
+
+static inline void
+thread_ip_set(struct thread *t, struct instruction *ip)
+{
+	t->ip = ip;
+}
+
+static inline void
+thread_ip_action_call(struct rte_swx_pipeline *p,
+		      struct thread *t,
+		      uint32_t action_id)
+{
+	t->ret = t->ip + 1;
+	t->ip = p->action_instructions[action_id];
+}
+
+static inline void
+thread_ip_inc(struct rte_swx_pipeline *p);
+
+static inline void
+thread_ip_inc(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+
+	t->ip++;
+}
+
+static inline void
+thread_ip_inc_cond(struct thread *t, int cond)
+{
+	t->ip += cond;
+}
+
+static inline void
+thread_yield(struct rte_swx_pipeline *p)
+{
+	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
+}
+
+static inline void
+thread_yield_cond(struct rte_swx_pipeline *p, int cond)
+{
+	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 03/24] pipeline: create inline functions for RX instruction
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
@ 2021-09-10 13:36   ` Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
                     ` (22 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Create inline functions for the RX instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 38 ------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 51 ++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 7e01453c27..ad1ecfc640 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1517,44 +1517,6 @@ instr_rx_translate(struct rte_swx_pipeline *p,
 	return 0;
 }
 
-static inline void
-instr_rx_exec(struct rte_swx_pipeline *p);
-
-static inline void
-instr_rx_exec(struct rte_swx_pipeline *p)
-{
-	struct thread *t = &p->threads[p->thread_id];
-	struct instruction *ip = t->ip;
-	struct port_in_runtime *port = &p->in[p->port_id];
-	struct rte_swx_pkt *pkt = &t->pkt;
-	int pkt_received;
-
-	/* Packet. */
-	pkt_received = port->pkt_rx(port->obj, pkt);
-	t->ptr = &pkt->pkt[pkt->offset];
-	rte_prefetch0(t->ptr);
-
-	TRACE("[Thread %2u] rx %s from port %u\n",
-	      p->thread_id,
-	      pkt_received ? "1 pkt" : "0 pkts",
-	      p->port_id);
-
-	/* Headers. */
-	t->valid_headers = 0;
-	t->n_headers_out = 0;
-
-	/* Meta-data. */
-	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);
-
-	/* Tables. */
-	t->table_state = p->table_state;
-
-	/* Thread. */
-	pipeline_port_inc(p);
-	thread_ip_inc_cond(t, pkt_received);
-	thread_yield(p);
-}
-
 /*
  * tx.
  */
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 682f4c86a0..9814b5685a 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1439,4 +1439,55 @@ thread_yield_cond(struct rte_swx_pipeline *p, int cond)
 	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
 }
 
+/*
+ * rx.
+ */
+static inline int
+__instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct port_in_runtime *port = &p->in[p->port_id];
+	struct rte_swx_pkt *pkt = &t->pkt;
+	int pkt_received;
+
+	/* Packet. */
+	pkt_received = port->pkt_rx(port->obj, pkt);
+	t->ptr = &pkt->pkt[pkt->offset];
+	rte_prefetch0(t->ptr);
+
+	TRACE("[Thread %2u] rx %s from port %u\n",
+	      p->thread_id,
+	      pkt_received ? "1 pkt" : "0 pkts",
+	      p->port_id);
+
+	/* Headers. */
+	t->valid_headers = 0;
+	t->n_headers_out = 0;
+
+	/* Meta-data. */
+	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);
+
+	/* Tables. */
+	t->table_state = p->table_state;
+
+	/* Thread. */
+	pipeline_port_inc(p);
+
+	return pkt_received;
+}
+
+static inline void
+instr_rx_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+	int pkt_received;
+
+	/* Packet. */
+	pkt_received = __instr_rx_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_inc_cond(t, pkt_received);
+	thread_yield(p);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 04/24] pipeline: create inline functions for TX instruction
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
@ 2021-09-10 13:36   ` Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
                     ` (21 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Create inline functions for the TX instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 86 +---------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 90 ++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 84 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ad1ecfc640..bcf796f8c3 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1567,84 +1567,13 @@ instr_drop_translate(struct rte_swx_pipeline *p,
 	return 0;
 }
 
-static inline void
-emit_handler(struct thread *t)
-{
-	struct header_out_runtime *h0 = &t->headers_out[0];
-	struct header_out_runtime *h1 = &t->headers_out[1];
-	uint32_t offset = 0, i;
-
-	/* No header change or header decapsulation. */
-	if ((t->n_headers_out == 1) &&
-	    (h0->ptr + h0->n_bytes == t->ptr)) {
-		TRACE("Emit handler: no header change or header decap.\n");
-
-		t->pkt.offset -= h0->n_bytes;
-		t->pkt.length += h0->n_bytes;
-
-		return;
-	}
-
-	/* Header encapsulation (optionally, with prior header decasulation). */
-	if ((t->n_headers_out == 2) &&
-	    (h1->ptr + h1->n_bytes == t->ptr) &&
-	    (h0->ptr == h0->ptr0)) {
-		uint32_t offset;
-
-		TRACE("Emit handler: header encapsulation.\n");
-
-		offset = h0->n_bytes + h1->n_bytes;
-		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
-		t->pkt.offset -= offset;
-		t->pkt.length += offset;
-
-		return;
-	}
-
-	/* Header insertion. */
-	/* TBD */
-
-	/* Header extraction. */
-	/* TBD */
-
-	/* For any other case. */
-	TRACE("Emit handler: complex case.\n");
-
-	for (i = 0; i < t->n_headers_out; i++) {
-		struct header_out_runtime *h = &t->headers_out[i];
-
-		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
-		offset += h->n_bytes;
-	}
-
-	if (offset) {
-		memcpy(t->ptr - offset, t->header_out_storage, offset);
-		t->pkt.offset -= offset;
-		t->pkt.length += offset;
-	}
-}
-
-static inline void
-instr_tx_exec(struct rte_swx_pipeline *p);
-
 static inline void
 instr_tx_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
-	struct port_out_runtime *port = &p->out[port_id];
-	struct rte_swx_pkt *pkt = &t->pkt;
 
-	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
-	      p->thread_id,
-	      (uint32_t)port_id);
-
-	/* Headers. */
-	emit_handler(t);
-
-	/* Packet. */
-	port->pkt_tx(port->obj, pkt);
+	__instr_tx_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_reset(p, t);
@@ -1656,19 +1585,8 @@ instr_tx_i_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t port_id = ip->io.io.val;
-	struct port_out_runtime *port = &p->out[port_id];
-	struct rte_swx_pkt *pkt = &t->pkt;
-
-	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
-	      p->thread_id,
-	      (uint32_t)port_id);
-
-	/* Headers. */
-	emit_handler(t);
 
-	/* Packet. */
-	port->pkt_tx(port->obj, pkt);
+	__instr_tx_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_reset(p, t);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 9814b5685a..e9fe6632b6 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1490,4 +1490,94 @@ instr_rx_exec(struct rte_swx_pipeline *p)
 	thread_yield(p);
 }
 
+/*
+ * tx: emit_handler() makes the packet start at its output headers before transmit.
+ */
+static inline void
+emit_handler(struct thread *t)
+{
+	struct header_out_runtime *h0 = &t->headers_out[0];
+	struct header_out_runtime *h1 = &t->headers_out[1];
+	uint32_t offset = 0, i;
+
+	/* No header change or header decapsulation. */
+	if ((t->n_headers_out == 1) &&
+	    (h0->ptr + h0->n_bytes == t->ptr)) {
+		TRACE("Emit handler: no header change or header decap.\n");
+
+		t->pkt.offset -= h0->n_bytes;
+		t->pkt.length += h0->n_bytes;
+
+		return;
+	}
+
+	/* Header encapsulation (optionally, with prior header decapsulation). */
+	if ((t->n_headers_out == 2) &&
+	    (h1->ptr + h1->n_bytes == t->ptr) &&
+	    (h0->ptr == h0->ptr0)) {
+		TRACE("Emit handler: header encapsulation.\n");
+
+		/* h1 already ends at t->ptr, so only h0 is copied in front of it. */
+		offset = h0->n_bytes + h1->n_bytes;
+		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
+		t->pkt.offset -= offset;
+		t->pkt.length += offset;
+
+		return;
+	}
+
+	/* For any other case. */
+	TRACE("Emit handler: complex case.\n");
+
+	for (i = 0; i < t->n_headers_out; i++) {
+		struct header_out_runtime *h = &t->headers_out[i];
+
+		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
+		offset += h->n_bytes;
+	}
+
+	/* Copy the gathered header bytes, if any, right in front of t->ptr. */
+	if (offset) {
+		memcpy(t->ptr - offset, t->header_out_storage, offset);
+		t->pkt.offset -= offset;
+		t->pkt.length += offset;
+	}
+}
+
+static inline void
+__instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
+	struct port_out_runtime *port = &p->out[port_id];
+	struct rte_swx_pkt *pkt = &t->pkt;
+
+	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
+	      p->thread_id,
+	      (uint32_t)port_id);
+
+	/* Headers. */
+	emit_handler(t);
+
+	/* Packet. */
+	port->pkt_tx(port->obj, pkt);
+}
+
+static inline void
+__instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t port_id = ip->io.io.val;
+	struct port_out_runtime *port = &p->out[port_id];
+	struct rte_swx_pkt *pkt = &t->pkt;
+
+	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
+	      p->thread_id,
+	      (uint32_t)port_id);
+
+	/* Headers. */
+	emit_handler(t);
+
+	/* Packet. */
+	port->pkt_tx(port->obj, pkt);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 05/24] pipeline: create inline functions for extract instruction
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (2 preceding siblings ...)
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
@ 2021-09-10 13:36   ` Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
                     ` (20 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Create inline functions for the extract instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 130 ++++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 178 +++++++++++++++++++++++
 2 files changed, 203 insertions(+), 105 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index bcf796f8c3..fd7e31b709 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1665,52 +1665,12 @@ instr_hdr_lookahead_translate(struct rte_swx_pipeline *p,
 }
 
 static inline void
-__instr_hdr_extract_exec(struct rte_swx_pipeline *p, uint32_t n_extract);
-
-static inline void
-__instr_hdr_extract_exec(struct rte_swx_pipeline *p, uint32_t n_extract)
+instr_hdr_extract_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t valid_headers = t->valid_headers;
-	uint8_t *ptr = t->ptr;
-	uint32_t offset = t->pkt.offset;
-	uint32_t length = t->pkt.length;
-	uint32_t i;
 
-	for (i = 0; i < n_extract; i++) {
-		uint32_t header_id = ip->io.hdr.header_id[i];
-		uint32_t struct_id = ip->io.hdr.struct_id[i];
-		uint32_t n_bytes = ip->io.hdr.n_bytes[i];
-
-		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
-		      p->thread_id,
-		      header_id,
-		      n_bytes);
-
-		/* Headers. */
-		t->structs[struct_id] = ptr;
-		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
-
-		/* Packet. */
-		offset += n_bytes;
-		length -= n_bytes;
-		ptr += n_bytes;
-	}
-
-	/* Headers. */
-	t->valid_headers = valid_headers;
-
-	/* Packet. */
-	t->pkt.offset = offset;
-	t->pkt.length = length;
-	t->ptr = ptr;
-}
-
-static inline void
-instr_hdr_extract_exec(struct rte_swx_pipeline *p)
-{
-	__instr_hdr_extract_exec(p, 1);
+	__instr_hdr_extract_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1719,10 +1679,10 @@ instr_hdr_extract_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract2_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 2);
+	__instr_hdr_extract2_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1731,10 +1691,10 @@ instr_hdr_extract2_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract3_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 3);
+	__instr_hdr_extract3_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1743,10 +1703,10 @@ instr_hdr_extract3_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract4_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 4);
+	__instr_hdr_extract4_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1755,10 +1715,10 @@ instr_hdr_extract4_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract5_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 5);
+	__instr_hdr_extract5_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1767,10 +1727,10 @@ instr_hdr_extract5_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract6_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 6);
+	__instr_hdr_extract6_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1779,10 +1739,10 @@ instr_hdr_extract6_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract7_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 7);
+	__instr_hdr_extract7_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1791,10 +1751,10 @@ instr_hdr_extract7_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract8_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 8);
+	__instr_hdr_extract8_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1806,35 +1766,7 @@ instr_hdr_extract_m_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	uint64_t valid_headers = t->valid_headers;
-	uint8_t *ptr = t->ptr;
-	uint32_t offset = t->pkt.offset;
-	uint32_t length = t->pkt.length;
-
-	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
-	uint32_t header_id = ip->io.hdr.header_id[0];
-	uint32_t struct_id = ip->io.hdr.struct_id[0];
-	uint32_t n_bytes = ip->io.hdr.n_bytes[0];
-
-	struct header_runtime *h = &t->headers[header_id];
-
-	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
-	      p->thread_id,
-	      header_id,
-	      n_bytes,
-	      n_bytes_last);
-
-	n_bytes += n_bytes_last;
-
-	/* Headers. */
-	t->structs[struct_id] = ptr;
-	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
-	h->n_bytes = n_bytes;
-
-	/* Packet. */
-	t->pkt.offset = offset + n_bytes;
-	t->pkt.length = length - n_bytes;
-	t->ptr = ptr + n_bytes;
+	__instr_hdr_extract_m_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1846,19 +1778,7 @@ instr_hdr_lookahead_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	uint64_t valid_headers = t->valid_headers;
-	uint8_t *ptr = t->ptr;
-
-	uint32_t header_id = ip->io.hdr.header_id[0];
-	uint32_t struct_id = ip->io.hdr.struct_id[0];
-
-	TRACE("[Thread %2u]: lookahead header %u\n",
-	      p->thread_id,
-	      header_id);
-
-	/* Headers. */
-	t->structs[struct_id] = ptr;
-	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+	__instr_hdr_lookahead_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index e9fe6632b6..1519bcc305 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1580,4 +1580,182 @@ __instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct ins
 	port->pkt_tx(port->obj, pkt);
 }
 
+/*
+ * extract.
+ */
+static inline void
+__instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
+			      struct thread *t,
+			      const struct instruction *ip,
+			      uint32_t n_extract)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint8_t *ptr = t->ptr;
+	uint32_t offset = t->pkt.offset;
+	uint32_t length = t->pkt.length;
+	uint32_t i;
+
+	for (i = 0; i < n_extract; i++) {
+		uint32_t header_id = ip->io.hdr.header_id[i];
+		uint32_t struct_id = ip->io.hdr.struct_id[i];
+		uint32_t n_bytes = ip->io.hdr.n_bytes[i];
+
+		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
+		      p->thread_id,
+		      header_id,
+		      n_bytes);
+
+		/* Headers. */
+		t->structs[struct_id] = ptr;
+		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+
+		/* Packet. */
+		offset += n_bytes;
+		length -= n_bytes;
+		ptr += n_bytes;
+	}
+
+	/* Headers. */
+	t->valid_headers = valid_headers;
+
+	/* Packet. */
+	t->pkt.offset = offset;
+	t->pkt.length = length;
+	t->ptr = ptr;
+}
+
+static inline void
+__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
+			 struct thread *t,
+			 const struct instruction *ip)
+{
+	__instr_hdr_extract_many_exec(p, t, ip, 1);
+}
+
+static inline void
+__instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 2);
+}
+
+static inline void
+__instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 3);
+}
+
+static inline void
+__instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 4);
+}
+
+static inline void
+__instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 5);
+}
+
+static inline void
+__instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 6);
+}
+
+static inline void
+__instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 7);
+}
+
+static inline void
+__instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 8);
+}
+
+static inline void
+__instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint8_t *ptr = t->ptr;
+	uint32_t offset = t->pkt.offset;
+	uint32_t length = t->pkt.length;
+
+	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
+	uint32_t header_id = ip->io.hdr.header_id[0];
+	uint32_t struct_id = ip->io.hdr.struct_id[0];
+	uint32_t n_bytes = ip->io.hdr.n_bytes[0];
+
+	struct header_runtime *h = &t->headers[header_id];
+
+	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
+	      p->thread_id,
+	      header_id,
+	      n_bytes,
+	      n_bytes_last);
+
+	n_bytes += n_bytes_last;
+
+	/* Headers. */
+	t->structs[struct_id] = ptr;
+	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+	h->n_bytes = n_bytes;
+
+	/* Packet. */
+	t->pkt.offset = offset + n_bytes;
+	t->pkt.length = length - n_bytes;
+	t->ptr = ptr + n_bytes;
+}
+
+static inline void
+__instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint8_t *ptr = t->ptr;
+
+	uint32_t header_id = ip->io.hdr.header_id[0];
+	uint32_t struct_id = ip->io.hdr.struct_id[0];
+
+	TRACE("[Thread %2u]: lookahead header %u\n",
+	      p->thread_id,
+	      header_id);
+
+	/* Headers. */
+	t->structs[struct_id] = ptr;
+	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 06/24] pipeline: create inline functions for emit instruction
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (3 preceding siblings ...)
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
@ 2021-09-10 13:36   ` Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
                     ` (19 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Create inline functions for the emit instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 162 ++++++++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 170 +++++++++++++++++++++++
 2 files changed, 228 insertions(+), 104 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index fd7e31b709..80c5fb94bb 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1810,82 +1810,12 @@ instr_hdr_emit_translate(struct rte_swx_pipeline *p,
 }
 
 static inline void
-__instr_hdr_emit_exec(struct rte_swx_pipeline *p, uint32_t n_emit);
-
-static inline void
-__instr_hdr_emit_exec(struct rte_swx_pipeline *p, uint32_t n_emit)
+instr_hdr_emit_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t valid_headers = t->valid_headers;
-	uint32_t n_headers_out = t->n_headers_out;
-	struct header_out_runtime *ho = &t->headers_out[n_headers_out - 1];
-	uint8_t *ho_ptr = NULL;
-	uint32_t ho_nbytes = 0, first = 1, i;
-
-	for (i = 0; i < n_emit; i++) {
-		uint32_t header_id = ip->io.hdr.header_id[i];
-		uint32_t struct_id = ip->io.hdr.struct_id[i];
-
-		struct header_runtime *hi = &t->headers[header_id];
-		uint8_t *hi_ptr0 = hi->ptr0;
-		uint32_t n_bytes = hi->n_bytes;
-
-		uint8_t *hi_ptr = t->structs[struct_id];
-
-		if (!MASK64_BIT_GET(valid_headers, header_id))
-			continue;
-
-		TRACE("[Thread %2u]: emit header %u\n",
-		      p->thread_id,
-		      header_id);
-
-		/* Headers. */
-		if (first) {
-			first = 0;
-
-			if (!t->n_headers_out) {
-				ho = &t->headers_out[0];
-
-				ho->ptr0 = hi_ptr0;
-				ho->ptr = hi_ptr;
-
-				ho_ptr = hi_ptr;
-				ho_nbytes = n_bytes;
-
-				n_headers_out = 1;
-
-				continue;
-			} else {
-				ho_ptr = ho->ptr;
-				ho_nbytes = ho->n_bytes;
-			}
-		}
-
-		if (ho_ptr + ho_nbytes == hi_ptr) {
-			ho_nbytes += n_bytes;
-		} else {
-			ho->n_bytes = ho_nbytes;
-
-			ho++;
-			ho->ptr0 = hi_ptr0;
-			ho->ptr = hi_ptr;
 
-			ho_ptr = hi_ptr;
-			ho_nbytes = n_bytes;
-
-			n_headers_out++;
-		}
-	}
-
-	ho->n_bytes = ho_nbytes;
-	t->n_headers_out = n_headers_out;
-}
-
-static inline void
-instr_hdr_emit_exec(struct rte_swx_pipeline *p)
-{
-	__instr_hdr_emit_exec(p, 1);
+	__instr_hdr_emit_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1894,81 +1824,105 @@ instr_hdr_emit_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 1);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_emit_exec(p, 2);
-	instr_tx_exec(p);
+	__instr_hdr_emit2_tx_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit3_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 3);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit4_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 4);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_emit_exec(p, 5);
-	instr_tx_exec(p);
+	__instr_hdr_emit5_tx_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit6_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 6);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit7_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 7);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit8_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 8);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 /*
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 1519bcc305..8b37a9812e 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1758,4 +1758,174 @@ __instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
 	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
 }
 
+/*
+ * emit: queue the valid headers in t->headers_out, merging headers adjacent in memory.
+ */
+static inline void
+__instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
+			   struct thread *t,
+			   const struct instruction *ip,
+			   uint32_t n_emit)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint32_t n_headers_out = t->n_headers_out;
+	struct header_out_runtime *ho = NULL;
+	uint8_t *ho_ptr = NULL;
+	uint32_t ho_nbytes = 0, i;
+
+	for (i = 0; i < n_emit; i++) {
+		uint32_t header_id = ip->io.hdr.header_id[i];
+		uint32_t struct_id = ip->io.hdr.struct_id[i];
+
+		struct header_runtime *hi = &t->headers[header_id];
+		uint8_t *hi_ptr0 = hi->ptr0;
+		uint32_t n_bytes = hi->n_bytes;
+
+		uint8_t *hi_ptr = t->structs[struct_id];
+
+		if (!MASK64_BIT_GET(valid_headers, header_id))
+			continue;
+
+		TRACE("[Thread %2u]: emit header %u\n",
+		      p->thread_id,
+		      header_id);
+
+		/* Headers: ho stays NULL until the first valid header is found. */
+		if (!ho) {
+			if (!t->n_headers_out) {
+				ho = &t->headers_out[0];
+
+				ho->ptr0 = hi_ptr0;
+				ho->ptr = hi_ptr;
+
+				ho_ptr = hi_ptr;
+				ho_nbytes = n_bytes;
+				n_headers_out = 1;
+				continue;
+			} else {
+				ho = &t->headers_out[n_headers_out - 1];
+				ho_ptr = ho->ptr;
+				ho_nbytes = ho->n_bytes;
+			}
+		}
+
+		/* Grow the current output header if contiguous, else open a new one. */
+		if (ho_ptr + ho_nbytes == hi_ptr) {
+			ho_nbytes += n_bytes;
+		} else {
+			ho->n_bytes = ho_nbytes;
+
+			ho++;
+			ho->ptr0 = hi_ptr0;
+			ho->ptr = hi_ptr;
+
+			ho_ptr = hi_ptr;
+			ho_nbytes = n_bytes;
+
+			n_headers_out++;
+		}
+	}
+
+	/* No valid header emitted: there is no output header to update. */
+	if (ho)
+		ho->n_bytes = ho_nbytes;
+	t->n_headers_out = n_headers_out;
+}
+
+static inline void
+__instr_hdr_emit_exec(struct rte_swx_pipeline *p,
+		      struct thread *t,
+		      const struct instruction *ip)
+{
+	__instr_hdr_emit_many_exec(p, t, ip, 1);
+}
+
+static inline void
+__instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
+			 struct thread *t,
+			 const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 1);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 2);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 3);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 4);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 5);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 6);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 7);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 8);
+	__instr_tx_exec(p, t, ip);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 07/24] pipeline: create inline functions for validate instruction
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (4 preceding siblings ...)
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
@ 2021-09-10 13:36   ` Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
                     ` (18 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Create inline functions for the validate and invalidate instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 12 ++-------
 lib/pipeline/rte_swx_pipeline_internal.h | 32 ++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 80c5fb94bb..6c6d8e52a5 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1953,12 +1953,8 @@ instr_hdr_validate_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t header_id = ip->valid.header_id;
 
-	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
-
-	/* Headers. */
-	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
+	__instr_hdr_validate_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1992,12 +1988,8 @@ instr_hdr_invalidate_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t header_id = ip->valid.header_id;
-
-	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);
 
-	/* Headers. */
-	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
+	__instr_hdr_invalidate_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 8b37a9812e..312490f11a 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1928,4 +1928,36 @@ __instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
 	__instr_tx_exec(p, t, ip);
 }
 
+/*
+ * validate.
+ */
+static inline void
+__instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	uint32_t header_id = ip->valid.header_id;
+
+	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
+
+	/* Headers. */
+	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
+}
+
+/*
+ * invalidate.
+ */
+static inline void
+__instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
+			    struct thread *t,
+			    const struct instruction *ip)
+{
+	uint32_t header_id = ip->valid.header_id;
+
+	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);
+
+	/* Headers. */
+	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 08/24] pipeline: create inline functions for learn instruction
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (5 preceding siblings ...)
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
@ 2021-09-10 13:36   ` Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
                     ` (17 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Create inline functions for the learn and forget instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 36 ++--------------
 lib/pipeline/rte_swx_pipeline_internal.h | 55 ++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 6c6d8e52a5..ca12f34b01 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2230,27 +2230,8 @@ instr_learn_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t action_id = ip->learn.action_id;
-	uint32_t learner_id = t->learner_id;
-	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
-		p->n_selectors + learner_id];
-	struct learner_runtime *l = &t->learners[learner_id];
-	struct learner_statistics *stats = &p->learner_stats[learner_id];
-	uint32_t status;
-
-	/* Table. */
-	status = rte_swx_table_learner_add(ts->obj,
-					   l->mailbox,
-					   t->time,
-					   action_id,
-					   l->action_data[action_id]);
-
-	TRACE("[Thread %2u] learner %u learn %s\n",
-	      p->thread_id,
-	      learner_id,
-	      status ? "ok" : "error");
 
-	stats->n_pkts_learn[status] += 1;
+	__instr_learn_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2279,20 +2260,9 @@ static inline void
 instr_forget_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
-	uint32_t learner_id = t->learner_id;
-	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
-		p->n_selectors + learner_id];
-	struct learner_runtime *l = &t->learners[learner_id];
-	struct learner_statistics *stats = &p->learner_stats[learner_id];
-
-	/* Table. */
-	rte_swx_table_learner_delete(ts->obj, l->mailbox);
-
-	TRACE("[Thread %2u] learner %u forget\n",
-	      p->thread_id,
-	      learner_id);
+	struct instruction *ip = t->ip;
 
-	stats->n_pkts_forget += 1;
+	__instr_forget_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 312490f11a..24096a23b6 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1960,4 +1960,59 @@ __instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
 	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
 }
 
+/*
+ * learn.
+ */
+static inline void
+__instr_learn_exec(struct rte_swx_pipeline *p,
+		   struct thread *t,
+		   const struct instruction *ip)
+{
+	uint64_t action_id = ip->learn.action_id;
+	uint32_t learner_id = t->learner_id;
+	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
+		p->n_selectors + learner_id];
+	struct learner_runtime *l = &t->learners[learner_id];
+	struct learner_statistics *stats = &p->learner_stats[learner_id];
+	uint32_t status;
+
+	/* Table. */
+	status = rte_swx_table_learner_add(ts->obj,
+					   l->mailbox,
+					   t->time,
+					   action_id,
+					   l->action_data[action_id]);
+
+	TRACE("[Thread %2u] learner %u learn %s\n",
+	      p->thread_id,
+	      learner_id,
+	      status ? "ok" : "error");
+
+	stats->n_pkts_learn[status] += 1;
+}
+
+/*
+ * forget.
+ */
+static inline void
+__instr_forget_exec(struct rte_swx_pipeline *p,
+		    struct thread *t,
+		    const struct instruction *ip __rte_unused)
+{
+	uint32_t learner_id = t->learner_id;
+	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
+		p->n_selectors + learner_id];
+	struct learner_runtime *l = &t->learners[learner_id];
+	struct learner_statistics *stats = &p->learner_stats[learner_id];
+
+	/* Table. */
+	rte_swx_table_learner_delete(ts->obj, l->mailbox);
+
+	TRACE("[Thread %2u] learner %u forget\n",
+	      p->thread_id,
+	      learner_id);
+
+	stats->n_pkts_forget += 1;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 09/24] pipeline: create inline functions for extern instruction
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (6 preceding siblings ...)
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
@ 2021-09-10 13:36   ` Cristian Dumitrescu
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
                     ` (16 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Create inline functions for the extern instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 22 +++---------
 lib/pipeline/rte_swx_pipeline_internal.h | 43 ++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ca12f34b01..c9e29230c2 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2317,18 +2317,10 @@ instr_extern_obj_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t obj_id = ip->ext_obj.ext_obj_id;
-	uint32_t func_id = ip->ext_obj.func_id;
-	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
-	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
-
-	TRACE("[Thread %2u] extern obj %u member func %u\n",
-	      p->thread_id,
-	      obj_id,
-	      func_id);
+	uint32_t done;
 
 	/* Extern object member function execute. */
-	uint32_t done = func(obj->obj, obj->mailbox);
+	done = __instr_extern_obj_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc_cond(t, done);
@@ -2340,16 +2332,10 @@ instr_extern_func_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t ext_func_id = ip->ext_func.ext_func_id;
-	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
-	rte_swx_extern_func_t func = ext_func->func;
-
-	TRACE("[Thread %2u] extern func %u\n",
-	      p->thread_id,
-	      ext_func_id);
+	uint32_t done;
 
 	/* Extern function execute. */
-	uint32_t done = func(ext_func->mailbox);
+	done = __instr_extern_func_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc_cond(t, done);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 24096a23b6..14d6d88344 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2015,4 +2015,47 @@ __instr_forget_exec(struct rte_swx_pipeline *p,
 	stats->n_pkts_forget += 1;
 }
 
+/*
+ * extern.
+ */
+static inline uint32_t
+__instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	uint32_t obj_id = ip->ext_obj.ext_obj_id;
+	uint32_t func_id = ip->ext_obj.func_id;
+	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
+	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
+	uint32_t done;
+
+	TRACE("[Thread %2u] extern obj %u member func %u\n",
+	      p->thread_id,
+	      obj_id,
+	      func_id);
+
+	done = func(obj->obj, obj->mailbox);
+
+	return done;
+}
+
+static inline uint32_t
+__instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
+			 struct thread *t,
+			 const struct instruction *ip)
+{
+	uint32_t ext_func_id = ip->ext_func.ext_func_id;
+	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
+	rte_swx_extern_func_t func = ext_func->func;
+	uint32_t done;
+
+	TRACE("[Thread %2u] extern func %u\n",
+	      p->thread_id,
+	      ext_func_id);
+
+	done = func(ext_func->mailbox);
+
+	return done;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 10/24] pipeline: create inline functions for move instruction
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (7 preceding siblings ...)
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
@ 2021-09-10 13:36   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
                     ` (15 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:36 UTC (permalink / raw)
  To: dev

Create inline functions for the move instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 26 +++---------
 lib/pipeline/rte_swx_pipeline_internal.h | 53 ++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index c9e29230c2..72606f1a06 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2407,10 +2407,7 @@ instr_mov_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov\n",
-	      p->thread_id);
-
-	MOV(t, ip);
+	__instr_mov_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2422,10 +2419,7 @@ instr_mov_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov (mh)\n",
-	      p->thread_id);
-
-	MOV_MH(t, ip);
+	__instr_mov_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2437,10 +2431,7 @@ instr_mov_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov (hm)\n",
-	      p->thread_id);
-
-	MOV_HM(t, ip);
+	__instr_mov_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2452,10 +2443,7 @@ instr_mov_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov (hh)\n",
-	      p->thread_id);
-
-	MOV_HH(t, ip);
+	__instr_mov_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2467,11 +2455,7 @@ instr_mov_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n",
-	      p->thread_id,
-	      ip->mov.src_val);
-
-	MOV_I(t, ip);
+	__instr_mov_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 14d6d88344..1bf94159a9 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2058,4 +2058,57 @@ __instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
 	return done;
 }
 
+/*
+ * mov.
+ */
+static inline void
+__instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
+		 struct thread *t,
+		 const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov\n", p->thread_id);
+
+	MOV(t, ip);
+}
+
+static inline void
+__instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);
+
+	MOV_MH(t, ip);
+}
+
+static inline void
+__instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);
+
+	MOV_HM(t, ip);
+}
+
+static inline void
+__instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);
+
+	MOV_HH(t, ip);
+}
+
+static inline void
+__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		   struct thread *t,
+		   const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);
+
+	MOV_I(t, ip);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 11/24] pipeline: create inline functions for DMA instruction
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (8 preceding siblings ...)
  2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
                     ` (14 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Create inline functions for the DMA instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          |  80 ++++++------------
 lib/pipeline/rte_swx_pipeline_internal.h | 100 +++++++++++++++++++++++
 2 files changed, 123 insertions(+), 57 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 72606f1a06..a06dc8d348 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2465,46 +2465,12 @@ instr_mov_i_exec(struct rte_swx_pipeline *p)
  * dma.
  */
 static inline void
-__instr_dma_ht_exec(struct rte_swx_pipeline *p, uint32_t n_dma);
-
-static inline void
-__instr_dma_ht_exec(struct rte_swx_pipeline *p, uint32_t n_dma)
+instr_dma_ht_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *action_data = t->structs[0];
-	uint64_t valid_headers = t->valid_headers;
-	uint32_t i;
-
-	for (i = 0; i < n_dma; i++) {
-		uint32_t header_id = ip->dma.dst.header_id[i];
-		uint32_t struct_id = ip->dma.dst.struct_id[i];
-		uint32_t offset = ip->dma.src.offset[i];
-		uint32_t n_bytes = ip->dma.n_bytes[i];
-
-		struct header_runtime *h = &t->headers[header_id];
-		uint8_t *h_ptr0 = h->ptr0;
-		uint8_t *h_ptr = t->structs[struct_id];
-
-		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
-			h_ptr : h_ptr0;
-		void *src = &action_data[offset];
-
-		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);
 
-		/* Headers. */
-		memcpy(dst, src, n_bytes);
-		t->structs[struct_id] = dst;
-		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
-	}
-
-	t->valid_headers = valid_headers;
-}
-
-static inline void
-instr_dma_ht_exec(struct rte_swx_pipeline *p)
-{
-	__instr_dma_ht_exec(p, 1);
+	__instr_dma_ht_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2513,10 +2479,10 @@ instr_dma_ht_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht2_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 2);
+	__instr_dma_ht2_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2525,10 +2491,10 @@ instr_dma_ht2_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht3_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 3);
+	__instr_dma_ht3_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2537,10 +2503,10 @@ instr_dma_ht3_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht4_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 4);
+	__instr_dma_ht4_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2549,10 +2515,10 @@ instr_dma_ht4_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht5_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 5);
+	__instr_dma_ht5_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2561,10 +2527,10 @@ instr_dma_ht5_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht6_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 6);
+	__instr_dma_ht6_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2573,10 +2539,10 @@ instr_dma_ht6_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht7_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 7);
+	__instr_dma_ht7_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2585,10 +2551,10 @@ instr_dma_ht7_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht8_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 8);
+	__instr_dma_ht8_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 1bf94159a9..ec8e342a5d 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2111,4 +2111,104 @@ __instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
 	MOV_I(t, ip);
 }
 
+/*
+ * dma.
+ */
+static inline void
+__instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
+			 struct thread *t,
+			 const struct instruction *ip,
+			 uint32_t n_dma)
+{
+	uint8_t *action_data = t->structs[0];
+	uint64_t valid_headers = t->valid_headers;
+	uint32_t i;
+
+	for (i = 0; i < n_dma; i++) {
+		uint32_t header_id = ip->dma.dst.header_id[i];
+		uint32_t struct_id = ip->dma.dst.struct_id[i];
+		uint32_t offset = ip->dma.src.offset[i];
+		uint32_t n_bytes = ip->dma.n_bytes[i];
+
+		struct header_runtime *h = &t->headers[header_id];
+		uint8_t *h_ptr0 = h->ptr0;
+		uint8_t *h_ptr = t->structs[struct_id];
+
+		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
+			h_ptr : h_ptr0;
+		void *src = &action_data[offset];
+
+		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);
+
+		/* Headers. */
+		memcpy(dst, src, n_bytes);
+		t->structs[struct_id] = dst;
+		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+	}
+
+	t->valid_headers = valid_headers;
+}
+
+static inline void
+__instr_dma_ht_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	__instr_dma_ht_many_exec(p, t, ip, 1);
+}
+
+static inline void
+__instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 2);
+}
+
+static inline void
+__instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 3);
+}
+
+static inline void
+__instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 4);
+}
+
+static inline void
+__instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 5);
+}
+
+static inline void
+__instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 6);
+}
+
+static inline void
+__instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 7);
+}
+
+static inline void
+__instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 8);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 12/24] pipeline: create inline functions for ALU instructions
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (9 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
                     ` (13 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Create inline functions for the ALU instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 348 ++-----------
 lib/pipeline/rte_swx_pipeline_internal.h | 616 +++++++++++++++++++++++
 2 files changed, 660 insertions(+), 304 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index a06dc8d348..8956b6de27 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3044,10 +3044,8 @@ instr_alu_add_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add\n", p->thread_id);
-
-	/* Structs. */
-	ALU(t, ip, +);
+	/* Structs */
+	__instr_alu_add_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3059,10 +3057,8 @@ instr_alu_add_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, +);
+	__instr_alu_add_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3074,10 +3070,8 @@ instr_alu_add_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, +);
+	__instr_alu_add_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3089,10 +3083,8 @@ instr_alu_add_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, +);
+	__instr_alu_add_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3104,10 +3096,8 @@ instr_alu_add_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, +);
+	__instr_alu_add_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3119,10 +3109,8 @@ instr_alu_add_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, +);
+	__instr_alu_add_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3134,10 +3122,8 @@ instr_alu_sub_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, -);
+	__instr_alu_sub_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3149,10 +3135,8 @@ instr_alu_sub_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, -);
+	__instr_alu_sub_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3164,10 +3148,8 @@ instr_alu_sub_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, -);
+	__instr_alu_sub_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3179,10 +3161,8 @@ instr_alu_sub_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, -);
+	__instr_alu_sub_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3194,10 +3174,8 @@ instr_alu_sub_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, -);
+	__instr_alu_sub_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3209,10 +3187,8 @@ instr_alu_sub_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, -);
+	__instr_alu_sub_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3224,10 +3200,8 @@ instr_alu_shl_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, <<);
+	__instr_alu_shl_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3239,10 +3213,8 @@ instr_alu_shl_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, <<);
+	__instr_alu_shl_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3254,10 +3226,8 @@ instr_alu_shl_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, <<);
+	__instr_alu_shl_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3269,10 +3239,8 @@ instr_alu_shl_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, <<);
+	__instr_alu_shl_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3284,10 +3252,8 @@ instr_alu_shl_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, <<);
+	__instr_alu_shl_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3299,10 +3265,8 @@ instr_alu_shl_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, <<);
+	__instr_alu_shl_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3314,10 +3278,8 @@ instr_alu_shr_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, >>);
+	__instr_alu_shr_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3329,10 +3291,8 @@ instr_alu_shr_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, >>);
+	__instr_alu_shr_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3344,10 +3304,8 @@ instr_alu_shr_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, >>);
+	__instr_alu_shr_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3359,10 +3317,8 @@ instr_alu_shr_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, >>);
+	__instr_alu_shr_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3374,10 +3330,8 @@ instr_alu_shr_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, >>);
+	__instr_alu_shr_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3389,10 +3343,8 @@ instr_alu_shr_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, >>);
+	__instr_alu_shr_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3404,10 +3356,8 @@ instr_alu_and_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, &);
+	__instr_alu_and_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3419,10 +3369,8 @@ instr_alu_and_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, &);
+	__instr_alu_and_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3434,10 +3382,8 @@ instr_alu_and_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, &);
+	__instr_alu_and_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3449,10 +3395,8 @@ instr_alu_and_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, &);
+	__instr_alu_and_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3464,10 +3408,8 @@ instr_alu_and_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, &);
+	__instr_alu_and_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3479,10 +3421,8 @@ instr_alu_or_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, |);
+	__instr_alu_or_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3494,10 +3434,8 @@ instr_alu_or_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, |);
+	__instr_alu_or_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3509,10 +3447,8 @@ instr_alu_or_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, |);
+	__instr_alu_or_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3524,10 +3460,8 @@ instr_alu_or_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, |);
+	__instr_alu_or_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3539,10 +3473,8 @@ instr_alu_or_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, |);
+	__instr_alu_or_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3554,10 +3486,8 @@ instr_alu_xor_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, ^);
+	__instr_alu_xor_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3569,10 +3499,8 @@ instr_alu_xor_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, ^);
+	__instr_alu_xor_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3584,10 +3512,8 @@ instr_alu_xor_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, ^);
+	__instr_alu_xor_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3599,10 +3525,8 @@ instr_alu_xor_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, ^);
+	__instr_alu_xor_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3614,10 +3538,8 @@ instr_alu_xor_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, ^);
+	__instr_alu_xor_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3628,55 +3550,9 @@ instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr, dst;
-	uint64_t *src64_ptr, src64, src64_mask, src;
-	uint64_t r;
-
-	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-	dst = *dst16_ptr;
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
-	src64 = *src64_ptr;
-	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
-	src = src64 & src64_mask;
-
-	r = dst;
-	r = ~r & 0xFFFF;
-
-	/* The first input (r) is a 16-bit number. The second and the third
-	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
-	 * three numbers (output r) is a 34-bit number.
-	 */
-	r += (src >> 32) + (src & 0xFFFFFFFF);
-
-	/* The first input is a 16-bit number. The second input is an 18-bit
-	 * number. In the worst case scenario, the sum of the two numbers is a
-	 * 19-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
-	 * therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_ckadd_field_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3687,67 +3563,9 @@ instr_alu_cksub_field_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr, dst;
-	uint64_t *src64_ptr, src64, src64_mask, src;
-	uint64_t r;
-
-	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-	dst = *dst16_ptr;
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
-	src64 = *src64_ptr;
-	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
-	src = src64 & src64_mask;
-
-	r = dst;
-	r = ~r & 0xFFFF;
-
-	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
-	 * the following sequence of operations in 2's complement arithmetic:
-	 *    a '- b = (a - b) % 0xFFFF.
-	 *
-	 * In order to prevent an underflow for the below subtraction, in which
-	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
-	 * minuend), we first add a multiple of the 0xFFFF modulus to the
-	 * minuend. The number we add to the minuend needs to be a 34-bit number
-	 * or higher, so for readability reasons we picked the 36-bit multiple.
-	 * We are effectively turning the 16-bit minuend into a 36-bit number:
-	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
-	 */
-	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */
-
-	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
-	 * result (the output r) is a 36-bit number.
-	 */
-	r -= (src >> 32) + (src & 0xFFFFFFFF);
-
-	/* The first input is a 16-bit number. The second input is a 20-bit
-	 * number. Their sum is a 21-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_cksub_field_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3758,47 +3576,9 @@ instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
-	uint32_t *src32_ptr;
-	uint64_t r0, r1;
-
-	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src32_ptr = (uint32_t *)&src_struct[0];
-
-	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
-	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
-	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
-	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
-	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
-
-	/* The first input is a 16-bit number. The second input is a 19-bit
-	 * number. Their sum is a 20-bit number.
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	r0 = ~r0 & 0xFFFF;
-	r0 = r0 ? r0 : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r0;
+	__instr_alu_ckadd_struct20_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3809,49 +3589,9 @@ instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
-	uint32_t *src32_ptr;
-	uint64_t r = 0;
-	uint32_t i;
-
-	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src32_ptr = (uint32_t *)&src_struct[0];
-
-	/* The max number of 32-bit words in a 256-byte header is 8 = 2^3.
-	 * Therefore, in the worst case scenario, a 35-bit number is added to a
-	 * 16-bit number (the input r), so the output r is 36-bit number.
-	 */
-	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
-		r += *src32_ptr;
-
-	/* The first input is a 16-bit number. The second input is a 20-bit
-	 * number. Their sum is a 21-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1000E).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_ckadd_struct_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index ec8e342a5d..7c4a2c05ef 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2211,4 +2211,620 @@ __instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct
 	__instr_dma_ht_many_exec(p, t, ip, 8);
 }
 
+/*
+ * alu.
+ */
+static inline void
+__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add\n", p->thread_id);
+
+	ALU(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, +);
+}
+
+static inline void
+__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub\n", p->thread_id);
+
+	ALU(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, -);
+}
+
+static inline void
+__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl\n", p->thread_id);
+
+	ALU(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr\n", p->thread_id);
+
+	ALU(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);
+
+	/* Structs. */
+	ALU_MI(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, >>);
+}
+
+static inline void
+__instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and\n", p->thread_id);
+
+	ALU(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (i)\n", p->thread_id);
+
+	ALU_I(t, ip, &);
+}
+
+static inline void
+__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or\n", p->thread_id);
+
+	ALU(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		      struct thread *t,
+		      const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (i)\n", p->thread_id);
+
+	ALU_I(t, ip, |);
+}
+
+static inline void
+__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor\n", p->thread_id);
+
+	ALU(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (i)\n", p->thread_id);
+
+	ALU_I(t, ip, ^);
+}
+
+static inline void
+__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
+			     struct thread *t,
+			     const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr, dst;
+	uint64_t *src64_ptr, src64, src64_mask, src;
+	uint64_t r;
+
+	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
+	src64 = *src64_ptr;
+	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
+	src = src64 & src64_mask;
+
+	r = dst;
+	r = ~r & 0xFFFF;
+
+	/* The first input (r) is a 16-bit number. The second and the third
+	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
+	 * three numbers (output r) is a 34-bit number.
+	 */
+	r += (src >> 32) + (src & 0xFFFFFFFF);
+
+	/* The first input is a 16-bit number. The second input is an 18-bit
+	 * number. In the worst case scenario, the sum of the two numbers is a
+	 * 19-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
+	 * therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
+static inline void
+__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
+			     struct thread *t,
+			     const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr, dst;
+	uint64_t *src64_ptr, src64, src64_mask, src;
+	uint64_t r;
+
+	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
+	src64 = *src64_ptr;
+	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
+	src = src64 & src64_mask;
+
+	r = dst;
+	r = ~r & 0xFFFF;
+
+	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
+	 * the following sequence of operations in 2's complement arithmetic:
+	 *    a '- b = (a - b) % 0xFFFF.
+	 *
+	 * In order to prevent an underflow for the below subtraction, in which
+	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
+	 * minuend), we first add a multiple of the 0xFFFF modulus to the
+	 * minuend. The number we add to the minuend needs to be a 34-bit number
+	 * or higher, so for readability reasons we picked the 36-bit multiple.
+	 * We are effectively turning the 16-bit minuend into a 36-bit number:
+	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
+	 */
+	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */
+
+	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
+	 * result (the output r) is a 36-bit number.
+	 */
+	r -= (src >> 32) + (src & 0xFFFFFFFF);
+
+	/* The first input is a 16-bit number. The second input is a 20-bit
+	 * number. Their sum is a 21-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
+static inline void
+__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
+				struct thread *t,
+				const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr;
+	uint32_t *src32_ptr;
+	uint64_t r0, r1;
+
+	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src32_ptr = (uint32_t *)&src_struct[0];
+
+	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
+	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
+	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
+	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
+	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
+
+	/* The first input is a 16-bit number. The second input is a 19-bit
+	 * number. Their sum is a 20-bit number.
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	r0 = ~r0 & 0xFFFF;
+	r0 = r0 ? r0 : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r0;
+}
+
+static inline void
+__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
+			      struct thread *t,
+			      const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr;
+	uint32_t *src32_ptr;
+	uint64_t r = 0;
+	uint32_t i;
+
+	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src32_ptr = (uint32_t *)&src_struct[0];
+
+	/* The max number of 32-bit words in a 256-byte header is 8 = 2^3.
+	 * Therefore, in the worst case scenario, a 35-bit number is added to a
+	 * 16-bit number (the input r), so the output r is a 36-bit number.
+	 */
+	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
+		r += *src32_ptr;
+
+	/* The first input is a 16-bit number. The second input is a 20-bit
+	 * number. Their sum is a 21-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 13/24] pipeline: create inline functions for register instructions
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (10 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
                     ` (12 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Create inline functions for the register instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 320 ++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 475 +++++++++++++++++++++++
 2 files changed, 502 insertions(+), 293 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 8956b6de27..c7117bb6da 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3901,134 +3901,14 @@ instr_regadd_translate(struct rte_swx_pipeline *p,
 	return 0;
 }
 
-static inline uint64_t *
-instr_regarray_regarray(struct rte_swx_pipeline *p, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-	return r->regarray;
-}
-
-static inline uint64_t
-instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
-	uint64_t idx = idx64 & idx64_mask & r->size_mask;
-
-	return idx;
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline uint64_t
-instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;
-
-	return idx;
-}
-
-#else
-
-#define instr_regarray_idx_nbo instr_regarray_idx_hbo
-
-#endif
-
-static inline uint64_t
-instr_regarray_idx_imm(struct rte_swx_pipeline *p, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-
-	uint64_t idx = ip->regarray.idx_val & r->size_mask;
-
-	return idx;
-}
-
-static inline uint64_t
-instr_regarray_src_hbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
-	uint64_t src = src64 & src64_mask;
-
-	return src;
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline uint64_t
-instr_regarray_src_nbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);
-
-	return src;
-}
-
-#else
-
-#define instr_regarray_src_nbo instr_regarray_src_hbo
-
-#endif
-
-static inline void
-instr_regarray_dst_hbo_src_hbo_set(struct thread *t, struct instruction *ip, uint64_t src)
-{
-	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
-	uint64_t dst64 = *dst64_ptr;
-	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
-
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
-
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline void
-instr_regarray_dst_nbo_src_hbo_set(struct thread *t, struct instruction *ip, uint64_t src)
-{
-	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
-	uint64_t dst64 = *dst64_ptr;
-	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
-
-	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
-}
-
-#else
-
-#define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
-
-#endif
-
 static inline void
 instr_regprefetch_rh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	rte_prefetch0(&regarray[idx]);
+	__instr_regprefetch_rh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4039,14 +3919,9 @@ instr_regprefetch_rm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	rte_prefetch0(&regarray[idx]);
+	__instr_regprefetch_rm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4057,14 +3932,9 @@ instr_regprefetch_ri_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	rte_prefetch0(&regarray[idx]);
+	__instr_regprefetch_ri_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4075,14 +3945,9 @@ instr_regrd_hrh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_hrh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4093,14 +3958,9 @@ instr_regrd_hrm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_hrm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4111,14 +3971,9 @@ instr_regrd_mrh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_mrh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4129,12 +3984,9 @@ instr_regrd_mrm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_mrm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4145,14 +3997,9 @@ instr_regrd_hri_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_hri_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4163,14 +4010,9 @@ instr_regrd_mri_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_mri_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4181,15 +4023,9 @@ instr_regwr_rhh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rhh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4200,15 +4036,9 @@ instr_regwr_rhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4219,15 +4049,9 @@ instr_regwr_rmh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rmh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4238,15 +4062,9 @@ instr_regwr_rmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4257,15 +4075,9 @@ instr_regwr_rhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] = src;
+	__instr_regwr_rhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4276,15 +4088,9 @@ instr_regwr_rmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] = src;
+	__instr_regwr_rmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4295,15 +4101,9 @@ instr_regwr_rih_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rih_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4314,15 +4114,9 @@ instr_regwr_rim_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rim_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4333,15 +4127,9 @@ instr_regwr_rii_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] = src;
+	__instr_regwr_rii_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4352,15 +4140,9 @@ instr_regadd_rhh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rhh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4371,15 +4153,9 @@ instr_regadd_rhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4390,15 +4166,9 @@ instr_regadd_rmh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rmh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4409,15 +4179,9 @@ instr_regadd_rmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4428,15 +4192,9 @@ instr_regadd_rhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] += src;
+	__instr_regadd_rhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4447,15 +4205,9 @@ instr_regadd_rmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] += src;
+	__instr_regadd_rmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4466,15 +4218,9 @@ instr_regadd_rih_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rih_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4485,15 +4231,9 @@ instr_regadd_rim_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rim_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4504,15 +4244,9 @@ instr_regadd_rii_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] += src;
+	__instr_regadd_rii_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 7c4a2c05ef..2526c2f4c7 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2827,4 +2827,479 @@ __instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
 	*dst16_ptr = (uint16_t)r;
 }
 
+/*
+ * Register array.
+ */
+static inline uint64_t *
+instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
+{
+	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
+	return r->regarray;
+}
+
+static inline uint64_t
+instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
+
+	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
+	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
+	uint64_t idx64 = *idx64_ptr;
+	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
+	uint64_t idx = idx64 & idx64_mask & r->size_mask;
+
+	return idx;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint64_t
+instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
+
+	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
+	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
+	uint64_t idx64 = *idx64_ptr;
+	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;
+
+	return idx;
+}
+
+#else
+
+#define instr_regarray_idx_nbo instr_regarray_idx_hbo
+
+#endif
+
+static inline uint64_t
+instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
+{
+	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
+
+	uint64_t idx = ip->regarray.idx_val & r->size_mask;
+
+	return idx;
+}
+
+static inline uint64_t
+instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
+	uint64_t src = src64 & src64_mask;
+
+	return src;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint64_t
+instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);
+
+	return src;
+}
+
+#else
+
+#define instr_regarray_src_nbo instr_regarray_src_hbo
+
+#endif
+
+static inline void
+instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
+{
+	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
+	uint64_t dst64 = *dst64_ptr;
+	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
+
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
+
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline void
+instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
+{
+	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
+	uint64_t dst64 = *dst64_ptr;
+	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
+
+	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
+}
+
+#else
+
+#define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
+
+#endif
+
+static inline void
+__instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
+			    struct thread *t,
+			    const struct instruction *ip)
+{
+	uint64_t *regarray, idx;
+
+	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_nbo(p, t, ip);
+	rte_prefetch0(&regarray[idx]);
+}
+
+static inline void
+__instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
+			    struct thread *t,
+			    const struct instruction *ip)
+{
+	uint64_t *regarray, idx;
+
+	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_hbo(p, t, ip);
+	rte_prefetch0(&regarray[idx]);
+}
+
+static inline void
+__instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
+			    struct thread *t __rte_unused,
+			    const struct instruction *ip)
+{
+	uint64_t *regarray, idx;
+
+	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_imm(p, ip);
+	rte_prefetch0(&regarray[idx]);
+}
+
+static inline void
+__instr_regrd_hrh_exec(struct rte_swx_pipeline *p,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	uint64_t *regarray, idx;
+
+	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_nbo(p, t, ip);
+	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+}
+
+static inline void
+__instr_regrd_hrm_exec(struct rte_swx_pipeline *p,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	uint64_t *regarray, idx;
+
+	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
+
+	/* Index is read in host byte order; destination is written in network byte order. */
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_hbo(p, t, ip);
+	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+}
+
+static inline void
+__instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx;
+
+	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_nbo(p, t, ip);
+	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+}
+
+static inline void
+__instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx;
+
+	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_hbo(p, t, ip);
+	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+}
+
+static inline void
+__instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx;
+
+	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_imm(p, ip);
+	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+}
+
+static inline void
+__instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx;
+
+	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_imm(p, ip);
+	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+}
+
+static inline void
+__instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_nbo(p, t, ip);
+	src = instr_regarray_src_nbo(t, ip);
+	regarray[idx] = src;
+}
+
+static inline void
+__instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_nbo(p, t, ip);
+	src = instr_regarray_src_hbo(t, ip);
+	regarray[idx] = src;
+}
+
+static inline void
+__instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_hbo(p, t, ip);
+	src = instr_regarray_src_nbo(t, ip);
+	regarray[idx] = src;
+}
+
+static inline void
+__instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_hbo(p, t, ip);
+	src = instr_regarray_src_hbo(t, ip);
+	regarray[idx] = src;
+}
+
+static inline void
+__instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_nbo(p, t, ip);
+	src = ip->regarray.dstsrc_val;
+	regarray[idx] = src;
+}
+
+static inline void
+__instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_hbo(p, t, ip);
+	src = ip->regarray.dstsrc_val;
+	regarray[idx] = src;
+}
+
+static inline void
+__instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_imm(p, ip);
+	src = instr_regarray_src_nbo(t, ip);
+	regarray[idx] = src;
+}
+
+static inline void
+__instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_imm(p, ip);
+	src = instr_regarray_src_hbo(t, ip);
+	regarray[idx] = src;
+}
+
+static inline void
+__instr_regwr_rii_exec(struct rte_swx_pipeline *p,
+		       struct thread *t __rte_unused,
+		       const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_imm(p, ip);
+	src = ip->regarray.dstsrc_val;
+	regarray[idx] = src;
+}
+
+static inline void
+__instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_nbo(p, t, ip);
+	src = instr_regarray_src_nbo(t, ip);
+	regarray[idx] += src;
+}
+
+static inline void
+__instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_nbo(p, t, ip);
+	src = instr_regarray_src_hbo(t, ip);
+	regarray[idx] += src;
+}
+
+static inline void
+__instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_hbo(p, t, ip);
+	src = instr_regarray_src_nbo(t, ip);
+	regarray[idx] += src;
+}
+
+static inline void
+__instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_hbo(p, t, ip);
+	src = instr_regarray_src_hbo(t, ip);
+	regarray[idx] += src;
+}
+
+static inline void
+__instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_nbo(p, t, ip);
+	src = ip->regarray.dstsrc_val;
+	regarray[idx] += src;
+}
+
+static inline void
+__instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_hbo(p, t, ip);
+	src = ip->regarray.dstsrc_val;
+	regarray[idx] += src;
+}
+
+static inline void
+__instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_imm(p, ip);
+	src = instr_regarray_src_nbo(t, ip);
+	regarray[idx] += src;
+}
+
+static inline void
+__instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_imm(p, ip);
+	src = instr_regarray_src_hbo(t, ip);
+	regarray[idx] += src;
+}
+
+static inline void
+__instr_regadd_rii_exec(struct rte_swx_pipeline *p,
+			struct thread *t __rte_unused,
+			const struct instruction *ip)
+{
+	uint64_t *regarray, idx, src;
+
+	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
+
+	regarray = instr_regarray_regarray(p, ip);
+	idx = instr_regarray_idx_imm(p, ip);
+	src = ip->regarray.dstsrc_val;
+	regarray[idx] += src;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 14/24] pipeline: create inline functions for meter instructions
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (11 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
                     ` (11 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Create inline functions for the meter instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 457 +------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 541 +++++++++++++++++++++++
 2 files changed, 558 insertions(+), 440 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index c7117bb6da..8b64c57652 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -4470,119 +4470,14 @@ instr_meter_translate(struct rte_swx_pipeline *p,
 	CHECK(0, EINVAL);
 }
 
-static inline struct meter *
-instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
-	uint64_t idx = idx64 & idx64_mask & r->size_mask;
-
-	return &r->metarray[idx];
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline struct meter *
-instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
-
-	return &r->metarray[idx];
-}
-
-#else
-
-#define instr_meter_idx_nbo instr_meter_idx_hbo
-
-#endif
-
-static inline struct meter *
-instr_meter_idx_imm(struct rte_swx_pipeline *p, struct instruction *ip)
-{
-	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
-
-	uint64_t idx =  ip->meter.idx_val & r->size_mask;
-
-	return &r->metarray[idx];
-}
-
-static inline uint32_t
-instr_meter_length_hbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
-	uint64_t src = src64 & src64_mask;
-
-	return (uint32_t)src;
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline uint32_t
-instr_meter_length_nbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
-
-	return (uint32_t)src;
-}
-
-#else
-
-#define instr_meter_length_nbo instr_meter_length_hbo
-
-#endif
-
-static inline enum rte_color
-instr_meter_color_in_hbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
-	uint64_t src = src64 & src64_mask;
-
-	return (enum rte_color)src;
-}
-
-static inline void
-instr_meter_color_out_hbo_set(struct thread *t, struct instruction *ip, enum rte_color color_out)
-{
-	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
-	uint64_t dst64 = *dst64_ptr;
-	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
-
-	uint64_t src = (uint64_t)color_out;
-
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
-}
-
 static inline void
 instr_metprefetch_h_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-
-	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m);
+	__instr_metprefetch_h_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4593,13 +4488,9 @@ instr_metprefetch_m_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-
-	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m);
+	__instr_metprefetch_m_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4610,13 +4501,9 @@ instr_metprefetch_i_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-
-	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m);
+	__instr_metprefetch_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4627,35 +4514,9 @@ instr_meter_hhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4666,35 +4527,9 @@ instr_meter_hhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4705,73 +4540,22 @@ instr_meter_hmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
 }
+
 static inline void
 instr_meter_hmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4782,35 +4566,9 @@ instr_meter_mhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4821,35 +4579,9 @@ instr_meter_mhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4860,35 +4592,9 @@ instr_meter_mmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4899,35 +4605,9 @@ instr_meter_mmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4938,35 +4618,9 @@ instr_meter_ihm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_ihm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4977,35 +4631,9 @@ instr_meter_ihi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_ihi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -5016,73 +4644,22 @@ instr_meter_imm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_imm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
 }
+
 static inline void
 instr_meter_imi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_imi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 2526c2f4c7..791adfb471 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -3302,4 +3302,545 @@ __instr_regadd_rii_exec(struct rte_swx_pipeline *p,
 	regarray[idx] += src;
 }
 
+/*
+ * metarray.
+ */
+static inline struct meter *
+instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
+
+	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
+	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
+	uint64_t idx64 = *idx64_ptr;
+	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
+	uint64_t idx = idx64 & idx64_mask & r->size_mask;
+
+	return &r->metarray[idx];
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline struct meter *
+instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
+
+	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
+	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
+	uint64_t idx64 = *idx64_ptr;
+	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
+
+	return &r->metarray[idx];
+}
+
+#else
+
+#define instr_meter_idx_nbo instr_meter_idx_hbo
+
+#endif
+
+static inline struct meter *
+instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
+{
+	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
+
+	uint64_t idx =  ip->meter.idx_val & r->size_mask;
+
+	return &r->metarray[idx];
+}
+
+static inline uint32_t
+instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
+	uint64_t src = src64 & src64_mask;
+
+	return (uint32_t)src;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint32_t
+instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
+
+	return (uint32_t)src;
+}
+
+#else
+
+#define instr_meter_length_nbo instr_meter_length_hbo
+
+#endif
+
+static inline enum rte_color
+instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
+	uint64_t src = src64 & src64_mask;
+
+	return (enum rte_color)src;
+}
+
+static inline void
+instr_meter_color_out_hbo_set(struct thread *t,
+			      const struct instruction *ip,
+			      enum rte_color color_out)
+{
+	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
+	uint64_t dst64 = *dst64_ptr;
+	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
+
+	uint64_t src = (uint64_t)color_out;
+
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
+}
+
+static inline void
+__instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	struct meter *m;
+
+	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m);
+}
+
+static inline void
+__instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	struct meter *m;
+
+	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m);
+}
+
+static inline void
+__instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
+			   struct thread *t __rte_unused,
+			   const struct instruction *ip)
+{
+	struct meter *m;
+
+	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m);
+}
+
+static inline void
+__instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 15/24] pipeline: create inline functions for instruction operands
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (12 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
                     ` (10 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Create inline functions to get the instruction operands.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline_internal.h | 29 ++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 791adfb471..efd136196f 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -928,6 +928,35 @@ struct thread {
 #define HEADER_VALID(thread, header_id) \
 	MASK64_BIT_GET((thread)->valid_headers, header_id)
 
+static inline uint64_t
+instr_operand_hbo(struct thread *t, const struct instr_operand *x)
+{
+	uint8_t *x_struct = t->structs[x->struct_id];
+	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
+	uint64_t x64 = *x64_ptr;
+	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
+
+	return x64 & x64_mask;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint64_t
+instr_operand_nbo(struct thread *t, const struct instr_operand *x)
+{
+	uint8_t *x_struct = t->structs[x->struct_id];
+	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
+	uint64_t x64 = *x64_ptr;
+
+	return ntoh64(x64) >> (64 - x->n_bits);
+}
+
+#else
+
+#define instr_operand_nbo instr_operand_hbo
+
+#endif
+
 #define ALU(thread, ip, operator)  \
 {                                                                              \
 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 16/24] pipeline: enable persistent instruction meta-data
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (13 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 17/24] pipeline: introduce action functions Cristian Dumitrescu
                     ` (9 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Save the instruction meta-data for later use instead of freeing it up
once the instruction translation is completed.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 9 ++++++---
 lib/pipeline/rte_swx_pipeline_internal.h | 2 ++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 8b64c57652..4099e364f5 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -6456,13 +6456,14 @@ instruction_config(struct rte_swx_pipeline *p,
 
 	if (a) {
 		a->instructions = instr;
+		a->instruction_data = data;
 		a->n_instructions = n_instructions;
 	} else {
 		p->instructions = instr;
+		p->instruction_data = data;
 		p->n_instructions = n_instructions;
 	}
 
-	free(data);
 	return 0;
 
 error:
@@ -6811,8 +6812,8 @@ action_build(struct rte_swx_pipeline *p)
 {
 	struct action *action;
 
-	p->action_instructions = calloc(p->n_actions,
-					sizeof(struct instruction *));
+	/* p->action_instructions. */
+	p->action_instructions = calloc(p->n_actions, sizeof(struct instruction *));
 	CHECK(p->action_instructions, ENOMEM);
 
 	TAILQ_FOREACH(action, &p->actions, node)
@@ -6841,6 +6842,7 @@ action_free(struct rte_swx_pipeline *p)
 			break;
 
 		TAILQ_REMOVE(&p->actions, action, node);
+		free(action->instruction_data);
 		free(action->instructions);
 		free(action);
 	}
@@ -8777,6 +8779,7 @@ rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 	if (!p)
 		return;
 
+	free(p->instruction_data);
 	free(p->instructions);
 
 	metarray_free(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index efd136196f..7a02d6cb5f 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -693,6 +693,7 @@ struct action {
 	struct struct_type *st;
 	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
 	struct instruction *instructions;
+	struct instruction_data *instruction_data;
 	uint32_t n_instructions;
 	uint32_t id;
 };
@@ -1388,6 +1389,7 @@ struct rte_swx_pipeline {
 	struct regarray_runtime *regarray_runtime;
 	struct metarray_runtime *metarray_runtime;
 	struct instruction *instructions;
+	struct instruction_data *instruction_data;
 	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
 
 	uint32_t n_structs;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 17/24] pipeline: introduce action functions
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (14 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
                     ` (8 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

For better performance, the option to run a single function per action
is now provided, which requires a single function call per action that
can be better optimized by the C compiler, as opposed to one function
call per instruction. Special table lookup instructions are added to
support this feature.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 127 +++++++++++++++++++++++
 lib/pipeline/rte_swx_pipeline_internal.h |   6 ++
 2 files changed, 133 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 4099e364f5..0d02548137 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2097,6 +2097,62 @@ instr_table_exec(struct rte_swx_pipeline *p)
 	thread_ip_action_call(p, t, action_id);
 }
 
+static inline void
+instr_table_af_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+	uint32_t table_id = ip->table.table_id;
+	struct rte_swx_table_state *ts = &t->table_state[table_id];
+	struct table_runtime *table = &t->tables[table_id];
+	struct table_statistics *stats = &p->table_stats[table_id];
+	uint64_t action_id, n_pkts_hit, n_pkts_action;
+	uint8_t *action_data;
+	action_func_t action_func;
+	int done, hit;
+
+	/* Table. */
+	done = table->func(ts->obj,
+			   table->mailbox,
+			   table->key,
+			   &action_id,
+			   &action_data,
+			   &hit);
+	if (!done) {
+		/* Thread. */
+		TRACE("[Thread %2u] table %u (not finalized)\n",
+		      p->thread_id,
+		      table_id);
+
+		thread_yield(p);
+		return;
+	}
+
+	action_id = hit ? action_id : ts->default_action_id;
+	action_data = hit ? action_data : ts->default_action_data;
+	action_func = p->action_funcs[action_id];
+	n_pkts_hit = stats->n_pkts_hit[hit];
+	n_pkts_action = stats->n_pkts_action[action_id];
+
+	TRACE("[Thread %2u] table %u (%s, action %u)\n",
+	      p->thread_id,
+	      table_id,
+	      hit ? "hit" : "miss",
+	      (uint32_t)action_id);
+
+	t->action_id = action_id;
+	t->structs[0] = action_data;
+	t->hit = hit;
+	stats->n_pkts_hit[hit] = n_pkts_hit + 1;
+	stats->n_pkts_action[action_id] = n_pkts_action + 1;
+
+	/* Thread. */
+	thread_ip_inc(p);
+
+	/* Action. */
+	action_func(p);
+}
+
 static inline void
 instr_selector_exec(struct rte_swx_pipeline *p)
 {
@@ -2193,6 +2249,68 @@ instr_learner_exec(struct rte_swx_pipeline *p)
 	thread_ip_action_call(p, t, action_id);
 }
 
+static inline void
+instr_learner_af_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+	uint32_t learner_id = ip->table.table_id;
+	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
+		p->n_selectors + learner_id];
+	struct learner_runtime *l = &t->learners[learner_id];
+	struct learner_statistics *stats = &p->learner_stats[learner_id];
+	uint64_t action_id, n_pkts_hit, n_pkts_action, time;
+	uint8_t *action_data;
+	action_func_t action_func;
+	int done, hit;
+
+	/* Table. */
+	time = rte_get_tsc_cycles();
+
+	done = rte_swx_table_learner_lookup(ts->obj,
+					    l->mailbox,
+					    time,
+					    l->key,
+					    &action_id,
+					    &action_data,
+					    &hit);
+	if (!done) {
+		/* Thread. */
+		TRACE("[Thread %2u] learner %u (not finalized)\n",
+		      p->thread_id,
+		      learner_id);
+
+		thread_yield(p);
+		return;
+	}
+
+	action_id = hit ? action_id : ts->default_action_id;
+	action_data = hit ? action_data : ts->default_action_data;
+	action_func = p->action_funcs[action_id];
+	n_pkts_hit = stats->n_pkts_hit[hit];
+	n_pkts_action = stats->n_pkts_action[action_id];
+
+	TRACE("[Thread %2u] learner %u (%s, action %u)\n",
+	      p->thread_id,
+	      learner_id,
+	      hit ? "hit" : "miss",
+	      (uint32_t)action_id);
+
+	t->action_id = action_id;
+	t->structs[0] = action_data;
+	t->hit = hit;
+	t->learner_id = learner_id;
+	t->time = time;
+	stats->n_pkts_hit[hit] = n_pkts_hit + 1;
+	stats->n_pkts_action[action_id] = n_pkts_action + 1;
+
+	/* Thread. */
+	thread_ip_action_call(p, t, action_id);
+
+	/* Action */
+	action_func(p);
+}
+
 /*
  * learn.
  */
@@ -6618,8 +6736,10 @@ static instr_exec_t instruction_table[] = {
 	[INSTR_METER_IMI] = instr_meter_imi_exec,
 
 	[INSTR_TABLE] = instr_table_exec,
+	[INSTR_TABLE_AF] = instr_table_af_exec,
 	[INSTR_SELECTOR] = instr_selector_exec,
 	[INSTR_LEARNER] = instr_learner_exec,
+	[INSTR_LEARNER_AF] = instr_learner_af_exec,
 	[INSTR_LEARNER_LEARN] = instr_learn_exec,
 	[INSTR_LEARNER_FORGET] = instr_forget_exec,
 	[INSTR_EXTERN_OBJ] = instr_extern_obj_exec,
@@ -6819,12 +6939,19 @@ action_build(struct rte_swx_pipeline *p)
 	TAILQ_FOREACH(action, &p->actions, node)
 		p->action_instructions[action->id] = action->instructions;
 
+	/* p->action_funcs. */
+	p->action_funcs = calloc(p->n_actions, sizeof(action_func_t));
+	CHECK(p->action_funcs, ENOMEM);
+
 	return 0;
 }
 
 static void
 action_build_free(struct rte_swx_pipeline *p)
 {
+	free(p->action_funcs);
+	p->action_funcs = NULL;
+
 	free(p->action_instructions);
 	p->action_instructions = NULL;
 }
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 7a02d6cb5f..3578a10501 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -443,8 +443,10 @@ enum instruction_type {
 
 	/* table TABLE */
 	INSTR_TABLE,
+	INSTR_TABLE_AF,
 	INSTR_SELECTOR,
 	INSTR_LEARNER,
+	INSTR_LEARNER_AF,
 
 	/* learn LEARNER ACTION_NAME */
 	INSTR_LEARNER_LEARN,
@@ -687,6 +689,9 @@ struct instruction_data {
 /*
  * Action.
  */
+typedef void
+(*action_func_t)(struct rte_swx_pipeline *p);
+
 struct action {
 	TAILQ_ENTRY(action) node;
 	char name[RTE_SWX_NAME_SIZE];
@@ -1382,6 +1387,7 @@ struct rte_swx_pipeline {
 	struct port_in_runtime *in;
 	struct port_out_runtime *out;
 	struct instruction **action_instructions;
+	action_func_t *action_funcs;
 	struct rte_swx_table_state *table_state;
 	struct table_statistics *table_stats;
 	struct selector_statistics *selector_stats;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 18/24] pipeline: introduce custom instructions
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (15 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 17/24] pipeline: introduce action functions Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
                     ` (7 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

For better performance, the option to create custom instructions when
the program is translated and add them on-the-fly to the pipeline is
now provided. Multiple regular instructions can now be consolidated
into a single C function optimized by the C compiler directly.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 6 +++++-
 lib/pipeline/rte_swx_pipeline_internal.h | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 0d02548137..598009c024 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -6592,7 +6592,11 @@ instruction_config(struct rte_swx_pipeline *p,
 
 typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
 
-static instr_exec_t instruction_table[] = {
+#ifndef RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX
+#define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 256
+#endif
+
+static instr_exec_t instruction_table[RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX] = {
 	[INSTR_RX] = instr_rx_exec,
 	[INSTR_TX] = instr_tx_exec,
 	[INSTR_TX_I] = instr_tx_i_exec,
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 3578a10501..4ad6dd42dd 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -541,6 +541,9 @@ enum instruction_type {
 	 * Return from action
 	 */
 	INSTR_RETURN,
+
+	/* Start of custom instructions. */
+	INSTR_CUSTOM_0,
 };
 
 struct instr_operand {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 19/24] pipeline: introduce pipeline compilation
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (16 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
                     ` (6 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Lay the foundation to generate C code for the pipeline: C functions
for actions and custom instructions are generated, built as a shared
object library, and loaded into the pipeline.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 44 +++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 598009c024..4c0e3043ec 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -8953,6 +8953,9 @@ rte_swx_pipeline_instructions_config(struct rte_swx_pipeline *p,
 	return 0;
 }
 
+static int
+pipeline_compile(struct rte_swx_pipeline *p);
+
 int
 rte_swx_pipeline_build(struct rte_swx_pipeline *p)
 {
@@ -9018,6 +9021,9 @@ rte_swx_pipeline_build(struct rte_swx_pipeline *p)
 		goto error;
 
 	p->build_done = 1;
+
+	pipeline_compile(p);
+
 	return 0;
 
 error:
@@ -9760,3 +9766,41 @@ rte_swx_ctl_meter_stats_read(struct rte_swx_pipeline *p,
 
 	return 0;
 }
+
+/*
+ * Pipeline compilation.
+ */
+static int
+pipeline_codegen(struct rte_swx_pipeline *p)
+{
+	FILE *f = NULL;
+
+	if (!p)
+		return -EINVAL;
+
+	/* Create the .c file. */
+	f = fopen("/tmp/pipeline.c", "w");
+	if (!f)
+		return -EIO;
+
+	/* Include the .h file. */
+	fprintf(f, "#include \"rte_swx_pipeline_internal.h\"\n");
+
+	/* Close the .c file. */
+	fclose(f);
+
+	return 0;
+}
+
+static int
+pipeline_compile(struct rte_swx_pipeline *p)
+{
+	int status = 0;
+
+	/* Code generation. */
+	status = pipeline_codegen(p);
+	if (status)
+		return status;
+
+	return status;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 20/24] pipeline: export pipeline instructions to file
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (17 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 21/24] pipeline: generate action functions Cristian Dumitrescu
                     ` (5 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Export the array of translated instructions to a C file. There is one
such array per action and one for the pipeline.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 1093 +++++++++++++++++++++++++++++++
 1 file changed, 1093 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 4c0e3043ec..58132e635f 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -9770,9 +9770,1093 @@ rte_swx_ctl_meter_stats_read(struct rte_swx_pipeline *p,
 /*
  * Pipeline compilation.
  */
+static const char *
+instr_type_to_name(struct instruction *instr)
+{
+	switch (instr->type) {
+	case INSTR_RX: return "INSTR_RX";
+
+	case INSTR_TX: return "INSTR_TX";
+	case INSTR_TX_I: return "INSTR_TX_I";
+
+	case INSTR_HDR_EXTRACT: return "INSTR_HDR_EXTRACT";
+	case INSTR_HDR_EXTRACT2: return "INSTR_HDR_EXTRACT2";
+	case INSTR_HDR_EXTRACT3: return "INSTR_HDR_EXTRACT3";
+	case INSTR_HDR_EXTRACT4: return "INSTR_HDR_EXTRACT4";
+	case INSTR_HDR_EXTRACT5: return "INSTR_HDR_EXTRACT5";
+	case INSTR_HDR_EXTRACT6: return "INSTR_HDR_EXTRACT6";
+	case INSTR_HDR_EXTRACT7: return "INSTR_HDR_EXTRACT7";
+	case INSTR_HDR_EXTRACT8: return "INSTR_HDR_EXTRACT8";
+
+	case INSTR_HDR_EXTRACT_M: return "INSTR_HDR_EXTRACT_M";
+
+	case INSTR_HDR_LOOKAHEAD: return "INSTR_HDR_LOOKAHEAD";
+
+	case INSTR_HDR_EMIT: return "INSTR_HDR_EMIT";
+	case INSTR_HDR_EMIT_TX: return "INSTR_HDR_EMIT_TX";
+	case INSTR_HDR_EMIT2_TX: return "INSTR_HDR_EMIT2_TX";
+	case INSTR_HDR_EMIT3_TX: return "INSTR_HDR_EMIT3_TX";
+	case INSTR_HDR_EMIT4_TX: return "INSTR_HDR_EMIT4_TX";
+	case INSTR_HDR_EMIT5_TX: return "INSTR_HDR_EMIT5_TX";
+	case INSTR_HDR_EMIT6_TX: return "INSTR_HDR_EMIT6_TX";
+	case INSTR_HDR_EMIT7_TX: return "INSTR_HDR_EMIT7_TX";
+	case INSTR_HDR_EMIT8_TX: return "INSTR_HDR_EMIT8_TX";
+
+	case INSTR_HDR_VALIDATE: return "INSTR_HDR_VALIDATE";
+	case INSTR_HDR_INVALIDATE: return "INSTR_HDR_INVALIDATE";
+
+	case INSTR_MOV: return "INSTR_MOV";
+	case INSTR_MOV_MH: return "INSTR_MOV_MH";
+	case INSTR_MOV_HM: return "INSTR_MOV_HM";
+	case INSTR_MOV_HH: return "INSTR_MOV_HH";
+	case INSTR_MOV_I: return "INSTR_MOV_I";
+
+	case INSTR_DMA_HT: return "INSTR_DMA_HT";
+	case INSTR_DMA_HT2: return "INSTR_DMA_HT2";
+	case INSTR_DMA_HT3: return "INSTR_DMA_HT3";
+	case INSTR_DMA_HT4: return "INSTR_DMA_HT4";
+	case INSTR_DMA_HT5: return "INSTR_DMA_HT5";
+	case INSTR_DMA_HT6: return "INSTR_DMA_HT6";
+	case INSTR_DMA_HT7: return "INSTR_DMA_HT7";
+	case INSTR_DMA_HT8: return "INSTR_DMA_HT8";
+
+	case INSTR_ALU_ADD: return "INSTR_ALU_ADD";
+	case INSTR_ALU_ADD_MH: return "INSTR_ALU_ADD_MH";
+	case INSTR_ALU_ADD_HM: return "INSTR_ALU_ADD_HM";
+	case INSTR_ALU_ADD_HH: return "INSTR_ALU_ADD_HH";
+	case INSTR_ALU_ADD_MI: return "INSTR_ALU_ADD_MI";
+	case INSTR_ALU_ADD_HI: return "INSTR_ALU_ADD_HI";
+
+	case INSTR_ALU_SUB: return "INSTR_ALU_SUB";
+	case INSTR_ALU_SUB_MH: return "INSTR_ALU_SUB_MH";
+	case INSTR_ALU_SUB_HM: return "INSTR_ALU_SUB_HM";
+	case INSTR_ALU_SUB_HH: return "INSTR_ALU_SUB_HH";
+	case INSTR_ALU_SUB_MI: return "INSTR_ALU_SUB_MI";
+	case INSTR_ALU_SUB_HI: return "INSTR_ALU_SUB_HI";
+
+	case INSTR_ALU_CKADD_FIELD: return "INSTR_ALU_CKADD_FIELD";
+	case INSTR_ALU_CKADD_STRUCT20: return "INSTR_ALU_CKADD_STRUCT20";
+	case INSTR_ALU_CKADD_STRUCT: return "INSTR_ALU_CKADD_STRUCT";
+	case INSTR_ALU_CKSUB_FIELD: return "INSTR_ALU_CKSUB_FIELD";
+
+	case INSTR_ALU_AND: return "INSTR_ALU_AND";
+	case INSTR_ALU_AND_MH: return "INSTR_ALU_AND_MH";
+	case INSTR_ALU_AND_HM: return "INSTR_ALU_AND_HM";
+	case INSTR_ALU_AND_HH: return "INSTR_ALU_AND_HH";
+	case INSTR_ALU_AND_I: return "INSTR_ALU_AND_I";
+
+	case INSTR_ALU_OR: return "INSTR_ALU_OR";
+	case INSTR_ALU_OR_MH: return "INSTR_ALU_OR_MH";
+	case INSTR_ALU_OR_HM: return "INSTR_ALU_OR_HM";
+	case INSTR_ALU_OR_HH: return "INSTR_ALU_OR_HH";
+	case INSTR_ALU_OR_I: return "INSTR_ALU_OR_I";
+
+	case INSTR_ALU_XOR: return "INSTR_ALU_XOR";
+	case INSTR_ALU_XOR_MH: return "INSTR_ALU_XOR_MH";
+	case INSTR_ALU_XOR_HM: return "INSTR_ALU_XOR_HM";
+	case INSTR_ALU_XOR_HH: return "INSTR_ALU_XOR_HH";
+	case INSTR_ALU_XOR_I: return "INSTR_ALU_XOR_I";
+
+	case INSTR_ALU_SHL: return "INSTR_ALU_SHL";
+	case INSTR_ALU_SHL_MH: return "INSTR_ALU_SHL_MH";
+	case INSTR_ALU_SHL_HM: return "INSTR_ALU_SHL_HM";
+	case INSTR_ALU_SHL_HH: return "INSTR_ALU_SHL_HH";
+	case INSTR_ALU_SHL_MI: return "INSTR_ALU_SHL_MI";
+	case INSTR_ALU_SHL_HI: return "INSTR_ALU_SHL_HI";
+
+	case INSTR_ALU_SHR: return "INSTR_ALU_SHR";
+	case INSTR_ALU_SHR_MH: return "INSTR_ALU_SHR_MH";
+	case INSTR_ALU_SHR_HM: return "INSTR_ALU_SHR_HM";
+	case INSTR_ALU_SHR_HH: return "INSTR_ALU_SHR_HH";
+	case INSTR_ALU_SHR_MI: return "INSTR_ALU_SHR_MI";
+	case INSTR_ALU_SHR_HI: return "INSTR_ALU_SHR_HI";
+
+	case INSTR_REGPREFETCH_RH: return "INSTR_REGPREFETCH_RH";
+	case INSTR_REGPREFETCH_RM: return "INSTR_REGPREFETCH_RM";
+	case INSTR_REGPREFETCH_RI: return "INSTR_REGPREFETCH_RI";
+
+	case INSTR_REGRD_HRH: return "INSTR_REGRD_HRH";
+	case INSTR_REGRD_HRM: return "INSTR_REGRD_HRM";
+	case INSTR_REGRD_HRI: return "INSTR_REGRD_HRI";
+	case INSTR_REGRD_MRH: return "INSTR_REGRD_MRH";
+	case INSTR_REGRD_MRM: return "INSTR_REGRD_MRM";
+	case INSTR_REGRD_MRI: return "INSTR_REGRD_MRI";
+
+	case INSTR_REGWR_RHH: return "INSTR_REGWR_RHH";
+	case INSTR_REGWR_RHM: return "INSTR_REGWR_RHM";
+	case INSTR_REGWR_RHI: return "INSTR_REGWR_RHI";
+	case INSTR_REGWR_RMH: return "INSTR_REGWR_RMH";
+	case INSTR_REGWR_RMM: return "INSTR_REGWR_RMM";
+	case INSTR_REGWR_RMI: return "INSTR_REGWR_RMI";
+	case INSTR_REGWR_RIH: return "INSTR_REGWR_RIH";
+	case INSTR_REGWR_RIM: return "INSTR_REGWR_RIM";
+	case INSTR_REGWR_RII: return "INSTR_REGWR_RII";
+
+	case INSTR_REGADD_RHH: return "INSTR_REGADD_RHH";
+	case INSTR_REGADD_RHM: return "INSTR_REGADD_RHM";
+	case INSTR_REGADD_RHI: return "INSTR_REGADD_RHI";
+	case INSTR_REGADD_RMH: return "INSTR_REGADD_RMH";
+	case INSTR_REGADD_RMM: return "INSTR_REGADD_RMM";
+	case INSTR_REGADD_RMI: return "INSTR_REGADD_RMI";
+	case INSTR_REGADD_RIH: return "INSTR_REGADD_RIH";
+	case INSTR_REGADD_RIM: return "INSTR_REGADD_RIM";
+	case INSTR_REGADD_RII: return "INSTR_REGADD_RII";
+
+	case INSTR_METPREFETCH_H: return "INSTR_METPREFETCH_H";
+	case INSTR_METPREFETCH_M: return "INSTR_METPREFETCH_M";
+	case INSTR_METPREFETCH_I: return "INSTR_METPREFETCH_I";
+
+	case INSTR_METER_HHM: return "INSTR_METER_HHM";
+	case INSTR_METER_HHI: return "INSTR_METER_HHI";
+	case INSTR_METER_HMM: return "INSTR_METER_HMM";
+	case INSTR_METER_HMI: return "INSTR_METER_HMI";
+	case INSTR_METER_MHM: return "INSTR_METER_MHM";
+	case INSTR_METER_MHI: return "INSTR_METER_MHI";
+	case INSTR_METER_MMM: return "INSTR_METER_MMM";
+	case INSTR_METER_MMI: return "INSTR_METER_MMI";
+	case INSTR_METER_IHM: return "INSTR_METER_IHM";
+	case INSTR_METER_IHI: return "INSTR_METER_IHI";
+	case INSTR_METER_IMM: return "INSTR_METER_IMM";
+	case INSTR_METER_IMI: return "INSTR_METER_IMI";
+
+	case INSTR_TABLE: return "INSTR_TABLE";
+	case INSTR_TABLE_AF: return "INSTR_TABLE_AF";
+	case INSTR_SELECTOR: return "INSTR_SELECTOR";
+	case INSTR_LEARNER: return "INSTR_LEARNER";
+	case INSTR_LEARNER_AF: return "INSTR_LEARNER_AF";
+
+	case INSTR_LEARNER_LEARN: return "INSTR_LEARNER_LEARN";
+	case INSTR_LEARNER_FORGET: return "INSTR_LEARNER_FORGET";
+
+	case INSTR_EXTERN_OBJ: return "INSTR_EXTERN_OBJ";
+	case INSTR_EXTERN_FUNC: return "INSTR_EXTERN_FUNC";
+
+	case INSTR_JMP: return "INSTR_JMP";
+	case INSTR_JMP_VALID: return "INSTR_JMP_VALID";
+	case INSTR_JMP_INVALID: return "INSTR_JMP_INVALID";
+	case INSTR_JMP_HIT: return "INSTR_JMP_HIT";
+	case INSTR_JMP_MISS: return "INSTR_JMP_MISS";
+	case INSTR_JMP_ACTION_HIT: return "INSTR_JMP_ACTION_HIT";
+	case INSTR_JMP_ACTION_MISS: return "INSTR_JMP_ACTION_MISS";
+	case INSTR_JMP_EQ: return "INSTR_JMP_EQ";
+	case INSTR_JMP_EQ_MH: return "INSTR_JMP_EQ_MH";
+	case INSTR_JMP_EQ_HM: return "INSTR_JMP_EQ_HM";
+	case INSTR_JMP_EQ_HH: return "INSTR_JMP_EQ_HH";
+	case INSTR_JMP_EQ_I: return "INSTR_JMP_EQ_I";
+	case INSTR_JMP_NEQ: return "INSTR_JMP_NEQ";
+	case INSTR_JMP_NEQ_MH: return "INSTR_JMP_NEQ_MH";
+	case INSTR_JMP_NEQ_HM: return "INSTR_JMP_NEQ_HM";
+	case INSTR_JMP_NEQ_HH: return "INSTR_JMP_NEQ_HH";
+	case INSTR_JMP_NEQ_I: return "INSTR_JMP_NEQ_I";
+	case INSTR_JMP_LT: return "INSTR_JMP_LT";
+	case INSTR_JMP_LT_MH: return "INSTR_JMP_LT_MH";
+	case INSTR_JMP_LT_HM: return "INSTR_JMP_LT_HM";
+	case INSTR_JMP_LT_HH: return "INSTR_JMP_LT_HH";
+	case INSTR_JMP_LT_MI: return "INSTR_JMP_LT_MI";
+	case INSTR_JMP_LT_HI: return "INSTR_JMP_LT_HI";
+	case INSTR_JMP_GT: return "INSTR_JMP_GT";
+	case INSTR_JMP_GT_MH: return "INSTR_JMP_GT_MH";
+	case INSTR_JMP_GT_HM: return "INSTR_JMP_GT_HM";
+	case INSTR_JMP_GT_HH: return "INSTR_JMP_GT_HH";
+	case INSTR_JMP_GT_MI: return "INSTR_JMP_GT_MI";
+	case INSTR_JMP_GT_HI: return "INSTR_JMP_GT_HI";
+
+	case INSTR_RETURN: return "INSTR_RETURN";
+
+	default: return "INSTR_UNKNOWN";
+	}
+}
+
+typedef void
+(*instruction_export_t)(struct instruction *, FILE *);
+
+static void
+instr_io_export(struct instruction *instr, FILE *f)
+{
+	uint32_t n_io = 0, n_io_imm = 0, n_hdrs = 0, i;
+
+	/* n_io, n_io_imm, n_hdrs. */
+	if (instr->type == INSTR_RX ||
+	    instr->type == INSTR_TX ||
+	    instr->type == INSTR_HDR_EXTRACT_M ||
+	    (instr->type >= INSTR_HDR_EMIT_TX && instr->type <= INSTR_HDR_EMIT8_TX))
+		n_io = 1;
+
+	if (instr->type == INSTR_TX_I)
+		n_io_imm = 1;
+
+	if (instr->type >= INSTR_HDR_EXTRACT && instr->type <= INSTR_HDR_EXTRACT8)
+		n_hdrs = 1 + (instr->type - INSTR_HDR_EXTRACT);
+
+	if (instr->type == INSTR_HDR_EXTRACT_M ||
+	    instr->type == INSTR_HDR_LOOKAHEAD ||
+	    instr->type == INSTR_HDR_EMIT)
+		n_hdrs = 1;
+
+	if (instr->type >= INSTR_HDR_EMIT_TX && instr->type <= INSTR_HDR_EMIT8_TX)
+		n_hdrs = 1 + (instr->type - INSTR_HDR_EMIT_TX);
+
+	/* instr. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n",
+		instr_type_to_name(instr));
+
+	/* instr.io. */
+	fprintf(f,
+		"\t\t.io = {\n");
+
+	/* instr.io.io. */
+	if (n_io)
+		fprintf(f,
+			"\t\t\t.io = {\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t},\n",
+			instr->io.io.offset,
+			instr->io.io.n_bits);
+
+	if (n_io_imm)
+		fprintf(f,
+			"\t\t\t.io = {\n"
+			"\t\t\t\t.val = %u,\n"
+			"\t\t\t},\n",
+			instr->io.io.val);
+
+	/* instr.io.hdr. */
+	if (n_hdrs) {
+		fprintf(f,
+			"\t\t.hdr = {\n");
+
+		/* instr.io.hdr.header_id. */
+		fprintf(f,
+			"\t\t\t.header_id = {");
+
+		for (i = 0; i < n_hdrs; i++)
+			fprintf(f,
+				"%u, ",
+				instr->io.hdr.header_id[i]);
+
+		fprintf(f,
+			"},\n");
+
+		/* instr.io.hdr.struct_id. */
+		fprintf(f,
+			"\t\t\t.struct_id = {");
+
+		for (i = 0; i < n_hdrs; i++)
+			fprintf(f,
+				"%u, ",
+				instr->io.hdr.struct_id[i]);
+
+		fprintf(f,
+			"},\n");
+
+		/* instr.io.hdr.n_bytes. */
+		fprintf(f,
+			"\t\t\t.n_bytes = {");
+
+		for (i = 0; i < n_hdrs; i++)
+			fprintf(f,
+				"%u, ",
+				instr->io.hdr.n_bytes[i]);
+
+		fprintf(f,
+			"},\n");
+
+		/* instr.io.hdr - closing curly brace. */
+		fprintf(f,
+			"\t\t\t}\n,");
+	}
+
+	/* instr.io - closing curly brace. */
+	fprintf(f,
+		"\t\t},\n");
+
+	/* instr - closing curly brace. */
+	fprintf(f,
+		"\t},\n");
+}
+
+static void
+instr_hdr_validate_export(struct instruction *instr, FILE *f)
+{
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.valid = {\n"
+		"\t\t\t.header_id = %u,\n"
+		"\t\t},\n"
+		"\t},\n",
+		instr_type_to_name(instr),
+		instr->valid.header_id);
+}
+
+static void
+instr_mov_export(struct instruction *instr, FILE *f)
+{
+	if (instr->type != INSTR_MOV_I)
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.mov = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.src = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->mov.dst.struct_id,
+			instr->mov.dst.n_bits,
+			instr->mov.dst.offset,
+			instr->mov.src.struct_id,
+			instr->mov.src.n_bits,
+			instr->mov.src.offset);
+	else
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.mov = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t}\n,"
+			"\t\t\t.src_val = %" PRIu64 ",\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->mov.dst.struct_id,
+			instr->mov.dst.n_bits,
+			instr->mov.dst.offset,
+			instr->mov.src_val);
+}
+
+static void
+instr_dma_ht_export(struct instruction *instr, FILE *f)
+{
+	uint32_t n_dma = 0, i;
+
+	/* n_dma. */
+	n_dma = 1 + (instr->type - INSTR_DMA_HT);
+
+	/* instr. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n",
+		instr_type_to_name(instr));
+
+	/* instr.dma. */
+	fprintf(f,
+		"\t\t.dma = {\n");
+
+	/* instr.dma.dst. */
+	fprintf(f,
+		"\t\t\t.dst = {\n");
+
+	/* instr.dma.dst.header_id. */
+	fprintf(f,
+		"\t\t\t\t.header_id = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.dst.header_id[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma.dst.struct_id. */
+	fprintf(f,
+		"\t\t\t\t.struct_id = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.dst.struct_id[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma.dst - closing curly brace. */
+	fprintf(f,
+		"\t\t\t},\n");
+
+	/* instr.dma.src. */
+	fprintf(f,
+		"\t\t\t.src = {\n");
+
+	/* instr.dma.src.offset. */
+	fprintf(f,
+		"\t\t\t\t.offset = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.src.offset[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma.src - closing curly brace. */
+	fprintf(f,
+		"\t\t\t},\n");
+
+	/* instr.dma.n_bytes. */
+	fprintf(f,
+		"\t\t\t.n_bytes = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.n_bytes[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma - closing curly brace. */
+	fprintf(f,
+		"\t\t},\n");
+
+	/* instr - closing curly brace. */
+	fprintf(f,
+		"\t},\n");
+}
+
+static void
+instr_alu_export(struct instruction *instr, FILE *f)
+{
+	int imm = 0;
+
+	if (instr->type == INSTR_ALU_ADD_MI ||
+	    instr->type == INSTR_ALU_ADD_HI ||
+	    instr->type == INSTR_ALU_SUB_MI ||
+	    instr->type == INSTR_ALU_SUB_HI ||
+	    instr->type == INSTR_ALU_SHL_MI ||
+	    instr->type == INSTR_ALU_SHL_HI ||
+	    instr->type == INSTR_ALU_SHR_MI ||
+	    instr->type == INSTR_ALU_SHR_HI ||
+	    instr->type == INSTR_ALU_AND_I ||
+	    instr->type == INSTR_ALU_OR_I ||
+	    instr->type == INSTR_ALU_XOR_I)
+		imm = 1;
+
+	if (!imm)
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.alu = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.src = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->alu.dst.struct_id,
+			instr->alu.dst.n_bits,
+			instr->alu.dst.offset,
+			instr->alu.src.struct_id,
+			instr->alu.src.n_bits,
+			instr->alu.src.offset);
+	else
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.alu = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t}\n,"
+			"\t\t\t.src_val = %" PRIu64 ",\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->alu.dst.struct_id,
+			instr->alu.dst.n_bits,
+			instr->alu.dst.offset,
+			instr->alu.src_val);
+}
+
+static void
+instr_reg_export(struct instruction *instr __rte_unused, FILE *f __rte_unused)
+{
+	int prefetch  = 0, idx_imm = 0, src_imm = 0;
+
+	if (instr->type == INSTR_REGPREFETCH_RH ||
+	    instr->type == INSTR_REGPREFETCH_RM ||
+	    instr->type == INSTR_REGPREFETCH_RI)
+		prefetch = 1;
+
+	/* index is the 3rd operand for the regrd instruction and the 2nd
+	 * operand for the regwr and regadd instructions.
+	 */
+	if (instr->type == INSTR_REGPREFETCH_RI ||
+	    instr->type == INSTR_REGRD_HRI ||
+	    instr->type == INSTR_REGRD_MRI ||
+	    instr->type == INSTR_REGWR_RIH ||
+	    instr->type == INSTR_REGWR_RIM ||
+	    instr->type == INSTR_REGWR_RII ||
+	    instr->type == INSTR_REGADD_RIH ||
+	    instr->type == INSTR_REGADD_RIM ||
+	    instr->type == INSTR_REGADD_RII)
+		idx_imm = 1;
+
+	/* src is the 3rd operand for the regwr and regadd instructions. */
+	if (instr->type == INSTR_REGWR_RHI ||
+	    instr->type == INSTR_REGWR_RMI ||
+	    instr->type == INSTR_REGWR_RII ||
+	    instr->type == INSTR_REGADD_RHI ||
+	    instr->type == INSTR_REGADD_RMI ||
+	    instr->type == INSTR_REGADD_RII)
+		src_imm = 1;
+
+	/* instr.regarray.regarray_id. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.regarray = {\n"
+		"\t\t\t.regarray_id = %u,\n",
+		instr_type_to_name(instr),
+		instr->regarray.regarray_id);
+
+	/* instr.regarray.idx / instr.regarray.idx_val. */
+	if (!idx_imm)
+		fprintf(f,
+			"\t\t\t\t.idx = {\n"
+			"\t\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t\t.offset = %u,\n"
+			"\t\t\t\t},\n",
+			instr->regarray.idx.struct_id,
+			instr->regarray.idx.n_bits,
+			instr->regarray.idx.offset);
+	else
+		fprintf(f,
+			"\t\t\t\t.idx_val = %u,\n",
+			instr->regarray.idx_val);
+
+	/* instr.regarray.dstsrc / instr.regarray.dstsrc_val. */
+	if (!prefetch) {
+		if (!src_imm)
+			fprintf(f,
+				"\t\t\t\t.dstsrc = {\n"
+				"\t\t\t\t\t.struct_id = %u,\n"
+				"\t\t\t\t\t.n_bits = %u,\n"
+				"\t\t\t\t\t.offset = %u,\n"
+				"\t\t\t\t},\n",
+				instr->regarray.dstsrc.struct_id,
+				instr->regarray.dstsrc.n_bits,
+				instr->regarray.dstsrc.offset);
+		else
+			fprintf(f,
+				"\t\t\t\t.dstsrc_val = %" PRIu64 ",\n",
+				instr->regarray.dstsrc_val);
+	}
+
+	/* instr.regarray and instr - closing curly braces. */
+	fprintf(f,
+		"\t\t},\n"
+		"\t},\n");
+}
+
+static void
+instr_meter_export(struct instruction *instr __rte_unused, FILE *f __rte_unused)
+{
+	int prefetch  = 0, idx_imm = 0, color_in_imm = 0;
+
+	if (instr->type == INSTR_METPREFETCH_H ||
+	    instr->type == INSTR_METPREFETCH_M ||
+	    instr->type == INSTR_METPREFETCH_I)
+		prefetch = 1;
+
+	/* idx_imm. */
+	if (instr->type == INSTR_METPREFETCH_I ||
+	    instr->type == INSTR_METER_IHM ||
+	    instr->type == INSTR_METER_IHI ||
+	    instr->type == INSTR_METER_IMM ||
+	    instr->type == INSTR_METER_IMI)
+		idx_imm = 1;
+
+	/* color_in_imm. */
+	if (instr->type == INSTR_METER_HHI ||
+	    instr->type == INSTR_METER_HMI ||
+	    instr->type == INSTR_METER_MHI ||
+	    instr->type == INSTR_METER_MMI ||
+	    instr->type == INSTR_METER_IHI ||
+	    instr->type == INSTR_METER_IMI)
+		color_in_imm = 1;
+
+	/* instr.meter.metarray_id. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.meter = {\n"
+		"\t\t\t.metarray_id = %u,\n",
+		instr_type_to_name(instr),
+		instr->meter.metarray_id);
+
+	/* instr.meter.idx / instr.meter.idx_val. */
+	if (!idx_imm)
+		fprintf(f,
+			"\t\t\t.idx = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->meter.idx.struct_id,
+			instr->meter.idx.n_bits,
+			instr->meter.idx.offset);
+	else
+		fprintf(f,
+			"\t\t\t.idx_val = %u,\n",
+			instr->meter.idx_val);
+
+	if (!prefetch) {
+		/* instr.meter.length. */
+		fprintf(f,
+			"\t\t\t.length = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->meter.length.struct_id,
+			instr->meter.length.n_bits,
+			instr->meter.length.offset);
+
+		/* instr.meter.color_in / instr.meter.color_in_val. */
+		if (!color_in_imm)
+			fprintf(f,
+				"\t\t\t.color_in = {\n"
+				"\t\t\t\t.struct_id = %u,\n"
+				"\t\t\t\t.n_bits = %u,\n"
+				"\t\t\t\t.offset = %u,\n"
+				"\t\t\t},\n",
+				instr->meter.color_in.struct_id,
+				instr->meter.color_in.n_bits,
+				instr->meter.color_in.offset);
+		else
+			fprintf(f,
+				"\t\t\t.color_in_val = %u,\n",
+				(uint32_t)instr->meter.color_in_val);
+
+		/* instr.meter.color_out. */
+		fprintf(f,
+			"\t\t\t.color_out = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->meter.color_out.struct_id,
+			instr->meter.color_out.n_bits,
+			instr->meter.color_out.offset);
+	}
+
+	/* instr.meter and instr - closing curly braces. */
+	fprintf(f,
+		"\t\t},\n"
+		"\t},\n");
+}
+
+static void
+instr_table_export(struct instruction *instr,
+		FILE *f)
+{
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.table = {\n"
+		"\t\t\t.table_id = %u,\n"
+		"\t\t},\n"
+		"\t},\n",
+		instr_type_to_name(instr),
+		instr->table.table_id);
+}
+
+static void
+instr_learn_export(struct instruction *instr, FILE *f)
+{
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.learn = {\n"
+		"\t\t\t\t.action_id = %u,\n"
+		"\t\t},\n"
+		"\t},\n",
+		instr_type_to_name(instr),
+		instr->learn.action_id);
+}
+
+/*
+ * Write the initializer entry for a forget instruction to the generated C
+ * file. Only the instruction type is written.
+ */
+static void
+instr_forget_export(struct instruction *instr, FILE *f)
+{
+	const char *type_name = instr_type_to_name(instr);
+
+	fprintf(f, "\t{\n");
+	fprintf(f, "\t\t.type = %s,\n", type_name);
+	fprintf(f, "\t},\n");
+}
+
+/*
+ * Write the initializer entry for an extern instruction to the generated C
+ * file. INSTR_EXTERN_OBJ gets the .ext_obj member (object ID and function ID),
+ * any other type gets the .ext_func member (function ID).
+ */
+static void
+instr_extern_export(struct instruction *instr, FILE *f)
+{
+	const char *type_name = instr_type_to_name(instr);
+
+	/* instr - opening curly brace and instruction type. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n",
+		type_name);
+
+	/* instr.ext_obj / instr.ext_func. */
+	if (instr->type != INSTR_EXTERN_OBJ)
+		fprintf(f,
+			"\t\t.ext_func = {\n"
+			"\t\t\t.ext_func_id = %u,\n"
+			"\t\t},\n",
+			instr->ext_func.ext_func_id);
+	else
+		fprintf(f,
+			"\t\t.ext_obj = {\n"
+			"\t\t\t.ext_obj_id = %u,\n"
+			"\t\t\t.func_id = %u,\n"
+			"\t\t},\n",
+			instr->ext_obj.ext_obj_id,
+			instr->ext_obj.func_id);
+
+	/* instr - closing curly brace. */
+	fprintf(f,
+		"\t},\n");
+}
+
+/*
+ * Write the initializer entry for a jump instruction to the generated C file.
+ *
+ * The .jmp.ip member is always written as NULL. The other .jmp members that
+ * are written depend on the instruction type:
+ *   - valid/invalid jumps: .header_id;
+ *   - action hit/miss jumps: .action_id;
+ *   - comparisons between two operands: .a and .b;
+ *   - comparisons against an immediate value: .a and .b_val;
+ *   - all the other jump types: nothing else.
+ */
+static void
+instr_jmp_export(struct instruction *instr, FILE *f)
+{
+	/* instr and instr.jmp - opening curly braces, type and ip. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.jmp = {\n"
+		"\t\t\t.ip = NULL,\n",
+		instr_type_to_name(instr));
+
+	switch (instr->type) {
+	case INSTR_JMP_VALID:
+	case INSTR_JMP_INVALID:
+		/* instr.jmp.header_id. */
+		fprintf(f,
+			"\t\t\t.header_id = %u,\n",
+			instr->jmp.header_id);
+		break;
+
+	case INSTR_JMP_ACTION_HIT:
+	case INSTR_JMP_ACTION_MISS:
+		/* instr.jmp.action_id. */
+		fprintf(f,
+			"\t\t\t.action_id = %u,\n",
+			instr->jmp.action_id);
+		break;
+
+	case INSTR_JMP_EQ:
+	case INSTR_JMP_EQ_MH:
+	case INSTR_JMP_EQ_HM:
+	case INSTR_JMP_EQ_HH:
+	case INSTR_JMP_NEQ:
+	case INSTR_JMP_NEQ_MH:
+	case INSTR_JMP_NEQ_HM:
+	case INSTR_JMP_NEQ_HH:
+	case INSTR_JMP_LT:
+	case INSTR_JMP_LT_MH:
+	case INSTR_JMP_LT_HM:
+	case INSTR_JMP_LT_HH:
+	case INSTR_JMP_GT:
+	case INSTR_JMP_GT_MH:
+	case INSTR_JMP_GT_HM:
+	case INSTR_JMP_GT_HH:
+		/* instr.jmp.a and instr.jmp.b. */
+		fprintf(f,
+			"\t\t\t.a = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.b = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->jmp.a.struct_id,
+			instr->jmp.a.n_bits,
+			instr->jmp.a.offset,
+			instr->jmp.b.struct_id,
+			instr->jmp.b.n_bits,
+			instr->jmp.b.offset);
+		break;
+
+	case INSTR_JMP_EQ_I:
+	case INSTR_JMP_NEQ_I:
+	case INSTR_JMP_LT_MI:
+	case INSTR_JMP_LT_HI:
+	case INSTR_JMP_GT_MI:
+	case INSTR_JMP_GT_HI:
+		/* instr.jmp.a and instr.jmp.b_val. The comma goes right after the
+		 * closing curly brace of .a, before the new line.
+		 */
+		fprintf(f,
+			"\t\t\t.a = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.b_val = %" PRIu64 ",\n",
+			instr->jmp.a.struct_id,
+			instr->jmp.a.n_bits,
+			instr->jmp.a.offset,
+			instr->jmp.b_val);
+		break;
+
+	default:
+		/* No other .jmp member is written for the remaining types. */
+		break;
+	}
+
+	/* instr.jmp and instr - closing curly braces. */
+	fprintf(f,
+		"\t\t},\n"
+		"\t},\n");
+}
+
+/*
+ * Write the initializer entry for a return instruction to the generated C
+ * file. Only the instruction type is written.
+ */
+static void
+instr_return_export(struct instruction *instr,
+		FILE *f)
+{
+	const char *type_name = instr_type_to_name(instr);
+
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t},\n",
+		type_name);
+}
+
+/*
+ * Export function for each instruction type, indexed by the instruction type.
+ * The table is only ever read, hence const.
+ */
+static const instruction_export_t export_table[] = {
+	[INSTR_RX] = instr_io_export,
+
+	[INSTR_TX] = instr_io_export,
+	[INSTR_TX_I] = instr_io_export,
+
+	[INSTR_HDR_EXTRACT] = instr_io_export,
+	[INSTR_HDR_EXTRACT2] = instr_io_export,
+	[INSTR_HDR_EXTRACT3] = instr_io_export,
+	[INSTR_HDR_EXTRACT4] = instr_io_export,
+	[INSTR_HDR_EXTRACT5] = instr_io_export,
+	[INSTR_HDR_EXTRACT6] = instr_io_export,
+	[INSTR_HDR_EXTRACT7] = instr_io_export,
+	[INSTR_HDR_EXTRACT8] = instr_io_export,
+
+	[INSTR_HDR_EXTRACT_M] = instr_io_export,
+
+	[INSTR_HDR_LOOKAHEAD] = instr_io_export,
+
+	[INSTR_HDR_EMIT] = instr_io_export,
+	[INSTR_HDR_EMIT_TX] = instr_io_export,
+	[INSTR_HDR_EMIT2_TX] = instr_io_export,
+	[INSTR_HDR_EMIT3_TX] = instr_io_export,
+	[INSTR_HDR_EMIT4_TX] = instr_io_export,
+	[INSTR_HDR_EMIT5_TX] = instr_io_export,
+	[INSTR_HDR_EMIT6_TX] = instr_io_export,
+	[INSTR_HDR_EMIT7_TX] = instr_io_export,
+	[INSTR_HDR_EMIT8_TX] = instr_io_export,
+
+	[INSTR_HDR_VALIDATE] = instr_hdr_validate_export,
+	[INSTR_HDR_INVALIDATE] = instr_hdr_validate_export,
+
+	[INSTR_MOV] = instr_mov_export,
+	[INSTR_MOV_MH] = instr_mov_export,
+	[INSTR_MOV_HM] = instr_mov_export,
+	[INSTR_MOV_HH] = instr_mov_export,
+	[INSTR_MOV_I] = instr_mov_export,
+
+	[INSTR_DMA_HT]  = instr_dma_ht_export,
+	[INSTR_DMA_HT2] = instr_dma_ht_export,
+	[INSTR_DMA_HT3] = instr_dma_ht_export,
+	[INSTR_DMA_HT4] = instr_dma_ht_export,
+	[INSTR_DMA_HT5] = instr_dma_ht_export,
+	[INSTR_DMA_HT6] = instr_dma_ht_export,
+	[INSTR_DMA_HT7] = instr_dma_ht_export,
+	[INSTR_DMA_HT8] = instr_dma_ht_export,
+
+	[INSTR_ALU_ADD] = instr_alu_export,
+	[INSTR_ALU_ADD_MH] = instr_alu_export,
+	[INSTR_ALU_ADD_HM] = instr_alu_export,
+	[INSTR_ALU_ADD_HH] = instr_alu_export,
+	[INSTR_ALU_ADD_MI] = instr_alu_export,
+	[INSTR_ALU_ADD_HI] = instr_alu_export,
+
+	[INSTR_ALU_SUB] = instr_alu_export,
+	[INSTR_ALU_SUB_MH] = instr_alu_export,
+	[INSTR_ALU_SUB_HM] = instr_alu_export,
+	[INSTR_ALU_SUB_HH] = instr_alu_export,
+	[INSTR_ALU_SUB_MI] = instr_alu_export,
+	[INSTR_ALU_SUB_HI] = instr_alu_export,
+
+	[INSTR_ALU_CKADD_FIELD] = instr_alu_export,
+	[INSTR_ALU_CKADD_STRUCT] = instr_alu_export,
+	[INSTR_ALU_CKADD_STRUCT20] = instr_alu_export,
+	[INSTR_ALU_CKSUB_FIELD] = instr_alu_export,
+
+	[INSTR_ALU_AND] = instr_alu_export,
+	[INSTR_ALU_AND_MH] = instr_alu_export,
+	[INSTR_ALU_AND_HM] = instr_alu_export,
+	[INSTR_ALU_AND_HH] = instr_alu_export,
+	[INSTR_ALU_AND_I] = instr_alu_export,
+
+	[INSTR_ALU_OR] = instr_alu_export,
+	[INSTR_ALU_OR_MH] = instr_alu_export,
+	[INSTR_ALU_OR_HM] = instr_alu_export,
+	[INSTR_ALU_OR_HH] = instr_alu_export,
+	[INSTR_ALU_OR_I] = instr_alu_export,
+
+	[INSTR_ALU_XOR] = instr_alu_export,
+	[INSTR_ALU_XOR_MH] = instr_alu_export,
+	[INSTR_ALU_XOR_HM] = instr_alu_export,
+	[INSTR_ALU_XOR_HH] = instr_alu_export,
+	[INSTR_ALU_XOR_I] = instr_alu_export,
+
+	[INSTR_ALU_SHL] = instr_alu_export,
+	[INSTR_ALU_SHL_MH] = instr_alu_export,
+	[INSTR_ALU_SHL_HM] = instr_alu_export,
+	[INSTR_ALU_SHL_HH] = instr_alu_export,
+	[INSTR_ALU_SHL_MI] = instr_alu_export,
+	[INSTR_ALU_SHL_HI] = instr_alu_export,
+
+	[INSTR_ALU_SHR] = instr_alu_export,
+	[INSTR_ALU_SHR_MH] = instr_alu_export,
+	[INSTR_ALU_SHR_HM] = instr_alu_export,
+	[INSTR_ALU_SHR_HH] = instr_alu_export,
+	[INSTR_ALU_SHR_MI] = instr_alu_export,
+	[INSTR_ALU_SHR_HI] = instr_alu_export,
+
+	[INSTR_REGPREFETCH_RH] = instr_reg_export,
+	[INSTR_REGPREFETCH_RM] = instr_reg_export,
+	[INSTR_REGPREFETCH_RI] = instr_reg_export,
+
+	[INSTR_REGRD_HRH] = instr_reg_export,
+	[INSTR_REGRD_HRM] = instr_reg_export,
+	[INSTR_REGRD_MRH] = instr_reg_export,
+	[INSTR_REGRD_MRM] = instr_reg_export,
+	[INSTR_REGRD_HRI] = instr_reg_export,
+	[INSTR_REGRD_MRI] = instr_reg_export,
+
+	[INSTR_REGWR_RHH] = instr_reg_export,
+	[INSTR_REGWR_RHM] = instr_reg_export,
+	[INSTR_REGWR_RMH] = instr_reg_export,
+	[INSTR_REGWR_RMM] = instr_reg_export,
+	[INSTR_REGWR_RHI] = instr_reg_export,
+	[INSTR_REGWR_RMI] = instr_reg_export,
+	[INSTR_REGWR_RIH] = instr_reg_export,
+	[INSTR_REGWR_RIM] = instr_reg_export,
+	[INSTR_REGWR_RII] = instr_reg_export,
+
+	[INSTR_REGADD_RHH] = instr_reg_export,
+	[INSTR_REGADD_RHM] = instr_reg_export,
+	[INSTR_REGADD_RMH] = instr_reg_export,
+	[INSTR_REGADD_RMM] = instr_reg_export,
+	[INSTR_REGADD_RHI] = instr_reg_export,
+	[INSTR_REGADD_RMI] = instr_reg_export,
+	[INSTR_REGADD_RIH] = instr_reg_export,
+	[INSTR_REGADD_RIM] = instr_reg_export,
+	[INSTR_REGADD_RII] = instr_reg_export,
+
+	[INSTR_METPREFETCH_H] = instr_meter_export,
+	[INSTR_METPREFETCH_M] = instr_meter_export,
+	[INSTR_METPREFETCH_I] = instr_meter_export,
+
+	[INSTR_METER_HHM] = instr_meter_export,
+	[INSTR_METER_HHI] = instr_meter_export,
+	[INSTR_METER_HMM] = instr_meter_export,
+	[INSTR_METER_HMI] = instr_meter_export,
+	[INSTR_METER_MHM] = instr_meter_export,
+	[INSTR_METER_MHI] = instr_meter_export,
+	[INSTR_METER_MMM] = instr_meter_export,
+	[INSTR_METER_MMI] = instr_meter_export,
+	[INSTR_METER_IHM] = instr_meter_export,
+	[INSTR_METER_IHI] = instr_meter_export,
+	[INSTR_METER_IMM] = instr_meter_export,
+	[INSTR_METER_IMI] = instr_meter_export,
+
+	[INSTR_TABLE] = instr_table_export,
+	[INSTR_TABLE_AF] = instr_table_export,
+	[INSTR_SELECTOR] = instr_table_export,
+	[INSTR_LEARNER] = instr_table_export,
+	[INSTR_LEARNER_AF] = instr_table_export,
+
+	[INSTR_LEARNER_LEARN] = instr_learn_export,
+	[INSTR_LEARNER_FORGET] = instr_forget_export,
+
+	[INSTR_EXTERN_OBJ] = instr_extern_export,
+	[INSTR_EXTERN_FUNC] = instr_extern_export,
+
+	[INSTR_JMP] = instr_jmp_export,
+	[INSTR_JMP_VALID] = instr_jmp_export,
+	[INSTR_JMP_INVALID] = instr_jmp_export,
+	[INSTR_JMP_HIT] = instr_jmp_export,
+	[INSTR_JMP_MISS] = instr_jmp_export,
+	[INSTR_JMP_ACTION_HIT] = instr_jmp_export,
+	[INSTR_JMP_ACTION_MISS] = instr_jmp_export,
+
+	[INSTR_JMP_EQ] = instr_jmp_export,
+	[INSTR_JMP_EQ_MH] = instr_jmp_export,
+	[INSTR_JMP_EQ_HM] = instr_jmp_export,
+	[INSTR_JMP_EQ_HH] = instr_jmp_export,
+	[INSTR_JMP_EQ_I] = instr_jmp_export,
+
+	[INSTR_JMP_NEQ] = instr_jmp_export,
+	[INSTR_JMP_NEQ_MH] = instr_jmp_export,
+	[INSTR_JMP_NEQ_HM] = instr_jmp_export,
+	[INSTR_JMP_NEQ_HH] = instr_jmp_export,
+	[INSTR_JMP_NEQ_I] = instr_jmp_export,
+
+	[INSTR_JMP_LT] = instr_jmp_export,
+	[INSTR_JMP_LT_MH] = instr_jmp_export,
+	[INSTR_JMP_LT_HM] = instr_jmp_export,
+	[INSTR_JMP_LT_HH] = instr_jmp_export,
+	[INSTR_JMP_LT_MI] = instr_jmp_export,
+	[INSTR_JMP_LT_HI] = instr_jmp_export,
+
+	[INSTR_JMP_GT] = instr_jmp_export,
+	[INSTR_JMP_GT_MH] = instr_jmp_export,
+	[INSTR_JMP_GT_HM] = instr_jmp_export,
+	[INSTR_JMP_GT_HH] = instr_jmp_export,
+	[INSTR_JMP_GT_MI] = instr_jmp_export,
+	[INSTR_JMP_GT_HI] = instr_jmp_export,
+
+	[INSTR_RETURN] = instr_return_export,
+};
+
+/*
+ * Write the action_<name>_instructions[] array for action a to the generated C
+ * file, with one initializer entry per action instruction. Each entry is
+ * written by the export function registered for the instruction type.
+ */
+static void
+action_data_codegen(struct action *a, FILE *f)
+{
+	uint32_t pos;
+
+	/* Array definition - opening curly brace. */
+	fprintf(f,
+		"static const struct instruction action_%s_instructions[] = {\n",
+		a->name);
+
+	/* Array elements. */
+	for (pos = 0; pos < a->n_instructions; pos++) {
+		struct instruction *instr = a->instructions + pos;
+
+		export_table[instr->type](instr, f);
+	}
+
+	/* Array definition - closing curly brace. */
+	fprintf(f, "};\n");
+}
+
 static int
 pipeline_codegen(struct rte_swx_pipeline *p)
 {
+	struct action *a;
 	FILE *f = NULL;
 
 	if (!p)
@@ -9786,6 +10870,15 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 	/* Include the .h file. */
 	fprintf(f, "#include \"rte_swx_pipeline_internal.h\"\n");
 
+	/* Add the code for each action. */
+	TAILQ_FOREACH(a, &p->actions, node) {
+		fprintf(f, "/**\n * Action %s\n */\n\n", a->name);
+
+		action_data_codegen(a, f);
+
+		fprintf(f, "\n");
+	}
+
 	/* Close the .c file. */
 	fclose(f);
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 21/24] pipeline: generate action functions
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (18 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
                     ` (4 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Generate a C function for each action. For most instructions, the
associated inline function is called directly. Special care is taken
for TX, jump and return instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 662 ++++++++++++++++++++++++++++++++
 1 file changed, 662 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 58132e635f..4665f61f64 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1376,6 +1376,26 @@ instruction_is_tx(enum instruction_type type)
 	}
 }
 
+/*
+ * Return 1 when the instruction type is one of the TX types (INSTR_TX,
+ * INSTR_TX_I) or one of the INSTR_HDR_EMIT*_TX types, 0 otherwise.
+ */
+static int
+instruction_does_tx(struct instruction *instr)
+{
+	enum instruction_type type = instr->type;
+
+	/* TX instruction types. */
+	if (type == INSTR_TX || type == INSTR_TX_I)
+		return 1;
+
+	/* INSTR_HDR_EMIT*_TX instruction types. */
+	if (type == INSTR_HDR_EMIT_TX ||
+	    type == INSTR_HDR_EMIT2_TX ||
+	    type == INSTR_HDR_EMIT3_TX ||
+	    type == INSTR_HDR_EMIT4_TX ||
+	    type == INSTR_HDR_EMIT5_TX ||
+	    type == INSTR_HDR_EMIT6_TX ||
+	    type == INSTR_HDR_EMIT7_TX ||
+	    type == INSTR_HDR_EMIT8_TX)
+		return 1;
+
+	return 0;
+}
+
 static int
 instruction_is_jmp(struct instruction *instr)
 {
@@ -10853,6 +10873,644 @@ action_data_codegen(struct action *a, FILE *f)
 	fprintf(f, "};\n");
 }
 
+/*
+ * Map the instruction type to the name of the function that is called for it
+ * by the generated code. NULL is returned for the instruction types that have
+ * no entry below (RX, table/selector/learner lookup, extern, jump, return) and
+ * for any unknown type.
+ */
+static const char *
+instr_type_to_func(struct instruction *instr)
+{
+	/* Entries that are not explicitly initialized are NULL. */
+	static const char * const func_name[] = {
+		[INSTR_TX] = "__instr_tx_exec",
+		[INSTR_TX_I] = "__instr_tx_i_exec",
+
+		[INSTR_HDR_EXTRACT] = "__instr_hdr_extract_exec",
+		[INSTR_HDR_EXTRACT2] = "__instr_hdr_extract2_exec",
+		[INSTR_HDR_EXTRACT3] = "__instr_hdr_extract3_exec",
+		[INSTR_HDR_EXTRACT4] = "__instr_hdr_extract4_exec",
+		[INSTR_HDR_EXTRACT5] = "__instr_hdr_extract5_exec",
+		[INSTR_HDR_EXTRACT6] = "__instr_hdr_extract6_exec",
+		[INSTR_HDR_EXTRACT7] = "__instr_hdr_extract7_exec",
+		[INSTR_HDR_EXTRACT8] = "__instr_hdr_extract8_exec",
+
+		[INSTR_HDR_EXTRACT_M] = "__instr_hdr_extract_m_exec",
+
+		[INSTR_HDR_LOOKAHEAD] = "__instr_hdr_lookahead_exec",
+
+		[INSTR_HDR_EMIT] = "__instr_hdr_emit_exec",
+		[INSTR_HDR_EMIT_TX] = "__instr_hdr_emit_tx_exec",
+		[INSTR_HDR_EMIT2_TX] = "__instr_hdr_emit2_tx_exec",
+		[INSTR_HDR_EMIT3_TX] = "__instr_hdr_emit3_tx_exec",
+		[INSTR_HDR_EMIT4_TX] = "__instr_hdr_emit4_tx_exec",
+		[INSTR_HDR_EMIT5_TX] = "__instr_hdr_emit5_tx_exec",
+		[INSTR_HDR_EMIT6_TX] = "__instr_hdr_emit6_tx_exec",
+		[INSTR_HDR_EMIT7_TX] = "__instr_hdr_emit7_tx_exec",
+		[INSTR_HDR_EMIT8_TX] = "__instr_hdr_emit8_tx_exec",
+
+		[INSTR_HDR_VALIDATE] = "__instr_hdr_validate_exec",
+		[INSTR_HDR_INVALIDATE] = "__instr_hdr_invalidate_exec",
+
+		[INSTR_MOV] = "__instr_mov_exec",
+		[INSTR_MOV_MH] = "__instr_mov_mh_exec",
+		[INSTR_MOV_HM] = "__instr_mov_hm_exec",
+		[INSTR_MOV_HH] = "__instr_mov_hh_exec",
+		[INSTR_MOV_I] = "__instr_mov_i_exec",
+
+		[INSTR_DMA_HT] = "__instr_dma_ht_exec",
+		[INSTR_DMA_HT2] = "__instr_dma_ht2_exec",
+		[INSTR_DMA_HT3] = "__instr_dma_ht3_exec",
+		[INSTR_DMA_HT4] = "__instr_dma_ht4_exec",
+		[INSTR_DMA_HT5] = "__instr_dma_ht5_exec",
+		[INSTR_DMA_HT6] = "__instr_dma_ht6_exec",
+		[INSTR_DMA_HT7] = "__instr_dma_ht7_exec",
+		[INSTR_DMA_HT8] = "__instr_dma_ht8_exec",
+
+		[INSTR_ALU_ADD] = "__instr_alu_add_exec",
+		[INSTR_ALU_ADD_MH] = "__instr_alu_add_mh_exec",
+		[INSTR_ALU_ADD_HM] = "__instr_alu_add_hm_exec",
+		[INSTR_ALU_ADD_HH] = "__instr_alu_add_hh_exec",
+		[INSTR_ALU_ADD_MI] = "__instr_alu_add_mi_exec",
+		[INSTR_ALU_ADD_HI] = "__instr_alu_add_hi_exec",
+
+		[INSTR_ALU_SUB] = "__instr_alu_sub_exec",
+		[INSTR_ALU_SUB_MH] = "__instr_alu_sub_mh_exec",
+		[INSTR_ALU_SUB_HM] = "__instr_alu_sub_hm_exec",
+		[INSTR_ALU_SUB_HH] = "__instr_alu_sub_hh_exec",
+		[INSTR_ALU_SUB_MI] = "__instr_alu_sub_mi_exec",
+		[INSTR_ALU_SUB_HI] = "__instr_alu_sub_hi_exec",
+
+		[INSTR_ALU_CKADD_FIELD] = "__instr_alu_ckadd_field_exec",
+		[INSTR_ALU_CKADD_STRUCT20] = "__instr_alu_ckadd_struct20_exec",
+		[INSTR_ALU_CKADD_STRUCT] = "__instr_alu_ckadd_struct_exec",
+		[INSTR_ALU_CKSUB_FIELD] = "__instr_alu_cksub_field_exec",
+
+		[INSTR_ALU_AND] = "__instr_alu_and_exec",
+		[INSTR_ALU_AND_MH] = "__instr_alu_and_mh_exec",
+		[INSTR_ALU_AND_HM] = "__instr_alu_and_hm_exec",
+		[INSTR_ALU_AND_HH] = "__instr_alu_and_hh_exec",
+		[INSTR_ALU_AND_I] = "__instr_alu_and_i_exec",
+
+		[INSTR_ALU_OR] = "__instr_alu_or_exec",
+		[INSTR_ALU_OR_MH] = "__instr_alu_or_mh_exec",
+		[INSTR_ALU_OR_HM] = "__instr_alu_or_hm_exec",
+		[INSTR_ALU_OR_HH] = "__instr_alu_or_hh_exec",
+		[INSTR_ALU_OR_I] = "__instr_alu_or_i_exec",
+
+		[INSTR_ALU_XOR] = "__instr_alu_xor_exec",
+		[INSTR_ALU_XOR_MH] = "__instr_alu_xor_mh_exec",
+		[INSTR_ALU_XOR_HM] = "__instr_alu_xor_hm_exec",
+		[INSTR_ALU_XOR_HH] = "__instr_alu_xor_hh_exec",
+		[INSTR_ALU_XOR_I] = "__instr_alu_xor_i_exec",
+
+		[INSTR_ALU_SHL] = "__instr_alu_shl_exec",
+		[INSTR_ALU_SHL_MH] = "__instr_alu_shl_mh_exec",
+		[INSTR_ALU_SHL_HM] = "__instr_alu_shl_hm_exec",
+		[INSTR_ALU_SHL_HH] = "__instr_alu_shl_hh_exec",
+		[INSTR_ALU_SHL_MI] = "__instr_alu_shl_mi_exec",
+		[INSTR_ALU_SHL_HI] = "__instr_alu_shl_hi_exec",
+
+		[INSTR_ALU_SHR] = "__instr_alu_shr_exec",
+		[INSTR_ALU_SHR_MH] = "__instr_alu_shr_mh_exec",
+		[INSTR_ALU_SHR_HM] = "__instr_alu_shr_hm_exec",
+		[INSTR_ALU_SHR_HH] = "__instr_alu_shr_hh_exec",
+		[INSTR_ALU_SHR_MI] = "__instr_alu_shr_mi_exec",
+		[INSTR_ALU_SHR_HI] = "__instr_alu_shr_hi_exec",
+
+		[INSTR_REGPREFETCH_RH] = "__instr_regprefetch_rh_exec",
+		[INSTR_REGPREFETCH_RM] = "__instr_regprefetch_rm_exec",
+		[INSTR_REGPREFETCH_RI] = "__instr_regprefetch_ri_exec",
+
+		[INSTR_REGRD_HRH] = "__instr_regrd_hrh_exec",
+		[INSTR_REGRD_HRM] = "__instr_regrd_hrm_exec",
+		[INSTR_REGRD_HRI] = "__instr_regrd_hri_exec",
+		[INSTR_REGRD_MRH] = "__instr_regrd_mrh_exec",
+		[INSTR_REGRD_MRM] = "__instr_regrd_mrm_exec",
+		[INSTR_REGRD_MRI] = "__instr_regrd_mri_exec",
+
+		[INSTR_REGWR_RHH] = "__instr_regwr_rhh_exec",
+		[INSTR_REGWR_RHM] = "__instr_regwr_rhm_exec",
+		[INSTR_REGWR_RHI] = "__instr_regwr_rhi_exec",
+		[INSTR_REGWR_RMH] = "__instr_regwr_rmh_exec",
+		[INSTR_REGWR_RMM] = "__instr_regwr_rmm_exec",
+		[INSTR_REGWR_RMI] = "__instr_regwr_rmi_exec",
+		[INSTR_REGWR_RIH] = "__instr_regwr_rih_exec",
+		[INSTR_REGWR_RIM] = "__instr_regwr_rim_exec",
+		[INSTR_REGWR_RII] = "__instr_regwr_rii_exec",
+
+		[INSTR_REGADD_RHH] = "__instr_regadd_rhh_exec",
+		[INSTR_REGADD_RHM] = "__instr_regadd_rhm_exec",
+		[INSTR_REGADD_RHI] = "__instr_regadd_rhi_exec",
+		[INSTR_REGADD_RMH] = "__instr_regadd_rmh_exec",
+		[INSTR_REGADD_RMM] = "__instr_regadd_rmm_exec",
+		[INSTR_REGADD_RMI] = "__instr_regadd_rmi_exec",
+		[INSTR_REGADD_RIH] = "__instr_regadd_rih_exec",
+		[INSTR_REGADD_RIM] = "__instr_regadd_rim_exec",
+		[INSTR_REGADD_RII] = "__instr_regadd_rii_exec",
+
+		[INSTR_METPREFETCH_H] = "__instr_metprefetch_h_exec",
+		[INSTR_METPREFETCH_M] = "__instr_metprefetch_m_exec",
+		[INSTR_METPREFETCH_I] = "__instr_metprefetch_i_exec",
+
+		[INSTR_METER_HHM] = "__instr_meter_hhm_exec",
+		[INSTR_METER_HHI] = "__instr_meter_hhi_exec",
+		[INSTR_METER_HMM] = "__instr_meter_hmm_exec",
+		[INSTR_METER_HMI] = "__instr_meter_hmi_exec",
+		[INSTR_METER_MHM] = "__instr_meter_mhm_exec",
+		[INSTR_METER_MHI] = "__instr_meter_mhi_exec",
+		[INSTR_METER_MMM] = "__instr_meter_mmm_exec",
+		[INSTR_METER_MMI] = "__instr_meter_mmi_exec",
+		[INSTR_METER_IHM] = "__instr_meter_ihm_exec",
+		[INSTR_METER_IHI] = "__instr_meter_ihi_exec",
+		[INSTR_METER_IMM] = "__instr_meter_imm_exec",
+		[INSTR_METER_IMI] = "__instr_meter_imi_exec",
+
+		[INSTR_LEARNER_LEARN] = "__instr_learn_exec",
+		[INSTR_LEARNER_FORGET] = "__instr_forget_exec",
+	};
+	size_t type = (size_t)instr->type;
+
+	/* Types beyond the last initialized entry have no function either. */
+	if (type >= sizeof(func_name) / sizeof(func_name[0]))
+		return NULL;
+
+	return func_name[type];
+}
+
+/*
+ * Write the code for a TX-type instruction of action a to the generated C
+ * file: the call to the instruction function, followed by the calls to
+ * thread_ip_reset() and instr_rx_exec(), followed by the return statement.
+ */
+static void
+action_instr_does_tx_codegen(struct action *a,
+			uint32_t instr_pos,
+			struct instruction *instr,
+			FILE *f)
+{
+	const char *func_name = instr_type_to_func(instr);
+
+	/* Call to the instruction function. */
+	fprintf(f,
+		"%s(p, t, &action_%s_instructions[%u]);\n",
+		func_name,
+		a->name,
+		instr_pos);
+
+	/* Fixed epilogue. */
+	fprintf(f,
+		"\tthread_ip_reset(p, t);\n"
+		"\tinstr_rx_exec(p);\n"
+		"\treturn;\n");
+}
+
+/*
+ * Write the code for an extern object instruction of action a to the
+ * generated C file: a loop that calls __instr_extern_obj_exec() for as long as
+ * it returns zero.
+ */
+static void
+action_instr_extern_obj_codegen(struct action *a,
+				uint32_t instr_pos,
+				FILE *f)
+{
+	const char *action_name = a->name;
+
+	fprintf(f,
+		"while (!__instr_extern_obj_exec(p, t, "
+		"&action_%s_instructions[%u]));\n",
+		action_name,
+		instr_pos);
+}
+
+/*
+ * Write the code for an extern function instruction of action a to the
+ * generated C file: a loop that calls __instr_extern_func_exec() for as long
+ * as it returns zero.
+ */
+static void
+action_instr_extern_func_codegen(struct action *a,
+				 uint32_t instr_pos,
+				 FILE *f)
+{
+	const char *action_name = a->name;
+
+	fprintf(f,
+		"while (!__instr_extern_func_exec(p, t, "
+		"&action_%s_instructions[%u]));\n",
+		action_name,
+		instr_pos);
+}
+
+/*
+ * Write the "if (<a> <op> <b>) goto <label>;" code for a jump instruction that
+ * compares its two operands.
+ *
+ * a_func is the name of the function used to read operand .jmp.a. b_func is
+ * the name of the function used to read operand .jmp.b; when b_func is NULL,
+ * the immediate value .jmp.b_val is used as the second operand instead. op is
+ * the C comparison operator.
+ */
+static void
+action_instr_jmp_cmp_codegen(struct action *a,
+			     uint32_t instr_pos,
+			     struct instruction_data *data,
+			     const char *a_func,
+			     const char *op,
+			     const char *b_func,
+			     FILE *f)
+{
+	/* First operand and comparison operator. */
+	fprintf(f,
+		"if (%s(t, &action_%s_instructions[%u].jmp.a) %s ",
+		a_func,
+		a->name,
+		instr_pos,
+		op);
+
+	/* Second operand. */
+	if (b_func)
+		fprintf(f,
+			"%s(t, &action_%s_instructions[%u].jmp.b))\n",
+			b_func,
+			a->name,
+			instr_pos);
+	else
+		fprintf(f,
+			"action_%s_instructions[%u].jmp.b_val)\n",
+			a->name,
+			instr_pos);
+
+	/* Jump. */
+	fprintf(f,
+		"\t\tgoto %s;\n",
+		data->jmp_label);
+}
+
+/*
+ * Write the code for a jump instruction of action a to the generated C file.
+ * The jump is written as a goto to the label data->jmp_label, unconditional
+ * for INSTR_JMP and guarded by an if statement for all the other jump types.
+ * Nothing is written for an instruction type that is not a jump.
+ */
+static void
+action_instr_jmp_codegen(struct action *a,
+			 uint32_t instr_pos,
+			 struct instruction *instr,
+			 struct instruction_data *data,
+			 FILE *f)
+{
+	/* Names of the functions used to read the operands of the comparison
+	 * jumps, as selected by each instruction type below.
+	 */
+	const char *hbo = "instr_operand_hbo";
+	const char *nbo = "instr_operand_nbo";
+
+	switch (instr->type) {
+	case INSTR_JMP:
+		fprintf(f,
+			"goto %s;\n",
+			data->jmp_label);
+		return;
+
+	case INSTR_JMP_VALID:
+		fprintf(f,
+			"if (HEADER_VALID(t, action_%s_instructions[%u].jmp.header_id))\n"
+			"\t\tgoto %s;\n",
+			a->name,
+			instr_pos,
+			data->jmp_label);
+		return;
+
+	case INSTR_JMP_INVALID:
+		fprintf(f,
+			"if (!HEADER_VALID(t, action_%s_instructions[%u].jmp.header_id))\n"
+			"\t\tgoto %s;\n",
+			a->name,
+			instr_pos,
+			data->jmp_label);
+		return;
+
+	case INSTR_JMP_HIT:
+		fprintf(f,
+			"if (t->hit)\n"
+			"\t\tgoto %s;\n",
+			data->jmp_label);
+		return;
+
+	case INSTR_JMP_MISS:
+		fprintf(f,
+			"if (!t->hit)\n"
+			"\t\tgoto %s;\n",
+			data->jmp_label);
+		return;
+
+	case INSTR_JMP_ACTION_HIT:
+		fprintf(f,
+			"if (t->action_id == action_%s_instructions[%u].jmp.action_id)\n"
+			"\t\tgoto %s;\n",
+			a->name,
+			instr_pos,
+			data->jmp_label);
+		return;
+
+	case INSTR_JMP_ACTION_MISS:
+		fprintf(f,
+			"if (t->action_id != action_%s_instructions[%u].jmp.action_id)\n"
+			"\t\tgoto %s;\n",
+			a->name,
+			instr_pos,
+			data->jmp_label);
+		return;
+
+	/* Jump if equal. */
+	case INSTR_JMP_EQ:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, "==", hbo, f);
+		return;
+
+	case INSTR_JMP_EQ_MH:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, "==", nbo, f);
+		return;
+
+	case INSTR_JMP_EQ_HM:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, "==", hbo, f);
+		return;
+
+	case INSTR_JMP_EQ_HH:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, "==", nbo, f);
+		return;
+
+	case INSTR_JMP_EQ_I:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, "==", NULL, f);
+		return;
+
+	/* Jump if not equal. */
+	case INSTR_JMP_NEQ:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, "!=", hbo, f);
+		return;
+
+	case INSTR_JMP_NEQ_MH:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, "!=", nbo, f);
+		return;
+
+	case INSTR_JMP_NEQ_HM:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, "!=", hbo, f);
+		return;
+
+	case INSTR_JMP_NEQ_HH:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, "!=", nbo, f);
+		return;
+
+	case INSTR_JMP_NEQ_I:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, "!=", NULL, f);
+		return;
+
+	/* Jump if less than. */
+	case INSTR_JMP_LT:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, "<", hbo, f);
+		return;
+
+	case INSTR_JMP_LT_MH:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, "<", nbo, f);
+		return;
+
+	case INSTR_JMP_LT_HM:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, "<", hbo, f);
+		return;
+
+	case INSTR_JMP_LT_HH:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, "<", nbo, f);
+		return;
+
+	case INSTR_JMP_LT_MI:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, "<", NULL, f);
+		return;
+
+	case INSTR_JMP_LT_HI:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, "<", NULL, f);
+		return;
+
+	/* Jump if greater than. */
+	case INSTR_JMP_GT:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, ">", hbo, f);
+		return;
+
+	case INSTR_JMP_GT_MH:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, ">", nbo, f);
+		return;
+
+	case INSTR_JMP_GT_HM:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, ">", hbo, f);
+		return;
+
+	case INSTR_JMP_GT_HH:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, ">", nbo, f);
+		return;
+
+	case INSTR_JMP_GT_MI:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, hbo, ">", NULL, f);
+		return;
+
+	case INSTR_JMP_GT_HI:
+		action_instr_jmp_cmp_codegen(a, instr_pos, data, nbo, ">", NULL, f);
+		return;
+
+	default:
+		return;
+	}
+}
+
+/*
+ * Write the code for a return instruction to the generated C file, which is
+ * simply a C return statement.
+ */
+static void
+action_instr_return_codegen(FILE *f)
+{
+	fputs("return;\n", f);
+}
+
+/*
+ * Write the action_<name>_run() function for action a to the generated C
+ * file. The function body has one statement per action instruction, preceded
+ * by the instruction label when the instruction has one. TX, extern, jump and
+ * return instructions get dedicated code; every other instruction becomes a
+ * direct call to the function returned by instr_type_to_func().
+ */
+static void
+action_instr_codegen(struct action *a, FILE *f)
+{
+	uint32_t i;
+
+	/* Function header and the thread local variable. */
+	fprintf(f,
+		"void\n"
+		"action_%s_run(struct rte_swx_pipeline *p)\n"
+		"{\n"
+		"\tstruct thread *t = &p->threads[p->thread_id];\n"
+		"\n",
+		a->name);
+
+	for (i = 0; i < a->n_instructions; i++) {
+		struct instruction *instr = &a->instructions[i];
+		struct instruction_data *data = &a->instruction_data[i];
+		const char *func_name;
+
+		/* Label, if present. */
+		if (data->label[0])
+			fprintf(f, "\n%s : ", data->label);
+		else
+			fprintf(f, "\n\t");
+
+		/* TX instruction type. */
+		if (instruction_does_tx(instr)) {
+			action_instr_does_tx_codegen(a, i, instr, f);
+			continue;
+		}
+
+		/* Extern object/function instruction type. */
+		if (instr->type == INSTR_EXTERN_OBJ) {
+			action_instr_extern_obj_codegen(a, i, f);
+			continue;
+		}
+
+		if (instr->type == INSTR_EXTERN_FUNC) {
+			action_instr_extern_func_codegen(a, i, f);
+			continue;
+		}
+
+		/* Jump instruction type. */
+		if (instruction_is_jmp(instr)) {
+			action_instr_jmp_codegen(a, i, instr, data, f);
+			continue;
+		}
+
+		/* Return instruction type. */
+		if (instr->type == INSTR_RETURN) {
+			action_instr_return_codegen(f);
+			continue;
+		}
+
+		/* Any other instruction type. */
+		func_name = instr_type_to_func(instr);
+		if (!func_name) {
+			/* instr_type_to_func() returns NULL for some instruction
+			 * types, and passing NULL for a "%s" conversion is
+			 * undefined behavior. Write an #error directive on a line
+			 * of its own instead, so that the generated file fails to
+			 * build with an explicit message.
+			 */
+			fprintf(f,
+				"\n#error \"action %s: no function for instruction %u\"\n",
+				a->name,
+				i);
+			continue;
+		}
+
+		fprintf(f,
+			"%s(p, t, &action_%s_instructions[%u]);\n",
+			func_name,
+			a->name,
+			i);
+	}
+
+	fprintf(f, "}\n\n");
+}
+
 static int
 pipeline_codegen(struct rte_swx_pipeline *p)
 {
@@ -10877,6 +11535,10 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 		action_data_codegen(a, f);
 
 		fprintf(f, "\n");
+
+		action_instr_codegen(a, f);
+
+		fprintf(f, "\n");
 	}
 
 	/* Close the .c file. */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 22/24] pipeline: generate custom instruction functions
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (19 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 21/24] pipeline: generate action functions Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
                     ` (3 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Generate a C function for each custom instruction, which essentially
consolidates multiple regular instructions into a single function call.
The pipeline program is split into groups of instructions, and a
custom instruction is generated for each group that has more than one
instruction. Special care is taken for the instructions that can do
thread yield (RX, extern) and for those that can change the
instruction pointer (TX, near/far jump).

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 651 +++++++++++++++++++++++++++++++-
 1 file changed, 645 insertions(+), 6 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 4665f61f64..27005872b4 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1436,6 +1436,24 @@ instruction_is_jmp(struct instruction *instr)
 	}
 }
 
+/* Return 1 when the instruction type is one of the types listed below (RX, table, selector and
+ * learner lookups, extern object and extern function calls), which the instruction grouping
+ * logic treats as thread yield points. Return 0 for any other instruction type.
+ */
+static int
+instruction_does_thread_yield(struct instruction *instr)
+{
+	int yields;
+
+	switch (instr->type) {
+	case INSTR_RX:
+	case INSTR_TABLE:
+	case INSTR_TABLE_AF:
+	case INSTR_SELECTOR:
+	case INSTR_LEARNER:
+	case INSTR_LEARNER_AF:
+	case INSTR_EXTERN_OBJ:
+	case INSTR_EXTERN_FUNC:
+		yields = 1;
+		break;
+
+	default:
+		yields = 0;
+		break;
+	}
+
+	return yields;
+}
+
 static struct field *
 action_field_parse(struct action *action, const char *name);
 
@@ -11511,15 +11529,623 @@ action_instr_codegen(struct action *a, FILE *f)
 	fprintf(f, "}\n\n");
 }
 
+/* A contiguous range of pipeline instructions that is handled as one unit by the code generator.
+ * Groups spanning more than one instruction get their own generated C function.
+ */
+struct instruction_group {
+	/* Linkage within the instruction group list. */
+	TAILQ_ENTRY(instruction_group) node;
+
+	/* Group identifier. Re-assigned to the position of the group within the list once the
+	 * list is fully built.
+	 */
+	uint32_t group_id;
+
+	/* Index of the first pipeline instruction of the group. */
+	uint32_t first_instr_id;
+
+	/* Index of the last pipeline instruction of the group (inclusive). */
+	uint32_t last_instr_id;
+
+	/* Generated function for this group, looked up in the shared object library. Only set for
+	 * the groups that contain more than one instruction.
+	 */
+	instr_exec_t func;
+};
+
+/* List of instruction groups, kept in pipeline instruction order. */
+TAILQ_HEAD(instruction_group_list, instruction_group);
+
+/* Look up the group whose [first_instr_id, last_instr_id] range (both ends inclusive) contains
+ * the given instruction. Return NULL when no group of the list covers this instruction.
+ */
+static struct instruction_group *
+instruction_group_list_group_find(struct instruction_group_list *igl, uint32_t instruction_id)
+{
+	struct instruction_group *group;
+
+	TAILQ_FOREACH(group, igl, node) {
+		/* Instruction located before the start of this group. */
+		if (instruction_id < group->first_instr_id)
+			continue;
+
+		/* Instruction located after the end of this group. */
+		if (instruction_id > group->last_instr_id)
+			continue;
+
+		return group;
+	}
+
+	return NULL;
+}
+
+/* Free all the groups of the list, then the list itself. A NULL list is silently accepted. */
+static void
+instruction_group_list_free(struct instruction_group_list *igl)
+{
+	struct instruction_group *group;
+
+	if (!igl)
+		return;
+
+	/* Keep detaching the head of the list until the list is empty. */
+	while ((group = TAILQ_FIRST(igl)) != NULL) {
+		TAILQ_REMOVE(igl, group, node);
+		free(group);
+	}
+
+	free(igl);
+}
+
+/* Partition the pipeline instructions into groups and return them as a newly allocated list.
+ *
+ * The list is built in two passes: the first pass cuts the program around the thread yield
+ * instructions, with each such instruction ending up alone in its group; the second pass splits
+ * the groups further, so that any instruction that is the destination of a jump located in a
+ * different group becomes the first instruction of its group. On success, the group IDs are
+ * numbered in list order, starting from 0.
+ *
+ * Returns NULL on invalid arguments (NULL pipeline, no instructions) or on memory allocation
+ * failure. The returned list is released with instruction_group_list_free().
+ */
+static struct instruction_group_list *
+instruction_group_list_create(struct rte_swx_pipeline *p)
+{
+	struct instruction_group_list *igl = NULL;
+	struct instruction_group *g = NULL;
+	uint32_t n_groups = 0, i;
+
+	if (!p || !p->instructions || !p->instruction_data || !p->n_instructions)
+		goto error;
+
+	/* List init. */
+	igl = calloc(1, sizeof(struct instruction_group_list));
+	if (!igl)
+		goto error;
+
+	TAILQ_INIT(igl);
+
+	/* Allocate the first group. */
+	g = calloc(1, sizeof(struct instruction_group));
+	if (!g)
+		goto error;
+
+	/* Iteration 1: Separate the instructions into groups based on the thread yield
+	 * instructions. Do not worry about the jump instructions at this point.
+	 */
+	for (i = 0; i < p->n_instructions; i++) {
+		struct instruction *instr = &p->instructions[i];
+
+		/* Check for thread yield instructions. */
+		if (!instruction_does_thread_yield(instr))
+			continue;
+
+		/* If the current group contains at least one instruction, then finalize it (with
+		 * the previous instruction), add it to the list and allocate a new group (that
+		 * starts with the current instruction).
+		 */
+		if (i - g->first_instr_id) {
+			/* Finalize the group. */
+			g->last_instr_id = i - 1;
+
+			/* Add the group to the list. Advance the number of groups. */
+			TAILQ_INSERT_TAIL(igl, g, node);
+			n_groups++;
+
+			/* Allocate a new group. On failure, g is NULL and the previous group is
+			 * already owned by the list, so the error path frees it exactly once.
+			 */
+			g = calloc(1, sizeof(struct instruction_group));
+			if (!g)
+				goto error;
+
+			/* Initialize the new group. */
+			g->group_id = n_groups;
+			g->first_instr_id = i;
+		}
+
+		/* Finalize the current group (with the current instruction, therefore this group
+		 * contains just the current thread yield instruction), add it to the list and
+		 * allocate a new group (that starts with the next instruction).
+		 */
+
+		/* Finalize the group. */
+		g->last_instr_id = i;
+
+		/* Add the group to the list. Advance the number of groups. */
+		TAILQ_INSERT_TAIL(igl, g, node);
+		n_groups++;
+
+		/* Allocate a new group. */
+		g = calloc(1, sizeof(struct instruction_group));
+		if (!g)
+			goto error;
+
+		/* Initialize the new group. */
+		g->group_id = n_groups;
+		g->first_instr_id = i + 1;
+	}
+
+	/* Handle the last group: keep it when it contains at least one instruction, otherwise
+	 * discard it (it was allocated speculatively after the last thread yield instruction).
+	 */
+	if (i - g->first_instr_id) {
+		/* Finalize the group. */
+		g->last_instr_id = i - 1;
+
+		/* Add the group to the list. Advance the number of groups. */
+		TAILQ_INSERT_TAIL(igl, g, node);
+		n_groups++;
+	} else
+		free(g);
+
+	/* From now on, every group is owned by the list, so the error path must not free g. */
+	g = NULL;
+
+	/* Iteration 2: Handle jumps. If the current group contains an instruction which represents
+	 * the destination of a jump instruction located in a different group ("far jump"), then the
+	 * current group has to be split, so that the instruction representing the far jump
+	 * destination is at the start of its group.
+	 */
+	for ( ; ; ) {
+		int is_modified = 0;
+
+		for (i = 0; i < p->n_instructions; i++) {
+			struct instruction_data *data = &p->instruction_data[i];
+			/* NOTE(review): this declaration shadows the function-scope g, which stays
+			 * NULL during this pass; the free(g) of the error path refers to the outer g.
+			 */
+			struct instruction_group *g;
+			uint32_t j;
+
+			/* Continue when the current instruction is not a jump destination. */
+			if (!data->n_users)
+				continue;
+
+			g = instruction_group_list_group_find(igl, i);
+			if (!g)
+				goto error;
+
+			/* Find out all the jump instructions with this destination. */
+			for (j = 0; j < p->n_instructions; j++) {
+				struct instruction *jmp_instr = &p->instructions[j];
+				struct instruction_data *jmp_data = &p->instruction_data[j];
+				struct instruction_group *jmp_g, *new_g;
+
+				/* Continue when not a jump instruction. Even when jump instruction,
+				 * continue when the jump destination is not this instruction.
+				 */
+				if (!instruction_is_jmp(jmp_instr) ||
+				    strcmp(jmp_data->jmp_label, data->label))
+					continue;
+
+				jmp_g = instruction_group_list_group_find(igl, j);
+				if (!jmp_g)
+					goto error;
+
+				/* Continue when both the jump instruction and the jump destination
+				 * instruction are in the same group. Even when in different groups,
+				 * still continue if the jump destination instruction is already the
+				 * first instruction of its group.
+				 */
+				if ((jmp_g->group_id == g->group_id) || (g->first_instr_id == i))
+					continue;
+
+				/* Split the group of the current jump destination instruction to
+				 * make this instruction the first instruction of a new group.
+				 */
+				new_g = calloc(1, sizeof(struct instruction_group));
+				if (!new_g)
+					goto error;
+
+				/* Temporary unique ID: only used for the same-group comparison above
+				 * until the IDs are re-assigned at the end of this function.
+				 */
+				new_g->group_id = n_groups;
+				new_g->first_instr_id = i;
+				new_g->last_instr_id = g->last_instr_id;
+
+				g->last_instr_id = i - 1;
+
+				TAILQ_INSERT_AFTER(igl, g, new_g, node);
+				n_groups++;
+				is_modified = 1;
+
+				/* The decision to split this group (to make the current instruction
+				 * the first instruction of a new group) is already taken and fully
+				 * implemented, so no need to search for more reasons to do it.
+				 */
+				break;
+			}
+		}
+
+		/* Re-evaluate everything, as at least one group got split, so some jumps that were
+		 * previously considered local (i.e. the jump destination is in the same group as
+		 * the jump instruction) can now be "far jumps" (i.e. the jump destination is in a
+		 * different group than the jump instruction). Worst case scenario: each instruction
+		 * that is a jump destination ends up as the first instruction of its group.
+		 */
+		if (!is_modified)
+			break;
+	}
+
+	/* Re-assign the group IDs to be in incremental order. */
+	i = 0;
+	TAILQ_FOREACH(g, igl, node) {
+		g->group_id = i;
+
+		i++;
+	}
+
+	return igl;
+
+error:
+	instruction_group_list_free(igl);
+
+	free(g);
+
+	return NULL;
+}
+
+/* Emit the generated code for a TX-type pipeline instruction: the call to the instruction
+ * function, followed by the thread instruction pointer reset, the RX of the next packet and the
+ * return from the generated function.
+ */
+static void
+pipeline_instr_does_tx_codegen(struct rte_swx_pipeline *p __rte_unused,
+			       uint32_t instr_pos,
+			       struct instruction *instr,
+			       FILE *f)
+{
+	/* The instruction itself. */
+	fprintf(f,
+		"%s(p, t, &pipeline_instructions[%u]);\n",
+		instr_type_to_func(instr),
+		instr_pos);
+
+	/* Fixed epilogue: nothing else of the current function runs after a TX. */
+	fputs("\tthread_ip_reset(p, t);\n"
+	      "\tinstr_rx_exec(p);\n"
+	      "\treturn;\n",
+	      f);
+}
+
+/* Emit the generated code for a jump instruction of the pipeline.
+ *
+ * The condition of the jump (if any) is written first as the head of an "if" statement, then
+ * the jump itself is written either as a "goto" to the destination label (when the jump and its
+ * destination are in the same instruction group) or as an instruction pointer update followed
+ * by a return from the generated function (when they are in different groups).
+ *
+ * Returns 0 on success or -EINVAL when the group of the jump instruction, the destination label
+ * or the group of the destination instruction cannot be found. Note that the condition text has
+ * already been written to the file when one of these lookups fails.
+ */
+static int
+pipeline_instr_jmp_codegen(struct rte_swx_pipeline *p,
+			   struct instruction_group_list *igl,
+			   uint32_t jmp_instr_id,
+			   struct instruction *jmp_instr,
+			   struct instruction_data *jmp_data,
+			   FILE *f)
+{
+	struct instruction_group *jmp_g, *g;
+	struct instruction_data *data;
+	uint32_t instr_id;
+
+	/* Jump condition: nothing is written for the unconditional jump. */
+	switch (jmp_instr->type) {
+	case INSTR_JMP:
+		break;
+
+	case INSTR_JMP_VALID:
+		fprintf(f,
+			"if (HEADER_VALID(t, pipeline_instructions[%u].jmp.header_id))",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_INVALID:
+		fprintf(f,
+			"if (!HEADER_VALID(t, pipeline_instructions[%u].jmp.header_id))",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_HIT:
+		fprintf(f,
+			"if (t->hit)\n");
+		break;
+
+	case INSTR_JMP_MISS:
+		fprintf(f,
+			"if (!t->hit)\n");
+		break;
+
+	case INSTR_JMP_ACTION_HIT:
+		fprintf(f,
+			"if (t->action_id == pipeline_instructions[%u].jmp.action_id)",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_ACTION_MISS:
+		fprintf(f,
+			"if (t->action_id != pipeline_instructions[%u].jmp.action_id)",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ_MH:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ_HM:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ_HH:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ_I:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) == "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ_MH:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ_HM:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ_HH:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_NEQ_I:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) != "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_MH:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_HM:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_HH:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_MI:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_LT_HI:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) < "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_MH:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_HM:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"instr_operand_hbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_HH:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"instr_operand_nbo(t, &pipeline_instructions[%u].jmp.b))",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_MI:
+		fprintf(f,
+			"if (instr_operand_hbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_GT_HI:
+		fprintf(f,
+			"if (instr_operand_nbo(t, &pipeline_instructions[%u].jmp.a) > "
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id,
+			jmp_instr_id);
+		break;
+
+	default:
+		break;
+	}
+
+	/* Find the instruction group of the jump instruction. */
+	jmp_g = instruction_group_list_group_find(igl, jmp_instr_id);
+	if (!jmp_g)
+		return -EINVAL;
+
+	/* Find the instruction group of the jump destination instruction. */
+	data = label_find(p->instruction_data, p->n_instructions, jmp_data->jmp_label);
+	if (!data)
+		return -EINVAL;
+
+	/* Index of the destination instruction, derived from its position in the data array. */
+	instr_id = data - p->instruction_data;
+
+	g = instruction_group_list_group_find(igl, instr_id);
+	if (!g)
+		return -EINVAL;
+
+	/* Code generation for "near" jump (same instruction group) or "far" jump (different
+	 * instruction group).
+	 *
+	 * NOTE(review): for the far jump, the index written into the generated code is the ID of
+	 * the destination group, not the index of the destination instruction. Presumably this
+	 * relies on the pipeline being compacted later to one instruction per group, in group
+	 * order - confirm against pipeline_adjust().
+	 */
+	if (g->group_id == jmp_g->group_id)
+		fprintf(f,
+			"\n\t\tgoto %s;\n",
+			jmp_data->jmp_label);
+	else
+		fprintf(f,
+			" {\n"
+			"\t\tthread_ip_set(t, &p->instructions[%u]);\n"
+			"\t\treturn;\n"
+			"\t}\n\n",
+			g->group_id);
+
+	return 0;
+}
+
+/* Emit the generated code for the pipeline custom instructions into the output C file.
+ *
+ * Nothing is written when no group of the list contains more than one instruction. Otherwise,
+ * the pipeline instruction array is written first, followed by one function per group that
+ * contains more than one instruction, named pipeline_func_<group ID>.
+ *
+ * This function has no way to report an error to its caller, so when the code for a jump
+ * instruction cannot be generated, an #error directive is written into the output file in order
+ * to make its compilation fail, as opposed to silently producing a broken function.
+ */
+static void
+instruction_group_list_codegen(struct instruction_group_list *igl,
+			       struct rte_swx_pipeline *p,
+			       FILE *f)
+{
+	struct instruction_group *g;
+	uint32_t i;
+	int is_required = 0;
+
+	/* Check if code generation is required: one multi-instruction group is enough. */
+	TAILQ_FOREACH(g, igl, node)
+		if (g->first_instr_id < g->last_instr_id) {
+			is_required = 1;
+			break;
+		}
+
+	if (!is_required)
+		return;
+
+	/* Generate the code for the pipeline instruction array. */
+	fprintf(f,
+		"static const struct instruction pipeline_instructions[] = {\n");
+
+	for (i = 0; i < p->n_instructions; i++) {
+		struct instruction *instr = &p->instructions[i];
+		instruction_export_t func = export_table[instr->type];
+
+		func(instr, f);
+	}
+
+	fprintf(f, "};\n\n");
+
+	/* Generate the code for the pipeline functions: one function for each instruction group
+	 * that contains more than one instruction.
+	 */
+	TAILQ_FOREACH(g, igl, node) {
+		struct instruction *last_instr;
+		uint32_t j;
+
+		/* Skip if group contains a single instruction. */
+		if (g->last_instr_id == g->first_instr_id)
+			continue;
+
+		/* Generate new pipeline function. */
+		fprintf(f,
+			"void\n"
+			"pipeline_func_%u(struct rte_swx_pipeline *p)\n"
+			"{\n"
+			"\tstruct thread *t = &p->threads[p->thread_id];\n"
+			"\n",
+			g->group_id);
+
+		/* Generate the code for each pipeline instruction. */
+		for (j = g->first_instr_id; j <= g->last_instr_id; j++) {
+			struct instruction *instr = &p->instructions[j];
+			struct instruction_data *data = &p->instruction_data[j];
+
+			/* Label, if present. */
+			if (data->label[0])
+				fprintf(f, "\n%s : ", data->label);
+			else
+				fprintf(f, "\n\t");
+
+			/* TX instruction type. */
+			if (instruction_does_tx(instr)) {
+				pipeline_instr_does_tx_codegen(p, j, instr, f);
+				continue;
+			}
+
+			/* Jump instruction type. */
+			if (instruction_is_jmp(instr)) {
+				int status;
+
+				status = pipeline_instr_jmp_codegen(p, igl, j, instr, data, f);
+
+				/* On failure, the jump condition may have been written without
+				 * its body, which would silently capture the next statement. Force
+				 * the build of the generated file to fail instead.
+				 */
+				if (status)
+					fprintf(f,
+						"\n#error \"instruction %u: unresolved jump\"\n",
+						j);
+
+				continue;
+			}
+
+			/* Any other instruction type. */
+			fprintf(f,
+				"%s(p, t, &pipeline_instructions[%u]);\n",
+				instr_type_to_func(instr),
+				j);
+		}
+
+		/* Finalize the generated pipeline function. For some instructions such as TX,
+		 * emit-many-and-TX and unconditional jump, the next instruction has been already
+		 * decided unconditionally and the instruction pointer of the current thread set
+		 * accordingly; for all the other instructions, the instruction pointer must be
+		 * incremented now.
+		 */
+		last_instr = &p->instructions[g->last_instr_id];
+
+		if (!instruction_does_tx(last_instr) && (last_instr->type != INSTR_JMP))
+			fprintf(f,
+				"thread_ip_inc(p);\n");
+
+		fprintf(f,
+			"}\n"
+			"\n");
+	}
+}
+
 static int
-pipeline_codegen(struct rte_swx_pipeline *p)
+pipeline_codegen(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 {
 	struct action *a;
 	FILE *f = NULL;
 
-	if (!p)
-		return -EINVAL;
-
 	/* Create the .c file. */
 	f = fopen("/tmp/pipeline.c", "w");
 	if (!f)
@@ -11541,6 +12167,9 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 		fprintf(f, "\n");
 	}
 
+	/* Add the pipeline code. */
+	instruction_group_list_codegen(igl, p, f);
+
 	/* Close the .c file. */
 	fclose(f);
 
@@ -11550,12 +12179,22 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 static int
 pipeline_compile(struct rte_swx_pipeline *p)
 {
+	struct instruction_group_list *igl = NULL;
 	int status = 0;
 
+	igl = instruction_group_list_create(p);
+	if (!igl) {
+		status = -ENOMEM;
+		goto free;
+	}
+
 	/* Code generation. */
-	status = pipeline_codegen(p);
+	status = pipeline_codegen(p, igl);
 	if (status)
-		return status;
+		goto free;
+
+free:
+	instruction_group_list_free(igl);
 
 	return status;
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 23/24] pipeline: build shared object for pipeline
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (20 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
                     ` (2 subsequent siblings)
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev; +Cc: Cunming Liang

Build the generated C file into a shared object library.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 131 +++++++++++++++++++++++
 lib/pipeline/rte_swx_pipeline_internal.h |   1 +
 2 files changed, 132 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 27005872b4..a0e09110d6 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -5,6 +5,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <arpa/inet.h>
+#include <dlfcn.h>
 
 #include "rte_swx_pipeline_internal.h"
 
@@ -8945,9 +8946,13 @@ rte_swx_pipeline_config(struct rte_swx_pipeline **p, int numa_node)
 void
 rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 {
+	void *lib;
+
 	if (!p)
 		return;
 
+	lib = p->lib;
+
 	free(p->instruction_data);
 	free(p->instructions);
 
@@ -8967,6 +8972,9 @@ rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 	struct_free(p);
 
 	free(p);
+
+	if (lib)
+		dlclose(lib);
 }
 
 int
@@ -12176,6 +12184,124 @@ pipeline_codegen(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 	return 0;
 }
 
+#ifndef RTE_SWX_PIPELINE_CMD_MAX_SIZE
+#define RTE_SWX_PIPELINE_CMD_MAX_SIZE 4096
+#endif
+
+/* Build the generated C file into a shared object library, load the library and resolve the
+ * generated functions.
+ *
+ * The build command reads the DPDK source tree location from the RTE_INSTALL_DIR environment
+ * variable and works with pipeline.c, pipeline.o, libpipeline.so and pipeline.log in /tmp. On
+ * success, the library handle is stored in p->lib, the action functions in p->action_funcs[]
+ * and the function of each multi-instruction group in its func field.
+ *
+ * Returns 0 on success or a negative errno value on failure: -EINVAL (environment variable not
+ * set, build command too long for the command buffer, symbol not found), -ENOMEM (command buffer
+ * allocation) or -EIO (build or library load failure). On failure, the library is closed and
+ * p->lib is reset to NULL.
+ *
+ * NOTE(review): the value of RTE_INSTALL_DIR is pasted unquoted into a command executed by the
+ * shell, so this variable must only ever be set from a trusted environment.
+ */
+static int
+pipeline_libload(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
+{
+	struct action *a;
+	struct instruction_group *g;
+	char *dir_in, *buffer = NULL;
+	const char *dir_out;
+	int status = 0, n;
+
+	/* Get the environment variables. */
+	dir_in = getenv("RTE_INSTALL_DIR");
+	if (!dir_in) {
+		status = -EINVAL;
+		goto free;
+	}
+
+	dir_out = "/tmp";
+
+	/* Memory allocation for the command buffer. */
+	buffer = malloc(RTE_SWX_PIPELINE_CMD_MAX_SIZE);
+	if (!buffer) {
+		status = -ENOMEM;
+		goto free;
+	}
+
+	n = snprintf(buffer,
+		 RTE_SWX_PIPELINE_CMD_MAX_SIZE,
+		 "gcc -c -O3 -fpic -Wno-deprecated-declarations -o %s/pipeline.o %s/pipeline.c "
+		 "-I %s/lib/pipeline "
+		 "-I %s/lib/eal/include "
+		 "-I %s/lib/eal/x86/include "
+		 "-I %s/lib/eal/include/generic "
+		 "-I %s/lib/meter "
+		 "-I %s/lib/port "
+		 "-I %s/lib/table "
+		 "-I %s/lib/pipeline "
+		 "-I %s/config "
+		 "-I %s/build "
+		 "-I %s/lib/eal/linux/include "
+		 ">%s/pipeline.log 2>&1 "
+		 "&& "
+		 "gcc -shared %s/pipeline.o -o %s/libpipeline.so "
+		 ">>%s/pipeline.log 2>&1",
+		 dir_out,
+		 dir_out,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_in,
+		 dir_out,
+		 dir_out,
+		 dir_out,
+		 dir_out);
+
+	/* Never hand a truncated command over to the shell: a long RTE_INSTALL_DIR would
+	 * otherwise result in some arbitrary prefix of the intended command being executed.
+	 */
+	if ((n < 0) || (n >= RTE_SWX_PIPELINE_CMD_MAX_SIZE)) {
+		status = -EINVAL;
+		goto free;
+	}
+
+	/* Build the shared object library. The value returned by system() is a wait status (or
+	 * -1), not an errno value, so it is mapped to an error code as opposed to being returned.
+	 */
+	if (system(buffer)) {
+		status = -EIO;
+		goto free;
+	}
+
+	/* Open library. */
+	snprintf(buffer,
+		 RTE_SWX_PIPELINE_CMD_MAX_SIZE,
+		 "%s/libpipeline.so",
+		 dir_out);
+
+	p->lib = dlopen(buffer, RTLD_LAZY);
+	if (!p->lib) {
+		status = -EIO;
+		goto free;
+	}
+
+	/* Get the action function symbols. */
+	TAILQ_FOREACH(a, &p->actions, node) {
+		snprintf(buffer, RTE_SWX_PIPELINE_CMD_MAX_SIZE, "action_%s_run", a->name);
+
+		p->action_funcs[a->id] = dlsym(p->lib, buffer);
+		if (!p->action_funcs[a->id]) {
+			status = -EINVAL;
+			goto free;
+		}
+	}
+
+	/* Get the pipeline function symbols. No function is generated for the groups that
+	 * contain a single instruction, so there is no symbol to look up for them.
+	 */
+	TAILQ_FOREACH(g, igl, node) {
+		if (g->first_instr_id == g->last_instr_id)
+			continue;
+
+		snprintf(buffer, RTE_SWX_PIPELINE_CMD_MAX_SIZE, "pipeline_func_%u", g->group_id);
+
+		g->func = dlsym(p->lib, buffer);
+		if (!g->func) {
+			status = -EINVAL;
+			goto free;
+		}
+	}
+
+free:
+	/* Common exit point: the library stays loaded on success only. */
+	if (status && p->lib) {
+		dlclose(p->lib);
+		p->lib = NULL;
+	}
+
+	free(buffer);
+
+	return status;
+}
+
 static int
 pipeline_compile(struct rte_swx_pipeline *p)
 {
@@ -12193,6 +12319,11 @@ pipeline_compile(struct rte_swx_pipeline *p)
 	if (status)
 		goto free;
 
+	/* Build and load the shared object library. */
+	status = pipeline_libload(p, igl);
+	if (status)
+		goto free;
+
 free:
 	instruction_group_list_free(igl);
 
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 4ad6dd42dd..a210df4856 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1400,6 +1400,7 @@ struct rte_swx_pipeline {
 	struct instruction *instructions;
 	struct instruction_data *instruction_data;
 	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
+	void *lib;
 
 	uint32_t n_structs;
 	uint32_t n_ports_in;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V2 24/24] pipeline: enable pipeline compilation
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (21 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
@ 2021-09-10 13:37   ` Cristian Dumitrescu
  2021-09-10 14:09   ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Bruce Richardson
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
  24 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-10 13:37 UTC (permalink / raw)
  To: dev

Commit the pipeline changes when the compilation process is
successful: change the table lookup instructions to execute the action
function for each action, replace the regular pipeline instructions
with the custom instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 55 +++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index a0e09110d6..e08cbf16ee 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -12302,6 +12302,58 @@ pipeline_libload(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 	return status;
 }
 
+/* Commit the result of the pipeline compilation into the pipeline instruction array: the table
+ * and learner lookup instructions are switched to their action function flavor, and each group
+ * of more than one instruction is collapsed into a single custom instruction that runs the
+ * function generated for that group.
+ *
+ * NOTE(review): the custom instruction slot index is not checked here against the number of
+ * custom slots available in instruction_table - confirm that this is enforced elsewhere.
+ */
+static void
+pipeline_adjust(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
+{
+	struct instruction_group *group;
+	uint32_t n_custom = 0, pos;
+
+	/* Pipeline table instructions. */
+	for (pos = 0; pos < p->n_instructions; pos++) {
+		struct instruction *instr = &p->instructions[pos];
+
+		switch (instr->type) {
+		case INSTR_TABLE:
+			instr->type = INSTR_TABLE_AF;
+			break;
+
+		case INSTR_LEARNER:
+			instr->type = INSTR_LEARNER_AF;
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	/* Pipeline custom instructions: one for each group of more than one instruction. */
+	TAILQ_FOREACH(group, igl, node) {
+		uint32_t first = group->first_instr_id;
+		uint32_t last = group->last_instr_id;
+
+		if (first == last)
+			continue;
+
+		/* Install a new custom instruction. */
+		instruction_table[INSTR_CUSTOM_0 + n_custom] = group->func;
+
+		/* First instruction of the group: change its type to the new custom instruction. */
+		p->instructions[first].type = INSTR_CUSTOM_0 + n_custom;
+
+		/* All the subsequent instructions of the group: invalidate. */
+		for (pos = first + 1; pos <= last; pos++)
+			p->instruction_data[pos].invalid = 1;
+
+		n_custom++;
+	}
+
+	/* Remove the invalidated instructions. */
+	p->n_instructions = instr_compact(p->instructions, p->instruction_data, p->n_instructions);
+
+	/* Resolve the jump destination for any "standalone" jump instructions (i.e. those jump
+	 * instructions that are the only instruction within their group, so they were left
+	 * unmodified).
+	 */
+	instr_jmp_resolve(p->instructions, p->instruction_data, p->n_instructions);
+}
+
 static int
 pipeline_compile(struct rte_swx_pipeline *p)
 {
@@ -12324,6 +12376,9 @@ pipeline_compile(struct rte_swx_pipeline *p)
 	if (status)
 		goto free;
 
+	/* Adjust instructions. */
+	pipeline_adjust(p, igl);
+
 free:
 	instruction_group_list_free(igl);
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* Re: [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (22 preceding siblings ...)
  2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
@ 2021-09-10 14:09   ` Bruce Richardson
  2021-09-13 17:07     ` Dumitrescu, Cristian
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
  24 siblings, 1 reply; 79+ messages in thread
From: Bruce Richardson @ 2021-09-10 14:09 UTC (permalink / raw)
  To: Cristian Dumitrescu; +Cc: dev

On Fri, Sep 10, 2021 at 02:36:50PM +0100, Cristian Dumitrescu wrote:
> Start to consolidate the data structures and inline functions required
> by the pipeline instructions into an internal header file.
> 
> Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> ---
> Depends-on: series-18297 ("[V4,1/4] table: add support learner tables")
> 
>  lib/pipeline/meson.build                 |    4 +
>  lib/pipeline/rte_swx_pipeline.c          | 1373 +--------------------
>  lib/pipeline/rte_swx_pipeline_internal.h | 1383 ++++++++++++++++++++++
>  3 files changed, 1388 insertions(+), 1372 deletions(-)
>  create mode 100644 lib/pipeline/rte_swx_pipeline_internal.h
> 
> diff --git a/lib/pipeline/meson.build b/lib/pipeline/meson.build
> index 9132bb517a..ec009631bf 100644
> --- a/lib/pipeline/meson.build
> +++ b/lib/pipeline/meson.build
> @@ -18,3 +18,7 @@ headers = files(
>          'rte_swx_ctl.h',
>  )
>  deps += ['port', 'table', 'meter', 'sched', 'cryptodev']
> +
> +indirect_headers += files(
> +        'rte_swx_pipeline_internal.h',
> +)

internal headers should not appear in meson.build at all, as they are not
for installation. "indirect_headers" is for non-internal, public headers
which are not directly included by apps, but included via another public
header. For example, rte_ethdev_core.h should not be included directly, but
by including rte_ethdev.h which will then pull it in. [The reason these are
separated out in meson.build is for header sanity checking, to check that
normal, non-indirect public headers include all their dependent headers]

^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file
  2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
                     ` (23 preceding siblings ...)
  2021-09-10 14:09   ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Bruce Richardson
@ 2021-09-13 16:44   ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
                       ` (25 more replies)
  24 siblings, 26 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Start to consolidate the data structures and inline functions required
by the pipeline instructions into an internal header file.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
Depends-on: series-18297 ("[V4,1/4] table: add support learner tables")

V3:
-fixed meson.build

 lib/pipeline/rte_swx_pipeline.c          | 1373 +--------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 1383 ++++++++++++++++++++++
 2 files changed, 1384 insertions(+), 1372 deletions(-)
 create mode 100644 lib/pipeline/rte_swx_pipeline_internal.h

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index f89a134a52..ae9b2056db 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2,24 +2,11 @@
  * Copyright(c) 2020 Intel Corporation
  */
 #include <stdlib.h>
-#include <string.h>
 #include <stdio.h>
 #include <errno.h>
-#include <inttypes.h>
-#include <sys/queue.h>
 #include <arpa/inet.h>
 
-#include <rte_common.h>
-#include <rte_prefetch.h>
-#include <rte_byteorder.h>
-#include <rte_cycles.h>
-#include <rte_meter.h>
-
-#include <rte_swx_table_selector.h>
-#include <rte_swx_table_learner.h>
-
-#include "rte_swx_pipeline.h"
-#include "rte_swx_ctl.h"
+#include "rte_swx_pipeline_internal.h"
 
 #define CHECK(condition, err_code)                                             \
 do {                                                                           \
@@ -40,22 +27,9 @@ do {                                                                           \
 	       RTE_SWX_INSTRUCTION_SIZE),                                      \
 	      err_code)
 
-#ifndef TRACE_LEVEL
-#define TRACE_LEVEL 0
-#endif
-
-#if TRACE_LEVEL
-#define TRACE(...) printf(__VA_ARGS__)
-#else
-#define TRACE(...)
-#endif
-
 /*
  * Environment.
  */
-#define ntoh64(x) rte_be_to_cpu_64(x)
-#define hton64(x) rte_cpu_to_be_64(x)
-
 #ifndef RTE_SWX_PIPELINE_HUGE_PAGES_DISABLE
 
 #include <rte_malloc.h>
@@ -103,1351 +77,6 @@ env_free(void *start, size_t size)
 
 #endif
 
-/*
- * Struct.
- */
-struct field {
-	char name[RTE_SWX_NAME_SIZE];
-	uint32_t n_bits;
-	uint32_t offset;
-	int var_size;
-};
-
-struct struct_type {
-	TAILQ_ENTRY(struct_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct field *fields;
-	uint32_t n_fields;
-	uint32_t n_bits;
-	uint32_t n_bits_min;
-	int var_size;
-};
-
-TAILQ_HEAD(struct_type_tailq, struct_type);
-
-/*
- * Input port.
- */
-struct port_in_type {
-	TAILQ_ENTRY(port_in_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct rte_swx_port_in_ops ops;
-};
-
-TAILQ_HEAD(port_in_type_tailq, port_in_type);
-
-struct port_in {
-	TAILQ_ENTRY(port_in) node;
-	struct port_in_type *type;
-	void *obj;
-	uint32_t id;
-};
-
-TAILQ_HEAD(port_in_tailq, port_in);
-
-struct port_in_runtime {
-	rte_swx_port_in_pkt_rx_t pkt_rx;
-	void *obj;
-};
-
-/*
- * Output port.
- */
-struct port_out_type {
-	TAILQ_ENTRY(port_out_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct rte_swx_port_out_ops ops;
-};
-
-TAILQ_HEAD(port_out_type_tailq, port_out_type);
-
-struct port_out {
-	TAILQ_ENTRY(port_out) node;
-	struct port_out_type *type;
-	void *obj;
-	uint32_t id;
-};
-
-TAILQ_HEAD(port_out_tailq, port_out);
-
-struct port_out_runtime {
-	rte_swx_port_out_pkt_tx_t pkt_tx;
-	rte_swx_port_out_flush_t flush;
-	void *obj;
-};
-
-/*
- * Extern object.
- */
-struct extern_type_member_func {
-	TAILQ_ENTRY(extern_type_member_func) node;
-	char name[RTE_SWX_NAME_SIZE];
-	rte_swx_extern_type_member_func_t func;
-	uint32_t id;
-};
-
-TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);
-
-struct extern_type {
-	TAILQ_ENTRY(extern_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *mailbox_struct_type;
-	rte_swx_extern_type_constructor_t constructor;
-	rte_swx_extern_type_destructor_t destructor;
-	struct extern_type_member_func_tailq funcs;
-	uint32_t n_funcs;
-};
-
-TAILQ_HEAD(extern_type_tailq, extern_type);
-
-struct extern_obj {
-	TAILQ_ENTRY(extern_obj) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct extern_type *type;
-	void *obj;
-	uint32_t struct_id;
-	uint32_t id;
-};
-
-TAILQ_HEAD(extern_obj_tailq, extern_obj);
-
-#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
-#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
-#endif
-
-struct extern_obj_runtime {
-	void *obj;
-	uint8_t *mailbox;
-	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
-};
-
-/*
- * Extern function.
- */
-struct extern_func {
-	TAILQ_ENTRY(extern_func) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *mailbox_struct_type;
-	rte_swx_extern_func_t func;
-	uint32_t struct_id;
-	uint32_t id;
-};
-
-TAILQ_HEAD(extern_func_tailq, extern_func);
-
-struct extern_func_runtime {
-	uint8_t *mailbox;
-	rte_swx_extern_func_t func;
-};
-
-/*
- * Header.
- */
-struct header {
-	TAILQ_ENTRY(header) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *st;
-	uint32_t struct_id;
-	uint32_t id;
-};
-
-TAILQ_HEAD(header_tailq, header);
-
-struct header_runtime {
-	uint8_t *ptr0;
-	uint32_t n_bytes;
-};
-
-struct header_out_runtime {
-	uint8_t *ptr0;
-	uint8_t *ptr;
-	uint32_t n_bytes;
-};
-
-/*
- * Instruction.
- */
-
-/* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
- * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
- * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
- * when transferred to packet meta-data and in NBO when transferred to packet
- * headers.
- */
-
-/* Notation conventions:
- *    -Header field: H = h.header.field (dst/src)
- *    -Meta-data field: M = m.field (dst/src)
- *    -Extern object mailbox field: E = e.field (dst/src)
- *    -Extern function mailbox field: F = f.field (dst/src)
- *    -Table action data field: T = t.field (src only)
- *    -Immediate value: I = 32-bit unsigned value (src only)
- */
-
-enum instruction_type {
-	/* rx m.port_in */
-	INSTR_RX,
-
-	/* tx port_out
-	 * port_out = MI
-	 */
-	INSTR_TX,   /* port_out = M */
-	INSTR_TX_I, /* port_out = I */
-
-	/* extract h.header */
-	INSTR_HDR_EXTRACT,
-	INSTR_HDR_EXTRACT2,
-	INSTR_HDR_EXTRACT3,
-	INSTR_HDR_EXTRACT4,
-	INSTR_HDR_EXTRACT5,
-	INSTR_HDR_EXTRACT6,
-	INSTR_HDR_EXTRACT7,
-	INSTR_HDR_EXTRACT8,
-
-	/* extract h.header m.last_field_size */
-	INSTR_HDR_EXTRACT_M,
-
-	/* lookahead h.header */
-	INSTR_HDR_LOOKAHEAD,
-
-	/* emit h.header */
-	INSTR_HDR_EMIT,
-	INSTR_HDR_EMIT_TX,
-	INSTR_HDR_EMIT2_TX,
-	INSTR_HDR_EMIT3_TX,
-	INSTR_HDR_EMIT4_TX,
-	INSTR_HDR_EMIT5_TX,
-	INSTR_HDR_EMIT6_TX,
-	INSTR_HDR_EMIT7_TX,
-	INSTR_HDR_EMIT8_TX,
-
-	/* validate h.header */
-	INSTR_HDR_VALIDATE,
-
-	/* invalidate h.header */
-	INSTR_HDR_INVALIDATE,
-
-	/* mov dst src
-	 * dst = src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_MOV,    /* dst = MEF, src = MEFT */
-	INSTR_MOV_MH, /* dst = MEF, src = H */
-	INSTR_MOV_HM, /* dst = H, src = MEFT */
-	INSTR_MOV_HH, /* dst = H, src = H */
-	INSTR_MOV_I,  /* dst = HMEF, src = I */
-
-	/* dma h.header t.field
-	 * memcpy(h.header, t.field, sizeof(h.header))
-	 */
-	INSTR_DMA_HT,
-	INSTR_DMA_HT2,
-	INSTR_DMA_HT3,
-	INSTR_DMA_HT4,
-	INSTR_DMA_HT5,
-	INSTR_DMA_HT6,
-	INSTR_DMA_HT7,
-	INSTR_DMA_HT8,
-
-	/* add dst src
-	 * dst += src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
-	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
-	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
-	INSTR_ALU_ADD_HH, /* dst = H, src = H */
-	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
-	INSTR_ALU_ADD_HI, /* dst = H, src = I */
-
-	/* sub dst src
-	 * dst -= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
-	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
-	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
-	INSTR_ALU_SUB_HH, /* dst = H, src = H */
-	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
-	INSTR_ALU_SUB_HI, /* dst = H, src = I */
-
-	/* ckadd dst src
-	 * dst = dst '+ src[0:1] '+ src[2:3] + ...
-	 * dst = H, src = {H, h.header}
-	 */
-	INSTR_ALU_CKADD_FIELD,    /* src = H */
-	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */
-	INSTR_ALU_CKADD_STRUCT,   /* src = h.hdeader, with any sizeof(header) */
-
-	/* cksub dst src
-	 * dst = dst '- src
-	 * dst = H, src = H
-	 */
-	INSTR_ALU_CKSUB_FIELD,
-
-	/* and dst src
-	 * dst &= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
-	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
-	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
-	INSTR_ALU_AND_HH, /* dst = H, src = H */
-	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */
-
-	/* or dst src
-	 * dst |= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
-	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
-	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
-	INSTR_ALU_OR_HH, /* dst = H, src = H */
-	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */
-
-	/* xor dst src
-	 * dst ^= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
-	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
-	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
-	INSTR_ALU_XOR_HH, /* dst = H, src = H */
-	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */
-
-	/* shl dst src
-	 * dst <<= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
-	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
-	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
-	INSTR_ALU_SHL_HH, /* dst = H, src = H */
-	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
-	INSTR_ALU_SHL_HI, /* dst = H, src = I */
-
-	/* shr dst src
-	 * dst >>= src
-	 * dst = HMEF, src = HMEFTI
-	 */
-	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
-	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
-	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
-	INSTR_ALU_SHR_HH, /* dst = H, src = H */
-	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
-	INSTR_ALU_SHR_HI, /* dst = H, src = I */
-
-	/* regprefetch REGARRAY index
-	 * prefetch REGARRAY[index]
-	 * index = HMEFTI
-	 */
-	INSTR_REGPREFETCH_RH, /* index = H */
-	INSTR_REGPREFETCH_RM, /* index = MEFT */
-	INSTR_REGPREFETCH_RI, /* index = I */
-
-	/* regrd dst REGARRAY index
-	 * dst = REGARRAY[index]
-	 * dst = HMEF, index = HMEFTI
-	 */
-	INSTR_REGRD_HRH, /* dst = H, index = H */
-	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
-	INSTR_REGRD_HRI, /* dst = H, index = I */
-	INSTR_REGRD_MRH, /* dst = MEF, index = H */
-	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
-	INSTR_REGRD_MRI, /* dst = MEF, index = I */
-
-	/* regwr REGARRAY index src
-	 * REGARRAY[index] = src
-	 * index = HMEFTI, src = HMEFTI
-	 */
-	INSTR_REGWR_RHH, /* index = H, src = H */
-	INSTR_REGWR_RHM, /* index = H, src = MEFT */
-	INSTR_REGWR_RHI, /* index = H, src = I */
-	INSTR_REGWR_RMH, /* index = MEFT, src = H */
-	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
-	INSTR_REGWR_RMI, /* index = MEFT, src = I */
-	INSTR_REGWR_RIH, /* index = I, src = H */
-	INSTR_REGWR_RIM, /* index = I, src = MEFT */
-	INSTR_REGWR_RII, /* index = I, src = I */
-
-	/* regadd REGARRAY index src
-	 * REGARRAY[index] += src
-	 * index = HMEFTI, src = HMEFTI
-	 */
-	INSTR_REGADD_RHH, /* index = H, src = H */
-	INSTR_REGADD_RHM, /* index = H, src = MEFT */
-	INSTR_REGADD_RHI, /* index = H, src = I */
-	INSTR_REGADD_RMH, /* index = MEFT, src = H */
-	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
-	INSTR_REGADD_RMI, /* index = MEFT, src = I */
-	INSTR_REGADD_RIH, /* index = I, src = H */
-	INSTR_REGADD_RIM, /* index = I, src = MEFT */
-	INSTR_REGADD_RII, /* index = I, src = I */
-
-	/* metprefetch METARRAY index
-	 * prefetch METARRAY[index]
-	 * index = HMEFTI
-	 */
-	INSTR_METPREFETCH_H, /* index = H */
-	INSTR_METPREFETCH_M, /* index = MEFT */
-	INSTR_METPREFETCH_I, /* index = I */
-
-	/* meter METARRAY index length color_in color_out
-	 * color_out = meter(METARRAY[index], length, color_in)
-	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
-	 */
-	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
-	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
-	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
-	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
-	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
-	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
-	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
-	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
-	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
-	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
-	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
-	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */
-
-	/* table TABLE */
-	INSTR_TABLE,
-	INSTR_SELECTOR,
-	INSTR_LEARNER,
-
-	/* learn LEARNER ACTION_NAME */
-	INSTR_LEARNER_LEARN,
-
-	/* forget */
-	INSTR_LEARNER_FORGET,
-
-	/* extern e.obj.func */
-	INSTR_EXTERN_OBJ,
-
-	/* extern f.func */
-	INSTR_EXTERN_FUNC,
-
-	/* jmp LABEL
-	 * Unconditional jump
-	 */
-	INSTR_JMP,
-
-	/* jmpv LABEL h.header
-	 * Jump if header is valid
-	 */
-	INSTR_JMP_VALID,
-
-	/* jmpnv LABEL h.header
-	 * Jump if header is invalid
-	 */
-	INSTR_JMP_INVALID,
-
-	/* jmph LABEL
-	 * Jump if table lookup hit
-	 */
-	INSTR_JMP_HIT,
-
-	/* jmpnh LABEL
-	 * Jump if table lookup miss
-	 */
-	INSTR_JMP_MISS,
-
-	/* jmpa LABEL ACTION
-	 * Jump if action run
-	 */
-	INSTR_JMP_ACTION_HIT,
-
-	/* jmpna LABEL ACTION
-	 * Jump if action not run
-	 */
-	INSTR_JMP_ACTION_MISS,
-
-	/* jmpeq LABEL a b
-	 * Jump if a is equal to b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
-	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
-	INSTR_JMP_EQ_HH, /* a = H, b = H */
-	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
-
-	/* jmpneq LABEL a b
-	 * Jump if a is not equal to b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
-	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
-	INSTR_JMP_NEQ_HH, /* a = H, b = H */
-	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
-
-	/* jmplt LABEL a b
-	 * Jump if a is less than b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
-	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
-	INSTR_JMP_LT_HH, /* a = H, b = H */
-	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
-	INSTR_JMP_LT_HI, /* a = H, b = I */
-
-	/* jmpgt LABEL a b
-	 * Jump if a is greater than b
-	 * a = HMEFT, b = HMEFTI
-	 */
-	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
-	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
-	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
-	INSTR_JMP_GT_HH, /* a = H, b = H */
-	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
-	INSTR_JMP_GT_HI, /* a = H, b = I */
-
-	/* return
-	 * Return from action
-	 */
-	INSTR_RETURN,
-};
-
-struct instr_operand {
-	uint8_t struct_id;
-	uint8_t n_bits;
-	uint8_t offset;
-	uint8_t pad;
-};
-
-struct instr_io {
-	struct {
-		union {
-			struct {
-				uint8_t offset;
-				uint8_t n_bits;
-				uint8_t pad[2];
-			};
-
-			uint32_t val;
-		};
-	} io;
-
-	struct {
-		uint8_t header_id[8];
-		uint8_t struct_id[8];
-		uint8_t n_bytes[8];
-	} hdr;
-};
-
-struct instr_hdr_validity {
-	uint8_t header_id;
-};
-
-struct instr_table {
-	uint8_t table_id;
-};
-
-struct instr_learn {
-	uint8_t action_id;
-};
-
-struct instr_extern_obj {
-	uint8_t ext_obj_id;
-	uint8_t func_id;
-};
-
-struct instr_extern_func {
-	uint8_t ext_func_id;
-};
-
-struct instr_dst_src {
-	struct instr_operand dst;
-	union {
-		struct instr_operand src;
-		uint64_t src_val;
-	};
-};
-
-struct instr_regarray {
-	uint8_t regarray_id;
-	uint8_t pad[3];
-
-	union {
-		struct instr_operand idx;
-		uint32_t idx_val;
-	};
-
-	union {
-		struct instr_operand dstsrc;
-		uint64_t dstsrc_val;
-	};
-};
-
-struct instr_meter {
-	uint8_t metarray_id;
-	uint8_t pad[3];
-
-	union {
-		struct instr_operand idx;
-		uint32_t idx_val;
-	};
-
-	struct instr_operand length;
-
-	union {
-		struct instr_operand color_in;
-		uint32_t color_in_val;
-	};
-
-	struct instr_operand color_out;
-};
-
-struct instr_dma {
-	struct {
-		uint8_t header_id[8];
-		uint8_t struct_id[8];
-	} dst;
-
-	struct {
-		uint8_t offset[8];
-	} src;
-
-	uint16_t n_bytes[8];
-};
-
-struct instr_jmp {
-	struct instruction *ip;
-
-	union {
-		struct instr_operand a;
-		uint8_t header_id;
-		uint8_t action_id;
-	};
-
-	union {
-		struct instr_operand b;
-		uint64_t b_val;
-	};
-};
-
-struct instruction {
-	enum instruction_type type;
-	union {
-		struct instr_io io;
-		struct instr_hdr_validity valid;
-		struct instr_dst_src mov;
-		struct instr_regarray regarray;
-		struct instr_meter meter;
-		struct instr_dma dma;
-		struct instr_dst_src alu;
-		struct instr_table table;
-		struct instr_learn learn;
-		struct instr_extern_obj ext_obj;
-		struct instr_extern_func ext_func;
-		struct instr_jmp jmp;
-	};
-};
-
-struct instruction_data {
-	char label[RTE_SWX_NAME_SIZE];
-	char jmp_label[RTE_SWX_NAME_SIZE];
-	uint32_t n_users; /* user = jmp instruction to this instruction. */
-	int invalid;
-};
-
-/*
- * Action.
- */
-struct action {
-	TAILQ_ENTRY(action) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct struct_type *st;
-	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
-	struct instruction *instructions;
-	uint32_t n_instructions;
-	uint32_t id;
-};
-
-TAILQ_HEAD(action_tailq, action);
-
-/*
- * Table.
- */
-struct table_type {
-	TAILQ_ENTRY(table_type) node;
-	char name[RTE_SWX_NAME_SIZE];
-	enum rte_swx_table_match_type match_type;
-	struct rte_swx_table_ops ops;
-};
-
-TAILQ_HEAD(table_type_tailq, table_type);
-
-struct match_field {
-	enum rte_swx_table_match_type match_type;
-	struct field *field;
-};
-
-struct table {
-	TAILQ_ENTRY(table) node;
-	char name[RTE_SWX_NAME_SIZE];
-	char args[RTE_SWX_NAME_SIZE];
-	struct table_type *type; /* NULL when n_fields == 0. */
-
-	/* Match. */
-	struct match_field *fields;
-	uint32_t n_fields;
-	struct header *header; /* Only valid when n_fields > 0. */
-
-	/* Action. */
-	struct action **actions;
-	struct action *default_action;
-	uint8_t *default_action_data;
-	uint32_t n_actions;
-	int default_action_is_const;
-	uint32_t action_data_size_max;
-
-	uint32_t size;
-	uint32_t id;
-};
-
-TAILQ_HEAD(table_tailq, table);
-
-struct table_runtime {
-	rte_swx_table_lookup_t func;
-	void *mailbox;
-	uint8_t **key;
-};
-
-struct table_statistics {
-	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
-	uint64_t *n_pkts_action;
-};
-
-/*
- * Selector.
- */
-struct selector {
-	TAILQ_ENTRY(selector) node;
-	char name[RTE_SWX_NAME_SIZE];
-
-	struct field *group_id_field;
-	struct field **selector_fields;
-	uint32_t n_selector_fields;
-	struct header *selector_header;
-	struct field *member_id_field;
-
-	uint32_t n_groups_max;
-	uint32_t n_members_per_group_max;
-
-	uint32_t id;
-};
-
-TAILQ_HEAD(selector_tailq, selector);
-
-struct selector_runtime {
-	void *mailbox;
-	uint8_t **group_id_buffer;
-	uint8_t **selector_buffer;
-	uint8_t **member_id_buffer;
-};
-
-struct selector_statistics {
-	uint64_t n_pkts;
-};
-
-/*
- * Learner table.
- */
-struct learner {
-	TAILQ_ENTRY(learner) node;
-	char name[RTE_SWX_NAME_SIZE];
-
-	/* Match. */
-	struct field **fields;
-	uint32_t n_fields;
-	struct header *header;
-
-	/* Action. */
-	struct action **actions;
-	struct field **action_arg;
-	struct action *default_action;
-	uint8_t *default_action_data;
-	uint32_t n_actions;
-	int default_action_is_const;
-	uint32_t action_data_size_max;
-
-	uint32_t size;
-	uint32_t timeout;
-	uint32_t id;
-};
-
-TAILQ_HEAD(learner_tailq, learner);
-
-struct learner_runtime {
-	void *mailbox;
-	uint8_t **key;
-	uint8_t **action_data;
-};
-
-struct learner_statistics {
-	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
-	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
-	uint64_t n_pkts_forget;
-	uint64_t *n_pkts_action;
-};
-
-/*
- * Register array.
- */
-struct regarray {
-	TAILQ_ENTRY(regarray) node;
-	char name[RTE_SWX_NAME_SIZE];
-	uint64_t init_val;
-	uint32_t size;
-	uint32_t id;
-};
-
-TAILQ_HEAD(regarray_tailq, regarray);
-
-struct regarray_runtime {
-	uint64_t *regarray;
-	uint32_t size_mask;
-};
-
-/*
- * Meter array.
- */
-struct meter_profile {
-	TAILQ_ENTRY(meter_profile) node;
-	char name[RTE_SWX_NAME_SIZE];
-	struct rte_meter_trtcm_params params;
-	struct rte_meter_trtcm_profile profile;
-	uint32_t n_users;
-};
-
-TAILQ_HEAD(meter_profile_tailq, meter_profile);
-
-struct metarray {
-	TAILQ_ENTRY(metarray) node;
-	char name[RTE_SWX_NAME_SIZE];
-	uint32_t size;
-	uint32_t id;
-};
-
-TAILQ_HEAD(metarray_tailq, metarray);
-
-struct meter {
-	struct rte_meter_trtcm m;
-	struct meter_profile *profile;
-	enum rte_color color_mask;
-	uint8_t pad[20];
-
-	uint64_t n_pkts[RTE_COLORS];
-	uint64_t n_bytes[RTE_COLORS];
-};
-
-struct metarray_runtime {
-	struct meter *metarray;
-	uint32_t size_mask;
-};
-
-/*
- * Pipeline.
- */
-struct thread {
-	/* Packet. */
-	struct rte_swx_pkt pkt;
-	uint8_t *ptr;
-
-	/* Structures. */
-	uint8_t **structs;
-
-	/* Packet headers. */
-	struct header_runtime *headers; /* Extracted or generated headers. */
-	struct header_out_runtime *headers_out; /* Emitted headers. */
-	uint8_t *header_storage;
-	uint8_t *header_out_storage;
-	uint64_t valid_headers;
-	uint32_t n_headers_out;
-
-	/* Packet meta-data. */
-	uint8_t *metadata;
-
-	/* Tables. */
-	struct table_runtime *tables;
-	struct selector_runtime *selectors;
-	struct learner_runtime *learners;
-	struct rte_swx_table_state *table_state;
-	uint64_t action_id;
-	int hit; /* 0 = Miss, 1 = Hit. */
-	uint32_t learner_id;
-	uint64_t time;
-
-	/* Extern objects and functions. */
-	struct extern_obj_runtime *extern_objs;
-	struct extern_func_runtime *extern_funcs;
-
-	/* Instructions. */
-	struct instruction *ip;
-	struct instruction *ret;
-};
-
-#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
-#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
-#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))
-
-#define HEADER_VALID(thread, header_id) \
-	MASK64_BIT_GET((thread)->valid_headers, header_id)
-
-#define ALU(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = dst64 & dst64_mask;                                     \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	uint64_t result = dst operator src;                                    \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define ALU_MH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = dst64 & dst64_mask;                                     \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
-									       \
-	uint64_t result = dst operator src;                                    \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
-}
-
-#define ALU_HM(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	uint64_t result = dst operator src;                                    \
-	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
-}
-
-#define ALU_HM_FAST(thread, ip, operator)  \
-{                                                                                 \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
-	uint64_t dst64 = *dst64_ptr;                                              \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
-	uint64_t dst = dst64 & dst64_mask;                                        \
-										  \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
-	uint64_t src64 = *src64_ptr;                                              \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
-	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
-										  \
-	uint64_t result = dst operator src;                                       \
-										  \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
-}
-
-#define ALU_HH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
-									       \
-	uint64_t result = dst operator src;                                    \
-	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
-}
-
-#define ALU_HH_FAST(thread, ip, operator)  \
-{                                                                                             \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
-	uint64_t dst64 = *dst64_ptr;                                                          \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
-	uint64_t dst = dst64 & dst64_mask;                                                    \
-											      \
-	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
-	uint64_t src64 = *src64_ptr;                                                          \
-	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
-											      \
-	uint64_t result = dst operator src;                                                   \
-											      \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
-}
-
-#else
-
-#define ALU_MH ALU
-#define ALU_HM ALU
-#define ALU_HM_FAST ALU
-#define ALU_HH ALU
-#define ALU_HH_FAST ALU
-
-#endif
-
-#define ALU_I(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = dst64 & dst64_mask;                                     \
-									       \
-	uint64_t src = (ip)->alu.src_val;                                      \
-									       \
-	uint64_t result = dst operator src;                                    \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
-}
-
-#define ALU_MI ALU_I
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define ALU_HI(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
-	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
-									       \
-	uint64_t src = (ip)->alu.src_val;                                      \
-									       \
-	uint64_t result = dst operator src;                                    \
-	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
-}
-
-#else
-
-#define ALU_HI ALU_I
-
-#endif
-
-#define MOV(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define MOV_MH(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
-}
-
-#define MOV_HM(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
-	uint64_t src = src64 & src64_mask;                                     \
-									       \
-	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
-	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
-}
-
-#define MOV_HH(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
-	uint64_t src64 = *src64_ptr;                                           \
-									       \
-	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
-	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
-	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
-}
-
-#else
-
-#define MOV_MH MOV
-#define MOV_HM MOV
-#define MOV_HH MOV
-
-#endif
-
-#define MOV_I(thread, ip)  \
-{                                                                              \
-	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
-	uint64_t dst64 = *dst64_ptr;                                           \
-	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
-									       \
-	uint64_t src = (ip)->mov.src_val;                                      \
-									       \
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
-}
-
-#define JMP_CMP(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
-	uint64_t a = a64 & a64_mask;                                           \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
-	uint64_t b = b64 & b64_mask;                                           \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define JMP_CMP_MH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
-	uint64_t a = a64 & a64_mask;                                           \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_HM(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
-	uint64_t b = b64 & b64_mask;                                           \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_HH(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_HH_FAST(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
-									       \
-	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
-	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
-	uint64_t b64 = *b64_ptr;                                               \
-	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#else
-
-#define JMP_CMP_MH JMP_CMP
-#define JMP_CMP_HM JMP_CMP
-#define JMP_CMP_HH JMP_CMP
-#define JMP_CMP_HH_FAST JMP_CMP
-
-#endif
-
-#define JMP_CMP_I(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
-	uint64_t a = a64 & a64_mask;                                           \
-									       \
-	uint64_t b = (ip)->jmp.b_val;                                          \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#define JMP_CMP_MI JMP_CMP_I
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-#define JMP_CMP_HI(thread, ip, operator)  \
-{                                                                              \
-	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
-	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
-	uint64_t a64 = *a64_ptr;                                               \
-	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
-									       \
-	uint64_t b = (ip)->jmp.b_val;                                          \
-									       \
-	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
-}
-
-#else
-
-#define JMP_CMP_HI JMP_CMP_I
-
-#endif
-
-#define METADATA_READ(thread, offset, n_bits)                                  \
-({                                                                             \
-	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
-	uint64_t m64 = *m64_ptr;                                               \
-	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
-	(m64 & m64_mask);                                                      \
-})
-
-#define METADATA_WRITE(thread, offset, n_bits, value)                          \
-{                                                                              \
-	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
-	uint64_t m64 = *m64_ptr;                                               \
-	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
-									       \
-	uint64_t m_new = value;                                                \
-									       \
-	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
-}
-
-#ifndef RTE_SWX_PIPELINE_THREADS_MAX
-#define RTE_SWX_PIPELINE_THREADS_MAX 16
-#endif
-
-struct rte_swx_pipeline {
-	struct struct_type_tailq struct_types;
-	struct port_in_type_tailq port_in_types;
-	struct port_in_tailq ports_in;
-	struct port_out_type_tailq port_out_types;
-	struct port_out_tailq ports_out;
-	struct extern_type_tailq extern_types;
-	struct extern_obj_tailq extern_objs;
-	struct extern_func_tailq extern_funcs;
-	struct header_tailq headers;
-	struct struct_type *metadata_st;
-	uint32_t metadata_struct_id;
-	struct action_tailq actions;
-	struct table_type_tailq table_types;
-	struct table_tailq tables;
-	struct selector_tailq selectors;
-	struct learner_tailq learners;
-	struct regarray_tailq regarrays;
-	struct meter_profile_tailq meter_profiles;
-	struct metarray_tailq metarrays;
-
-	struct port_in_runtime *in;
-	struct port_out_runtime *out;
-	struct instruction **action_instructions;
-	struct rte_swx_table_state *table_state;
-	struct table_statistics *table_stats;
-	struct selector_statistics *selector_stats;
-	struct learner_statistics *learner_stats;
-	struct regarray_runtime *regarray_runtime;
-	struct metarray_runtime *metarray_runtime;
-	struct instruction *instructions;
-	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
-
-	uint32_t n_structs;
-	uint32_t n_ports_in;
-	uint32_t n_ports_out;
-	uint32_t n_extern_objs;
-	uint32_t n_extern_funcs;
-	uint32_t n_actions;
-	uint32_t n_tables;
-	uint32_t n_selectors;
-	uint32_t n_learners;
-	uint32_t n_regarrays;
-	uint32_t n_metarrays;
-	uint32_t n_headers;
-	uint32_t thread_id;
-	uint32_t port_id;
-	uint32_t n_instructions;
-	int build_done;
-	int numa_node;
-};
-
 /*
  * Struct.
  */
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
new file mode 100644
index 0000000000..5d80dd8451
--- /dev/null
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -0,0 +1,1383 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+#ifndef __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
+#define __INCLUDE_RTE_SWX_PIPELINE_INTERNAL_H__
+
+#include <inttypes.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_meter.h>
+
+#include <rte_swx_table_selector.h>
+#include <rte_swx_table_learner.h>
+#include <rte_swx_pipeline.h>
+#include <rte_swx_ctl.h>
+
+#ifndef TRACE_LEVEL
+#define TRACE_LEVEL 0
+#endif
+
+#if TRACE_LEVEL
+#define TRACE(...) printf(__VA_ARGS__)
+#else
+#define TRACE(...)
+#endif
+
+/*
+ * Environment.
+ */
+#define ntoh64(x) rte_be_to_cpu_64(x)
+#define hton64(x) rte_cpu_to_be_64(x)
+
+/*
+ * Struct.
+ */
+struct field {
+	char name[RTE_SWX_NAME_SIZE];
+	uint32_t n_bits;
+	uint32_t offset;
+	int var_size;
+};
+
+struct struct_type {
+	TAILQ_ENTRY(struct_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct field *fields;
+	uint32_t n_fields;
+	uint32_t n_bits;
+	uint32_t n_bits_min;
+	int var_size;
+};
+
+TAILQ_HEAD(struct_type_tailq, struct_type);
+
+/*
+ * Input port.
+ */
+struct port_in_type {
+	TAILQ_ENTRY(port_in_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct rte_swx_port_in_ops ops;
+};
+
+TAILQ_HEAD(port_in_type_tailq, port_in_type);
+
+struct port_in {
+	TAILQ_ENTRY(port_in) node;
+	struct port_in_type *type;
+	void *obj;
+	uint32_t id;
+};
+
+TAILQ_HEAD(port_in_tailq, port_in);
+
+struct port_in_runtime {
+	rte_swx_port_in_pkt_rx_t pkt_rx;
+	void *obj;
+};
+
+/*
+ * Output port.
+ */
+struct port_out_type {
+	TAILQ_ENTRY(port_out_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct rte_swx_port_out_ops ops;
+};
+
+TAILQ_HEAD(port_out_type_tailq, port_out_type);
+
+struct port_out {
+	TAILQ_ENTRY(port_out) node;
+	struct port_out_type *type;
+	void *obj;
+	uint32_t id;
+};
+
+TAILQ_HEAD(port_out_tailq, port_out);
+
+struct port_out_runtime {
+	rte_swx_port_out_pkt_tx_t pkt_tx;
+	rte_swx_port_out_flush_t flush;
+	void *obj;
+};
+
+/*
+ * Extern object.
+ */
+struct extern_type_member_func {
+	TAILQ_ENTRY(extern_type_member_func) node;
+	char name[RTE_SWX_NAME_SIZE];
+	rte_swx_extern_type_member_func_t func;
+	uint32_t id;
+};
+
+TAILQ_HEAD(extern_type_member_func_tailq, extern_type_member_func);
+
+struct extern_type {
+	TAILQ_ENTRY(extern_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *mailbox_struct_type;
+	rte_swx_extern_type_constructor_t constructor;
+	rte_swx_extern_type_destructor_t destructor;
+	struct extern_type_member_func_tailq funcs;
+	uint32_t n_funcs;
+};
+
+TAILQ_HEAD(extern_type_tailq, extern_type);
+
+struct extern_obj {
+	TAILQ_ENTRY(extern_obj) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct extern_type *type;
+	void *obj;
+	uint32_t struct_id;
+	uint32_t id;
+};
+
+TAILQ_HEAD(extern_obj_tailq, extern_obj);
+
+#ifndef RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX
+#define RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX 8
+#endif
+
+struct extern_obj_runtime {
+	void *obj;
+	uint8_t *mailbox;
+	rte_swx_extern_type_member_func_t funcs[RTE_SWX_EXTERN_TYPE_MEMBER_FUNCS_MAX];
+};
+
+/*
+ * Extern function.
+ */
+struct extern_func {
+	TAILQ_ENTRY(extern_func) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *mailbox_struct_type;
+	rte_swx_extern_func_t func;
+	uint32_t struct_id;
+	uint32_t id;
+};
+
+TAILQ_HEAD(extern_func_tailq, extern_func);
+
+struct extern_func_runtime {
+	uint8_t *mailbox;
+	rte_swx_extern_func_t func;
+};
+
+/*
+ * Header.
+ */
+struct header {
+	TAILQ_ENTRY(header) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *st;
+	uint32_t struct_id;
+	uint32_t id;
+};
+
+TAILQ_HEAD(header_tailq, header);
+
+struct header_runtime {
+	uint8_t *ptr0;
+	uint32_t n_bytes;
+};
+
+struct header_out_runtime {
+	uint8_t *ptr0;
+	uint8_t *ptr;
+	uint32_t n_bytes;
+};
+
+/*
+ * Instruction.
+ */
+
+/* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
+ * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
+ * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
+ * when transferred to packet meta-data and in NBO when transferred to packet
+ * headers.
+ */
+
+/* Notation conventions:
+ *    -Header field: H = h.header.field (dst/src)
+ *    -Meta-data field: M = m.field (dst/src)
+ *    -Extern object mailbox field: E = e.field (dst/src)
+ *    -Extern function mailbox field: F = f.field (dst/src)
+ *    -Table action data field: T = t.field (src only)
+ *    -Immediate value: I = 32-bit unsigned value (src only)
+ */
+
+enum instruction_type {
+	/* rx m.port_in */
+	INSTR_RX,
+
+	/* tx port_out
+	 * port_out = MI
+	 */
+	INSTR_TX,   /* port_out = M */
+	INSTR_TX_I, /* port_out = I */
+
+	/* extract h.header */
+	INSTR_HDR_EXTRACT,
+	INSTR_HDR_EXTRACT2,
+	INSTR_HDR_EXTRACT3,
+	INSTR_HDR_EXTRACT4,
+	INSTR_HDR_EXTRACT5,
+	INSTR_HDR_EXTRACT6,
+	INSTR_HDR_EXTRACT7,
+	INSTR_HDR_EXTRACT8,
+
+	/* extract h.header m.last_field_size */
+	INSTR_HDR_EXTRACT_M,
+
+	/* lookahead h.header */
+	INSTR_HDR_LOOKAHEAD,
+
+	/* emit h.header */
+	INSTR_HDR_EMIT,
+	INSTR_HDR_EMIT_TX,
+	INSTR_HDR_EMIT2_TX,
+	INSTR_HDR_EMIT3_TX,
+	INSTR_HDR_EMIT4_TX,
+	INSTR_HDR_EMIT5_TX,
+	INSTR_HDR_EMIT6_TX,
+	INSTR_HDR_EMIT7_TX,
+	INSTR_HDR_EMIT8_TX,
+
+	/* validate h.header */
+	INSTR_HDR_VALIDATE,
+
+	/* invalidate h.header */
+	INSTR_HDR_INVALIDATE,
+
+	/* mov dst src
+	 * dst = src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_MOV,    /* dst = MEF, src = MEFT */
+	INSTR_MOV_MH, /* dst = MEF, src = H */
+	INSTR_MOV_HM, /* dst = H, src = MEFT */
+	INSTR_MOV_HH, /* dst = H, src = H */
+	INSTR_MOV_I,  /* dst = HMEF, src = I */
+
+	/* dma h.header t.field
+	 * memcpy(h.header, t.field, sizeof(h.header))
+	 */
+	INSTR_DMA_HT,
+	INSTR_DMA_HT2,
+	INSTR_DMA_HT3,
+	INSTR_DMA_HT4,
+	INSTR_DMA_HT5,
+	INSTR_DMA_HT6,
+	INSTR_DMA_HT7,
+	INSTR_DMA_HT8,
+
+	/* add dst src
+	 * dst += src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_ADD,    /* dst = MEF, src = MEF */
+	INSTR_ALU_ADD_MH, /* dst = MEF, src = H */
+	INSTR_ALU_ADD_HM, /* dst = H, src = MEF */
+	INSTR_ALU_ADD_HH, /* dst = H, src = H */
+	INSTR_ALU_ADD_MI, /* dst = MEF, src = I */
+	INSTR_ALU_ADD_HI, /* dst = H, src = I */
+
+	/* sub dst src
+	 * dst -= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_SUB,    /* dst = MEF, src = MEF */
+	INSTR_ALU_SUB_MH, /* dst = MEF, src = H */
+	INSTR_ALU_SUB_HM, /* dst = H, src = MEF */
+	INSTR_ALU_SUB_HH, /* dst = H, src = H */
+	INSTR_ALU_SUB_MI, /* dst = MEF, src = I */
+	INSTR_ALU_SUB_HI, /* dst = H, src = I */
+
+	/* ckadd dst src
+	 * dst = dst '+ src[0:1] '+ src[2:3] '+ ...
+	 * dst = H, src = {H, h.header}
+	 */
+	INSTR_ALU_CKADD_FIELD,    /* src = H */
+	INSTR_ALU_CKADD_STRUCT20, /* src = h.header, with sizeof(header) = 20 */
+	INSTR_ALU_CKADD_STRUCT,   /* src = h.header, with any sizeof(header) */
+
+	/* cksub dst src
+	 * dst = dst '- src
+	 * dst = H, src = H
+	 */
+	INSTR_ALU_CKSUB_FIELD,
+
+	/* and dst src
+	 * dst &= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_AND,    /* dst = MEF, src = MEFT */
+	INSTR_ALU_AND_MH, /* dst = MEF, src = H */
+	INSTR_ALU_AND_HM, /* dst = H, src = MEFT */
+	INSTR_ALU_AND_HH, /* dst = H, src = H */
+	INSTR_ALU_AND_I,  /* dst = HMEF, src = I */
+
+	/* or dst src
+	 * dst |= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_OR,    /* dst = MEF, src = MEFT */
+	INSTR_ALU_OR_MH, /* dst = MEF, src = H */
+	INSTR_ALU_OR_HM, /* dst = H, src = MEFT */
+	INSTR_ALU_OR_HH, /* dst = H, src = H */
+	INSTR_ALU_OR_I,  /* dst = HMEF, src = I */
+
+	/* xor dst src
+	 * dst ^= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_XOR,    /* dst = MEF, src = MEFT */
+	INSTR_ALU_XOR_MH, /* dst = MEF, src = H */
+	INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */
+	INSTR_ALU_XOR_HH, /* dst = H, src = H */
+	INSTR_ALU_XOR_I,  /* dst = HMEF, src = I */
+
+	/* shl dst src
+	 * dst <<= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_SHL,    /* dst = MEF, src = MEF */
+	INSTR_ALU_SHL_MH, /* dst = MEF, src = H */
+	INSTR_ALU_SHL_HM, /* dst = H, src = MEF */
+	INSTR_ALU_SHL_HH, /* dst = H, src = H */
+	INSTR_ALU_SHL_MI, /* dst = MEF, src = I */
+	INSTR_ALU_SHL_HI, /* dst = H, src = I */
+
+	/* shr dst src
+	 * dst >>= src
+	 * dst = HMEF, src = HMEFTI
+	 */
+	INSTR_ALU_SHR,    /* dst = MEF, src = MEF */
+	INSTR_ALU_SHR_MH, /* dst = MEF, src = H */
+	INSTR_ALU_SHR_HM, /* dst = H, src = MEF */
+	INSTR_ALU_SHR_HH, /* dst = H, src = H */
+	INSTR_ALU_SHR_MI, /* dst = MEF, src = I */
+	INSTR_ALU_SHR_HI, /* dst = H, src = I */
+
+	/* regprefetch REGARRAY index
+	 * prefetch REGARRAY[index]
+	 * index = HMEFTI
+	 */
+	INSTR_REGPREFETCH_RH, /* index = H */
+	INSTR_REGPREFETCH_RM, /* index = MEFT */
+	INSTR_REGPREFETCH_RI, /* index = I */
+
+	/* regrd dst REGARRAY index
+	 * dst = REGARRAY[index]
+	 * dst = HMEF, index = HMEFTI
+	 */
+	INSTR_REGRD_HRH, /* dst = H, index = H */
+	INSTR_REGRD_HRM, /* dst = H, index = MEFT */
+	INSTR_REGRD_HRI, /* dst = H, index = I */
+	INSTR_REGRD_MRH, /* dst = MEF, index = H */
+	INSTR_REGRD_MRM, /* dst = MEF, index = MEFT */
+	INSTR_REGRD_MRI, /* dst = MEF, index = I */
+
+	/* regwr REGARRAY index src
+	 * REGARRAY[index] = src
+	 * index = HMEFTI, src = HMEFTI
+	 */
+	INSTR_REGWR_RHH, /* index = H, src = H */
+	INSTR_REGWR_RHM, /* index = H, src = MEFT */
+	INSTR_REGWR_RHI, /* index = H, src = I */
+	INSTR_REGWR_RMH, /* index = MEFT, src = H */
+	INSTR_REGWR_RMM, /* index = MEFT, src = MEFT */
+	INSTR_REGWR_RMI, /* index = MEFT, src = I */
+	INSTR_REGWR_RIH, /* index = I, src = H */
+	INSTR_REGWR_RIM, /* index = I, src = MEFT */
+	INSTR_REGWR_RII, /* index = I, src = I */
+
+	/* regadd REGARRAY index src
+	 * REGARRAY[index] += src
+	 * index = HMEFTI, src = HMEFTI
+	 */
+	INSTR_REGADD_RHH, /* index = H, src = H */
+	INSTR_REGADD_RHM, /* index = H, src = MEFT */
+	INSTR_REGADD_RHI, /* index = H, src = I */
+	INSTR_REGADD_RMH, /* index = MEFT, src = H */
+	INSTR_REGADD_RMM, /* index = MEFT, src = MEFT */
+	INSTR_REGADD_RMI, /* index = MEFT, src = I */
+	INSTR_REGADD_RIH, /* index = I, src = H */
+	INSTR_REGADD_RIM, /* index = I, src = MEFT */
+	INSTR_REGADD_RII, /* index = I, src = I */
+
+	/* metprefetch METARRAY index
+	 * prefetch METARRAY[index]
+	 * index = HMEFTI
+	 */
+	INSTR_METPREFETCH_H, /* index = H */
+	INSTR_METPREFETCH_M, /* index = MEFT */
+	INSTR_METPREFETCH_I, /* index = I */
+
+	/* meter METARRAY index length color_in color_out
+	 * color_out = meter(METARRAY[index], length, color_in)
+	 * index = HMEFTI, length = HMEFT, color_in = MEFTI, color_out = MEF
+	 */
+	INSTR_METER_HHM, /* index = H, length = H, color_in = MEFT */
+	INSTR_METER_HHI, /* index = H, length = H, color_in = I */
+	INSTR_METER_HMM, /* index = H, length = MEFT, color_in = MEFT */
+	INSTR_METER_HMI, /* index = H, length = MEFT, color_in = I */
+	INSTR_METER_MHM, /* index = MEFT, length = H, color_in = MEFT */
+	INSTR_METER_MHI, /* index = MEFT, length = H, color_in = I */
+	INSTR_METER_MMM, /* index = MEFT, length = MEFT, color_in = MEFT */
+	INSTR_METER_MMI, /* index = MEFT, length = MEFT, color_in = I */
+	INSTR_METER_IHM, /* index = I, length = H, color_in = MEFT */
+	INSTR_METER_IHI, /* index = I, length = H, color_in = I */
+	INSTR_METER_IMM, /* index = I, length = MEFT, color_in = MEFT */
+	INSTR_METER_IMI, /* index = I, length = MEFT, color_in = I */
+
+	/* table TABLE */
+	INSTR_TABLE,
+	INSTR_SELECTOR,
+	INSTR_LEARNER,
+
+	/* learn LEARNER ACTION_NAME */
+	INSTR_LEARNER_LEARN,
+
+	/* forget */
+	INSTR_LEARNER_FORGET,
+
+	/* extern e.obj.func */
+	INSTR_EXTERN_OBJ,
+
+	/* extern f.func */
+	INSTR_EXTERN_FUNC,
+
+	/* jmp LABEL
+	 * Unconditional jump
+	 */
+	INSTR_JMP,
+
+	/* jmpv LABEL h.header
+	 * Jump if header is valid
+	 */
+	INSTR_JMP_VALID,
+
+	/* jmpnv LABEL h.header
+	 * Jump if header is invalid
+	 */
+	INSTR_JMP_INVALID,
+
+	/* jmph LABEL
+	 * Jump if table lookup hit
+	 */
+	INSTR_JMP_HIT,
+
+	/* jmpnh LABEL
+	 * Jump if table lookup miss
+	 */
+	INSTR_JMP_MISS,
+
+	/* jmpa LABEL ACTION
+	 * Jump if action run
+	 */
+	INSTR_JMP_ACTION_HIT,
+
+	/* jmpna LABEL ACTION
+	 * Jump if action not run
+	 */
+	INSTR_JMP_ACTION_MISS,
+
+	/* jmpeq LABEL a b
+	 * Jump if a is equal to b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_EQ,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_EQ_MH, /* a = MEFT, b = H */
+	INSTR_JMP_EQ_HM, /* a = H, b = MEFT */
+	INSTR_JMP_EQ_HH, /* a = H, b = H */
+	INSTR_JMP_EQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
+
+	/* jmpneq LABEL a b
+	 * Jump if a is not equal to b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_NEQ,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */
+	INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */
+	INSTR_JMP_NEQ_HH, /* a = H, b = H */
+	INSTR_JMP_NEQ_I,  /* (a, b) = (MEFT, I) or (a, b) = (H, I) */
+
+	/* jmplt LABEL a b
+	 * Jump if a is less than b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_LT,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_LT_MH, /* a = MEFT, b = H */
+	INSTR_JMP_LT_HM, /* a = H, b = MEFT */
+	INSTR_JMP_LT_HH, /* a = H, b = H */
+	INSTR_JMP_LT_MI, /* a = MEFT, b = I */
+	INSTR_JMP_LT_HI, /* a = H, b = I */
+
+	/* jmpgt LABEL a b
+	 * Jump if a is greater than b
+	 * a = HMEFT, b = HMEFTI
+	 */
+	INSTR_JMP_GT,    /* a = MEFT, b = MEFT */
+	INSTR_JMP_GT_MH, /* a = MEFT, b = H */
+	INSTR_JMP_GT_HM, /* a = H, b = MEFT */
+	INSTR_JMP_GT_HH, /* a = H, b = H */
+	INSTR_JMP_GT_MI, /* a = MEFT, b = I */
+	INSTR_JMP_GT_HI, /* a = H, b = I */
+
+	/* return
+	 * Return from action
+	 */
+	INSTR_RETURN,
+};
+
+struct instr_operand {
+	uint8_t struct_id;
+	uint8_t n_bits;
+	uint8_t offset;
+	uint8_t pad;
+};
+
+struct instr_io {
+	struct {
+		union {
+			struct {
+				uint8_t offset;
+				uint8_t n_bits;
+				uint8_t pad[2];
+			};
+
+			uint32_t val;
+		};
+	} io;
+
+	struct {
+		uint8_t header_id[8];
+		uint8_t struct_id[8];
+		uint8_t n_bytes[8];
+	} hdr;
+};
+
+struct instr_hdr_validity {
+	uint8_t header_id;
+};
+
+struct instr_table {
+	uint8_t table_id;
+};
+
+struct instr_learn {
+	uint8_t action_id;
+};
+
+struct instr_extern_obj {
+	uint8_t ext_obj_id;
+	uint8_t func_id;
+};
+
+struct instr_extern_func {
+	uint8_t ext_func_id;
+};
+
+struct instr_dst_src {
+	struct instr_operand dst;
+	union {
+		struct instr_operand src;
+		uint64_t src_val;
+	};
+};
+
+struct instr_regarray {
+	uint8_t regarray_id;
+	uint8_t pad[3];
+
+	union {
+		struct instr_operand idx;
+		uint32_t idx_val;
+	};
+
+	union {
+		struct instr_operand dstsrc;
+		uint64_t dstsrc_val;
+	};
+};
+
+struct instr_meter {
+	uint8_t metarray_id;
+	uint8_t pad[3];
+
+	union {
+		struct instr_operand idx;
+		uint32_t idx_val;
+	};
+
+	struct instr_operand length;
+
+	union {
+		struct instr_operand color_in;
+		uint32_t color_in_val;
+	};
+
+	struct instr_operand color_out;
+};
+
+struct instr_dma {
+	struct {
+		uint8_t header_id[8];
+		uint8_t struct_id[8];
+	} dst;
+
+	struct {
+		uint8_t offset[8];
+	} src;
+
+	uint16_t n_bytes[8];
+};
+
+struct instr_jmp {
+	struct instruction *ip;
+
+	union {
+		struct instr_operand a;
+		uint8_t header_id;
+		uint8_t action_id;
+	};
+
+	union {
+		struct instr_operand b;
+		uint64_t b_val;
+	};
+};
+
+struct instruction {
+	enum instruction_type type;
+	union {
+		struct instr_io io;
+		struct instr_hdr_validity valid;
+		struct instr_dst_src mov;
+		struct instr_regarray regarray;
+		struct instr_meter meter;
+		struct instr_dma dma;
+		struct instr_dst_src alu;
+		struct instr_table table;
+		struct instr_learn learn;
+		struct instr_extern_obj ext_obj;
+		struct instr_extern_func ext_func;
+		struct instr_jmp jmp;
+	};
+};
+
+struct instruction_data {
+	char label[RTE_SWX_NAME_SIZE];
+	char jmp_label[RTE_SWX_NAME_SIZE];
+	uint32_t n_users; /* user = jmp instruction to this instruction. */
+	int invalid;
+};
+
+/*
+ * Action.
+ */
+struct action {
+	TAILQ_ENTRY(action) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct struct_type *st;
+	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
+	struct instruction *instructions;
+	uint32_t n_instructions;
+	uint32_t id;
+};
+
+TAILQ_HEAD(action_tailq, action);
+
+/*
+ * Table.
+ */
+struct table_type {
+	TAILQ_ENTRY(table_type) node;
+	char name[RTE_SWX_NAME_SIZE];
+	enum rte_swx_table_match_type match_type;
+	struct rte_swx_table_ops ops;
+};
+
+TAILQ_HEAD(table_type_tailq, table_type);
+
+struct match_field {
+	enum rte_swx_table_match_type match_type;
+	struct field *field;
+};
+
+struct table {
+	TAILQ_ENTRY(table) node;
+	char name[RTE_SWX_NAME_SIZE];
+	char args[RTE_SWX_NAME_SIZE];
+	struct table_type *type; /* NULL when n_fields == 0. */
+
+	/* Match. */
+	struct match_field *fields;
+	uint32_t n_fields;
+	struct header *header; /* Only valid when n_fields > 0. */
+
+	/* Action. */
+	struct action **actions;
+	struct action *default_action;
+	uint8_t *default_action_data;
+	uint32_t n_actions;
+	int default_action_is_const;
+	uint32_t action_data_size_max;
+
+	uint32_t size;
+	uint32_t id;
+};
+
+TAILQ_HEAD(table_tailq, table);
+
+struct table_runtime {
+	rte_swx_table_lookup_t func;
+	void *mailbox;
+	uint8_t **key;
+};
+
+struct table_statistics {
+	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
+	uint64_t *n_pkts_action;
+};
+
+/*
+ * Selector.
+ */
+struct selector {
+	TAILQ_ENTRY(selector) node;
+	char name[RTE_SWX_NAME_SIZE];
+
+	struct field *group_id_field;
+	struct field **selector_fields;
+	uint32_t n_selector_fields;
+	struct header *selector_header;
+	struct field *member_id_field;
+
+	uint32_t n_groups_max;
+	uint32_t n_members_per_group_max;
+
+	uint32_t id;
+};
+
+TAILQ_HEAD(selector_tailq, selector);
+
+struct selector_runtime {
+	void *mailbox;
+	uint8_t **group_id_buffer;
+	uint8_t **selector_buffer;
+	uint8_t **member_id_buffer;
+};
+
+struct selector_statistics {
+	uint64_t n_pkts;
+};
+
+/*
+ * Learner table.
+ */
+struct learner {
+	TAILQ_ENTRY(learner) node;
+	char name[RTE_SWX_NAME_SIZE];
+
+	/* Match. */
+	struct field **fields;
+	uint32_t n_fields;
+	struct header *header;
+
+	/* Action. */
+	struct action **actions;
+	struct field **action_arg;
+	struct action *default_action;
+	uint8_t *default_action_data;
+	uint32_t n_actions;
+	int default_action_is_const;
+	uint32_t action_data_size_max;
+
+	uint32_t size;
+	uint32_t timeout;
+	uint32_t id;
+};
+
+TAILQ_HEAD(learner_tailq, learner);
+
+struct learner_runtime {
+	void *mailbox;
+	uint8_t **key;
+	uint8_t **action_data;
+};
+
+struct learner_statistics {
+	uint64_t n_pkts_hit[2]; /* 0 = Miss, 1 = Hit. */
+	uint64_t n_pkts_learn[2]; /* 0 = Learn OK, 1 = Learn error. */
+	uint64_t n_pkts_forget;
+	uint64_t *n_pkts_action;
+};
+
+/*
+ * Register array.
+ */
+struct regarray {
+	TAILQ_ENTRY(regarray) node;
+	char name[RTE_SWX_NAME_SIZE];
+	uint64_t init_val;
+	uint32_t size;
+	uint32_t id;
+};
+
+TAILQ_HEAD(regarray_tailq, regarray);
+
+struct regarray_runtime {
+	uint64_t *regarray;
+	uint32_t size_mask;
+};
+
+/*
+ * Meter array.
+ */
+struct meter_profile {
+	TAILQ_ENTRY(meter_profile) node;
+	char name[RTE_SWX_NAME_SIZE];
+	struct rte_meter_trtcm_params params;
+	struct rte_meter_trtcm_profile profile;
+	uint32_t n_users;
+};
+
+TAILQ_HEAD(meter_profile_tailq, meter_profile);
+
+struct metarray {
+	TAILQ_ENTRY(metarray) node;
+	char name[RTE_SWX_NAME_SIZE];
+	uint32_t size;
+	uint32_t id;
+};
+
+TAILQ_HEAD(metarray_tailq, metarray);
+
+struct meter {
+	struct rte_meter_trtcm m;
+	struct meter_profile *profile;
+	enum rte_color color_mask;
+	uint8_t pad[20];
+
+	uint64_t n_pkts[RTE_COLORS];
+	uint64_t n_bytes[RTE_COLORS];
+};
+
+struct metarray_runtime {
+	struct meter *metarray;
+	uint32_t size_mask;
+};
+
+/*
+ * Pipeline.
+ */
+struct thread {
+	/* Packet. */
+	struct rte_swx_pkt pkt;
+	uint8_t *ptr;
+
+	/* Structures. */
+	uint8_t **structs;
+
+	/* Packet headers. */
+	struct header_runtime *headers; /* Extracted or generated headers. */
+	struct header_out_runtime *headers_out; /* Emitted headers. */
+	uint8_t *header_storage;
+	uint8_t *header_out_storage;
+	uint64_t valid_headers;
+	uint32_t n_headers_out;
+
+	/* Packet meta-data. */
+	uint8_t *metadata;
+
+	/* Tables. */
+	struct table_runtime *tables;
+	struct selector_runtime *selectors;
+	struct learner_runtime *learners;
+	struct rte_swx_table_state *table_state;
+	uint64_t action_id;
+	int hit; /* 0 = Miss, 1 = Hit. */
+	uint32_t learner_id;
+	uint64_t time;
+
+	/* Extern objects and functions. */
+	struct extern_obj_runtime *extern_objs;
+	struct extern_func_runtime *extern_funcs;
+
+	/* Instructions. */
+	struct instruction *ip;
+	struct instruction *ret;
+};
+
+#define MASK64_BIT_GET(mask, pos) ((mask) & (1LLU << (pos)))
+#define MASK64_BIT_SET(mask, pos) ((mask) | (1LLU << (pos)))
+#define MASK64_BIT_CLR(mask, pos) ((mask) & ~(1LLU << (pos)))
+
+#define HEADER_VALID(thread, header_id) \
+	MASK64_BIT_GET((thread)->valid_headers, header_id)
+
+#define ALU(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = dst64 & dst64_mask;                                     \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	uint64_t result = dst operator src;                                    \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define ALU_MH(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = dst64 & dst64_mask;                                     \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
+									       \
+	uint64_t result = dst operator src;                                    \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
+}
+
+#define ALU_HM(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	uint64_t result = dst operator src;                                    \
+	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
+}
+
+#define ALU_HM_FAST(thread, ip, operator)  \
+{                                                                                 \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];         \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];      \
+	uint64_t dst64 = *dst64_ptr;                                              \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);          \
+	uint64_t dst = dst64 & dst64_mask;                                        \
+										  \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];         \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];      \
+	uint64_t src64 = *src64_ptr;                                              \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits);          \
+	uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \
+										  \
+	uint64_t result = dst operator src;                                       \
+										  \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                              \
+}
+
+#define ALU_HH(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src = ntoh64(src64) >> (64 - (ip)->alu.src.n_bits);           \
+									       \
+	uint64_t result = dst operator src;                                    \
+	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
+}
+
+#define ALU_HH_FAST(thread, ip, operator)  \
+{                                                                                             \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];                     \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];                  \
+	uint64_t dst64 = *dst64_ptr;                                                          \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);                      \
+	uint64_t dst = dst64 & dst64_mask;                                                    \
+											      \
+	uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id];                     \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset];                  \
+	uint64_t src64 = *src64_ptr;                                                          \
+	uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \
+											      \
+	uint64_t result = dst operator src;                                                   \
+											      \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                                          \
+}
+
+#else
+
+#define ALU_MH ALU
+#define ALU_HM ALU
+#define ALU_HM_FAST ALU
+#define ALU_HH ALU
+#define ALU_HH_FAST ALU
+
+#endif
+
+#define ALU_I(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = dst64 & dst64_mask;                                     \
+									       \
+	uint64_t src = (ip)->alu.src_val;                                      \
+									       \
+	uint64_t result = dst operator src;                                    \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask);            \
+}
+
+#define ALU_MI ALU_I
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define ALU_HI(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits);       \
+	uint64_t dst = ntoh64(dst64) >> (64 - (ip)->alu.dst.n_bits);           \
+									       \
+	uint64_t src = (ip)->alu.src_val;                                      \
+									       \
+	uint64_t result = dst operator src;                                    \
+	result = hton64(result << (64 - (ip)->alu.dst.n_bits));                \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | result;                           \
+}
+
+#else
+
+#define ALU_HI ALU_I
+
+#endif
+
+#define MOV(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define MOV_MH(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src = ntoh64(src64) >> (64 - (ip)->mov.src.n_bits);           \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
+}
+
+#define MOV_HM(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits);       \
+	uint64_t src = src64 & src64_mask;                                     \
+									       \
+	src = hton64(src) >> (64 - (ip)->mov.dst.n_bits);                      \
+	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
+}
+
+#define MOV_HH(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id];      \
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset];   \
+	uint64_t src64 = *src64_ptr;                                           \
+									       \
+	uint64_t src = src64 << (64 - (ip)->mov.src.n_bits);                   \
+	src = src >> (64 - (ip)->mov.dst.n_bits);                              \
+	*dst64_ptr = (dst64 & ~dst64_mask) | src;                              \
+}
+
+#else
+
+#define MOV_MH MOV
+#define MOV_HM MOV
+#define MOV_HH MOV
+
+#endif
+
+#define MOV_I(thread, ip)  \
+{                                                                              \
+	uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id];      \
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset];   \
+	uint64_t dst64 = *dst64_ptr;                                           \
+	uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits);       \
+									       \
+	uint64_t src = (ip)->mov.src_val;                                      \
+									       \
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);               \
+}
+
+#define JMP_CMP(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
+	uint64_t a = a64 & a64_mask;                                           \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
+	uint64_t b = b64 & b64_mask;                                           \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define JMP_CMP_MH(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
+	uint64_t a = a64 & a64_mask;                                           \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_HM(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b64_mask = UINT64_MAX >> (64 - (ip)->jmp.b.n_bits);           \
+	uint64_t b = b64 & b64_mask;                                           \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_HH(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b = ntoh64(b64) >> (64 - (ip)->jmp.b.n_bits);                 \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_HH_FAST(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits);                         \
+									       \
+	uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id];          \
+	uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset];         \
+	uint64_t b64 = *b64_ptr;                                               \
+	uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits);                         \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#else
+
+#define JMP_CMP_MH JMP_CMP
+#define JMP_CMP_HM JMP_CMP
+#define JMP_CMP_HH JMP_CMP
+#define JMP_CMP_HH_FAST JMP_CMP
+
+#endif
+
+#define JMP_CMP_I(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a64_mask = UINT64_MAX >> (64 - (ip)->jmp.a.n_bits);           \
+	uint64_t a = a64 & a64_mask;                                           \
+									       \
+	uint64_t b = (ip)->jmp.b_val;                                          \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#define JMP_CMP_MI JMP_CMP_I
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define JMP_CMP_HI(thread, ip, operator)  \
+{                                                                              \
+	uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id];          \
+	uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset];         \
+	uint64_t a64 = *a64_ptr;                                               \
+	uint64_t a = ntoh64(a64) >> (64 - (ip)->jmp.a.n_bits);                 \
+									       \
+	uint64_t b = (ip)->jmp.b_val;                                          \
+									       \
+	(thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1);     \
+}
+
+#else
+
+#define JMP_CMP_HI JMP_CMP_I
+
+#endif
+
+#define METADATA_READ(thread, offset, n_bits)                                  \
+({                                                                             \
+	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
+	uint64_t m64 = *m64_ptr;                                               \
+	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
+	(m64 & m64_mask);                                                      \
+})
+
+#define METADATA_WRITE(thread, offset, n_bits, value)                          \
+{                                                                              \
+	uint64_t *m64_ptr = (uint64_t *)&(thread)->metadata[offset];           \
+	uint64_t m64 = *m64_ptr;                                               \
+	uint64_t m64_mask = UINT64_MAX >> (64 - (n_bits));                     \
+									       \
+	uint64_t m_new = value;                                                \
+									       \
+	*m64_ptr = (m64 & ~m64_mask) | (m_new & m64_mask);                     \
+}
+
+#ifndef RTE_SWX_PIPELINE_THREADS_MAX
+#define RTE_SWX_PIPELINE_THREADS_MAX 16
+#endif
+
+struct rte_swx_pipeline {
+	struct struct_type_tailq struct_types;
+	struct port_in_type_tailq port_in_types;
+	struct port_in_tailq ports_in;
+	struct port_out_type_tailq port_out_types;
+	struct port_out_tailq ports_out;
+	struct extern_type_tailq extern_types;
+	struct extern_obj_tailq extern_objs;
+	struct extern_func_tailq extern_funcs;
+	struct header_tailq headers;
+	struct struct_type *metadata_st;
+	uint32_t metadata_struct_id;
+	struct action_tailq actions;
+	struct table_type_tailq table_types;
+	struct table_tailq tables;
+	struct selector_tailq selectors;
+	struct learner_tailq learners;
+	struct regarray_tailq regarrays;
+	struct meter_profile_tailq meter_profiles;
+	struct metarray_tailq metarrays;
+
+	struct port_in_runtime *in;
+	struct port_out_runtime *out;
+	struct instruction **action_instructions;
+	struct rte_swx_table_state *table_state;
+	struct table_statistics *table_stats;
+	struct selector_statistics *selector_stats;
+	struct learner_statistics *learner_stats;
+	struct regarray_runtime *regarray_runtime;
+	struct metarray_runtime *metarray_runtime;
+	struct instruction *instructions;
+	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
+
+	uint32_t n_structs;
+	uint32_t n_ports_in;
+	uint32_t n_ports_out;
+	uint32_t n_extern_objs;
+	uint32_t n_extern_funcs;
+	uint32_t n_actions;
+	uint32_t n_tables;
+	uint32_t n_selectors;
+	uint32_t n_learners;
+	uint32_t n_regarrays;
+	uint32_t n_metarrays;
+	uint32_t n_headers;
+	uint32_t thread_id;
+	uint32_t port_id;
+	uint32_t n_instructions;
+	int build_done;
+	int numa_node;
+};
+
+#endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 02/24] pipeline: move thread inline functions to header file
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
                       ` (24 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Move the thread inline functions to the internal header file.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 56 ----------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 59 ++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 56 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ae9b2056db..7e01453c27 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1492,62 +1492,6 @@ struct_field_parse(struct rte_swx_pipeline *p,
 	}
 }
 
-static inline void
-pipeline_port_inc(struct rte_swx_pipeline *p)
-{
-	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
-}
-
-static inline void
-thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
-{
-	t->ip = p->instructions;
-}
-
-static inline void
-thread_ip_set(struct thread *t, struct instruction *ip)
-{
-	t->ip = ip;
-}
-
-static inline void
-thread_ip_action_call(struct rte_swx_pipeline *p,
-		      struct thread *t,
-		      uint32_t action_id)
-{
-	t->ret = t->ip + 1;
-	t->ip = p->action_instructions[action_id];
-}
-
-static inline void
-thread_ip_inc(struct rte_swx_pipeline *p);
-
-static inline void
-thread_ip_inc(struct rte_swx_pipeline *p)
-{
-	struct thread *t = &p->threads[p->thread_id];
-
-	t->ip++;
-}
-
-static inline void
-thread_ip_inc_cond(struct thread *t, int cond)
-{
-	t->ip += cond;
-}
-
-static inline void
-thread_yield(struct rte_swx_pipeline *p)
-{
-	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
-}
-
-static inline void
-thread_yield_cond(struct rte_swx_pipeline *p, int cond)
-{
-	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
-}
-
 /*
  * rx.
  */
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 5d80dd8451..682f4c86a0 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1380,4 +1380,63 @@ struct rte_swx_pipeline {
 	int numa_node;
 };
 
+/*
+ * Instruction.
+ */
+static inline void
+pipeline_port_inc(struct rte_swx_pipeline *p)
+{
+	p->port_id = (p->port_id + 1) & (p->n_ports_in - 1);
+}
+
+static inline void
+thread_ip_reset(struct rte_swx_pipeline *p, struct thread *t)
+{
+	t->ip = p->instructions;
+}
+
+static inline void
+thread_ip_set(struct thread *t, struct instruction *ip)
+{
+	t->ip = ip;
+}
+
+static inline void
+thread_ip_action_call(struct rte_swx_pipeline *p,
+		      struct thread *t,
+		      uint32_t action_id)
+{
+	t->ret = t->ip + 1;
+	t->ip = p->action_instructions[action_id];
+}
+
+static inline void
+thread_ip_inc(struct rte_swx_pipeline *p);
+
+static inline void
+thread_ip_inc(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+
+	t->ip++;
+}
+
+static inline void
+thread_ip_inc_cond(struct thread *t, int cond)
+{
+	t->ip += cond;
+}
+
+static inline void
+thread_yield(struct rte_swx_pipeline *p)
+{
+	p->thread_id = (p->thread_id + 1) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
+}
+
+static inline void
+thread_yield_cond(struct rte_swx_pipeline *p, int cond)
+{
+	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 03/24] pipeline: create inline functions for RX instruction
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
                       ` (23 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the RX instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 38 ------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 51 ++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 7e01453c27..ad1ecfc640 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1517,44 +1517,6 @@ instr_rx_translate(struct rte_swx_pipeline *p,
 	return 0;
 }
 
-static inline void
-instr_rx_exec(struct rte_swx_pipeline *p);
-
-static inline void
-instr_rx_exec(struct rte_swx_pipeline *p)
-{
-	struct thread *t = &p->threads[p->thread_id];
-	struct instruction *ip = t->ip;
-	struct port_in_runtime *port = &p->in[p->port_id];
-	struct rte_swx_pkt *pkt = &t->pkt;
-	int pkt_received;
-
-	/* Packet. */
-	pkt_received = port->pkt_rx(port->obj, pkt);
-	t->ptr = &pkt->pkt[pkt->offset];
-	rte_prefetch0(t->ptr);
-
-	TRACE("[Thread %2u] rx %s from port %u\n",
-	      p->thread_id,
-	      pkt_received ? "1 pkt" : "0 pkts",
-	      p->port_id);
-
-	/* Headers. */
-	t->valid_headers = 0;
-	t->n_headers_out = 0;
-
-	/* Meta-data. */
-	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);
-
-	/* Tables. */
-	t->table_state = p->table_state;
-
-	/* Thread. */
-	pipeline_port_inc(p);
-	thread_ip_inc_cond(t, pkt_received);
-	thread_yield(p);
-}
-
 /*
  * tx.
  */
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 682f4c86a0..9814b5685a 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1439,4 +1439,55 @@ thread_yield_cond(struct rte_swx_pipeline *p, int cond)
 	p->thread_id = (p->thread_id + cond) & (RTE_SWX_PIPELINE_THREADS_MAX - 1);
 }
 
+/*
+ * rx.
+ */
+static inline int
+__instr_rx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct port_in_runtime *port = &p->in[p->port_id];
+	struct rte_swx_pkt *pkt = &t->pkt;
+	int pkt_received;
+
+	/* Packet. */
+	pkt_received = port->pkt_rx(port->obj, pkt);
+	t->ptr = &pkt->pkt[pkt->offset];
+	rte_prefetch0(t->ptr);
+
+	TRACE("[Thread %2u] rx %s from port %u\n",
+	      p->thread_id,
+	      pkt_received ? "1 pkt" : "0 pkts",
+	      p->port_id);
+
+	/* Headers. */
+	t->valid_headers = 0;
+	t->n_headers_out = 0;
+
+	/* Meta-data. */
+	METADATA_WRITE(t, ip->io.io.offset, ip->io.io.n_bits, p->port_id);
+
+	/* Tables. */
+	t->table_state = p->table_state;
+
+	/* Thread. */
+	pipeline_port_inc(p);
+
+	return pkt_received;
+}
+
+static inline void
+instr_rx_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+	int pkt_received;
+
+	/* Packet. */
+	pkt_received = __instr_rx_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_inc_cond(t, pkt_received);
+	thread_yield(p);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 04/24] pipeline: create inline functions for TX instruction
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
                       ` (22 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the TX instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 86 +---------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 90 ++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 84 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ad1ecfc640..bcf796f8c3 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1567,84 +1567,13 @@ instr_drop_translate(struct rte_swx_pipeline *p,
 	return 0;
 }
 
-static inline void
-emit_handler(struct thread *t)
-{
-	struct header_out_runtime *h0 = &t->headers_out[0];
-	struct header_out_runtime *h1 = &t->headers_out[1];
-	uint32_t offset = 0, i;
-
-	/* No header change or header decapsulation. */
-	if ((t->n_headers_out == 1) &&
-	    (h0->ptr + h0->n_bytes == t->ptr)) {
-		TRACE("Emit handler: no header change or header decap.\n");
-
-		t->pkt.offset -= h0->n_bytes;
-		t->pkt.length += h0->n_bytes;
-
-		return;
-	}
-
-	/* Header encapsulation (optionally, with prior header decasulation). */
-	if ((t->n_headers_out == 2) &&
-	    (h1->ptr + h1->n_bytes == t->ptr) &&
-	    (h0->ptr == h0->ptr0)) {
-		uint32_t offset;
-
-		TRACE("Emit handler: header encapsulation.\n");
-
-		offset = h0->n_bytes + h1->n_bytes;
-		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
-		t->pkt.offset -= offset;
-		t->pkt.length += offset;
-
-		return;
-	}
-
-	/* Header insertion. */
-	/* TBD */
-
-	/* Header extraction. */
-	/* TBD */
-
-	/* For any other case. */
-	TRACE("Emit handler: complex case.\n");
-
-	for (i = 0; i < t->n_headers_out; i++) {
-		struct header_out_runtime *h = &t->headers_out[i];
-
-		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
-		offset += h->n_bytes;
-	}
-
-	if (offset) {
-		memcpy(t->ptr - offset, t->header_out_storage, offset);
-		t->pkt.offset -= offset;
-		t->pkt.length += offset;
-	}
-}
-
-static inline void
-instr_tx_exec(struct rte_swx_pipeline *p);
-
 static inline void
 instr_tx_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
-	struct port_out_runtime *port = &p->out[port_id];
-	struct rte_swx_pkt *pkt = &t->pkt;
 
-	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
-	      p->thread_id,
-	      (uint32_t)port_id);
-
-	/* Headers. */
-	emit_handler(t);
-
-	/* Packet. */
-	port->pkt_tx(port->obj, pkt);
+	__instr_tx_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_reset(p, t);
@@ -1656,19 +1585,8 @@ instr_tx_i_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t port_id = ip->io.io.val;
-	struct port_out_runtime *port = &p->out[port_id];
-	struct rte_swx_pkt *pkt = &t->pkt;
-
-	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
-	      p->thread_id,
-	      (uint32_t)port_id);
-
-	/* Headers. */
-	emit_handler(t);
 
-	/* Packet. */
-	port->pkt_tx(port->obj, pkt);
+	__instr_tx_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_reset(p, t);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 9814b5685a..e9fe6632b6 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1490,4 +1490,94 @@ instr_rx_exec(struct rte_swx_pipeline *p)
 	thread_yield(p);
 }
 
+/*
+ * tx.
+ */
+static inline void
+emit_handler(struct thread *t)
+{
+	struct header_out_runtime *h0 = &t->headers_out[0];
+	struct header_out_runtime *h1 = &t->headers_out[1];
+	uint32_t offset = 0, i;
+
+	/* No header change or header decapsulation. */
+	if ((t->n_headers_out == 1) &&
+	    (h0->ptr + h0->n_bytes == t->ptr)) {
+		TRACE("Emit handler: no header change or header decap.\n");
+
+		t->pkt.offset -= h0->n_bytes;
+		t->pkt.length += h0->n_bytes;
+
+		return;
+	}
+
+	/* Header encapsulation (optionally, with prior header decapsulation). */
+	if ((t->n_headers_out == 2) &&
+	    (h1->ptr + h1->n_bytes == t->ptr) &&
+	    (h0->ptr == h0->ptr0)) {
+		uint32_t offset;
+
+		TRACE("Emit handler: header encapsulation.\n");
+
+		offset = h0->n_bytes + h1->n_bytes;
+		memcpy(t->ptr - offset, h0->ptr, h0->n_bytes);
+		t->pkt.offset -= offset;
+		t->pkt.length += offset;
+
+		return;
+	}
+
+	/* For any other case. */
+	TRACE("Emit handler: complex case.\n");
+
+	for (i = 0; i < t->n_headers_out; i++) {
+		struct header_out_runtime *h = &t->headers_out[i];
+
+		memcpy(&t->header_out_storage[offset], h->ptr, h->n_bytes);
+		offset += h->n_bytes;
+	}
+
+	if (offset) {
+		memcpy(t->ptr - offset, t->header_out_storage, offset);
+		t->pkt.offset -= offset;
+		t->pkt.length += offset;
+	}
+}
+
+static inline void
+__instr_tx_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t port_id = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
+	struct port_out_runtime *port = &p->out[port_id];
+	struct rte_swx_pkt *pkt = &t->pkt;
+
+	TRACE("[Thread %2u]: tx 1 pkt to port %u\n",
+	      p->thread_id,
+	      (uint32_t)port_id);
+
+	/* Headers. */
+	emit_handler(t);
+
+	/* Packet. */
+	port->pkt_tx(port->obj, pkt);
+}
+
+static inline void
+__instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t port_id = ip->io.io.val;
+	struct port_out_runtime *port = &p->out[port_id];
+	struct rte_swx_pkt *pkt = &t->pkt;
+
+	TRACE("[Thread %2u]: tx (i) 1 pkt to port %u\n",
+	      p->thread_id,
+	      (uint32_t)port_id);
+
+	/* Headers. */
+	emit_handler(t);
+
+	/* Packet. */
+	port->pkt_tx(port->obj, pkt);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 05/24] pipeline: create inline functions for extract instruction
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (2 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
                       ` (21 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the extract and lookahead instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 130 ++++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 178 +++++++++++++++++++++++
 2 files changed, 203 insertions(+), 105 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index bcf796f8c3..fd7e31b709 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1665,52 +1665,12 @@ instr_hdr_lookahead_translate(struct rte_swx_pipeline *p,
 }
 
 static inline void
-__instr_hdr_extract_exec(struct rte_swx_pipeline *p, uint32_t n_extract);
-
-static inline void
-__instr_hdr_extract_exec(struct rte_swx_pipeline *p, uint32_t n_extract)
+instr_hdr_extract_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t valid_headers = t->valid_headers;
-	uint8_t *ptr = t->ptr;
-	uint32_t offset = t->pkt.offset;
-	uint32_t length = t->pkt.length;
-	uint32_t i;
 
-	for (i = 0; i < n_extract; i++) {
-		uint32_t header_id = ip->io.hdr.header_id[i];
-		uint32_t struct_id = ip->io.hdr.struct_id[i];
-		uint32_t n_bytes = ip->io.hdr.n_bytes[i];
-
-		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
-		      p->thread_id,
-		      header_id,
-		      n_bytes);
-
-		/* Headers. */
-		t->structs[struct_id] = ptr;
-		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
-
-		/* Packet. */
-		offset += n_bytes;
-		length -= n_bytes;
-		ptr += n_bytes;
-	}
-
-	/* Headers. */
-	t->valid_headers = valid_headers;
-
-	/* Packet. */
-	t->pkt.offset = offset;
-	t->pkt.length = length;
-	t->ptr = ptr;
-}
-
-static inline void
-instr_hdr_extract_exec(struct rte_swx_pipeline *p)
-{
-	__instr_hdr_extract_exec(p, 1);
+	__instr_hdr_extract_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1719,10 +1679,10 @@ instr_hdr_extract_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract2_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 2);
+	__instr_hdr_extract2_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1731,10 +1691,10 @@ instr_hdr_extract2_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract3_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 3);
+	__instr_hdr_extract3_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1743,10 +1703,10 @@ instr_hdr_extract3_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract4_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 4);
+	__instr_hdr_extract4_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1755,10 +1715,10 @@ instr_hdr_extract4_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract5_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 5);
+	__instr_hdr_extract5_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1767,10 +1727,10 @@ instr_hdr_extract5_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract6_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 6);
+	__instr_hdr_extract6_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1779,10 +1739,10 @@ instr_hdr_extract6_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract7_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 7);
+	__instr_hdr_extract7_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1791,10 +1751,10 @@ instr_hdr_extract7_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_extract8_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_extract_exec(p, 8);
+	__instr_hdr_extract8_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1806,35 +1766,7 @@ instr_hdr_extract_m_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	uint64_t valid_headers = t->valid_headers;
-	uint8_t *ptr = t->ptr;
-	uint32_t offset = t->pkt.offset;
-	uint32_t length = t->pkt.length;
-
-	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
-	uint32_t header_id = ip->io.hdr.header_id[0];
-	uint32_t struct_id = ip->io.hdr.struct_id[0];
-	uint32_t n_bytes = ip->io.hdr.n_bytes[0];
-
-	struct header_runtime *h = &t->headers[header_id];
-
-	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
-	      p->thread_id,
-	      header_id,
-	      n_bytes,
-	      n_bytes_last);
-
-	n_bytes += n_bytes_last;
-
-	/* Headers. */
-	t->structs[struct_id] = ptr;
-	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
-	h->n_bytes = n_bytes;
-
-	/* Packet. */
-	t->pkt.offset = offset + n_bytes;
-	t->pkt.length = length - n_bytes;
-	t->ptr = ptr + n_bytes;
+	__instr_hdr_extract_m_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1846,19 +1778,7 @@ instr_hdr_lookahead_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	uint64_t valid_headers = t->valid_headers;
-	uint8_t *ptr = t->ptr;
-
-	uint32_t header_id = ip->io.hdr.header_id[0];
-	uint32_t struct_id = ip->io.hdr.struct_id[0];
-
-	TRACE("[Thread %2u]: lookahead header %u\n",
-	      p->thread_id,
-	      header_id);
-
-	/* Headers. */
-	t->structs[struct_id] = ptr;
-	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+	__instr_hdr_lookahead_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index e9fe6632b6..1519bcc305 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1580,4 +1580,182 @@ __instr_tx_i_exec(struct rte_swx_pipeline *p, struct thread *t, const struct ins
 	port->pkt_tx(port->obj, pkt);
 }
 
+/*
+ * extract.
+ */
+static inline void
+__instr_hdr_extract_many_exec(struct rte_swx_pipeline *p __rte_unused,
+			      struct thread *t,
+			      const struct instruction *ip,
+			      uint32_t n_extract)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint8_t *ptr = t->ptr;
+	uint32_t offset = t->pkt.offset;
+	uint32_t length = t->pkt.length;
+	uint32_t i;
+
+	for (i = 0; i < n_extract; i++) {
+		uint32_t header_id = ip->io.hdr.header_id[i];
+		uint32_t struct_id = ip->io.hdr.struct_id[i];
+		uint32_t n_bytes = ip->io.hdr.n_bytes[i];
+
+		TRACE("[Thread %2u]: extract header %u (%u bytes)\n",
+		      p->thread_id,
+		      header_id,
+		      n_bytes);
+
+		/* Headers. */
+		t->structs[struct_id] = ptr;
+		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+
+		/* Packet. */
+		offset += n_bytes;
+		length -= n_bytes;
+		ptr += n_bytes;
+	}
+
+	/* Headers. */
+	t->valid_headers = valid_headers;
+
+	/* Packet. */
+	t->pkt.offset = offset;
+	t->pkt.length = length;
+	t->ptr = ptr;
+}
+
+static inline void
+__instr_hdr_extract_exec(struct rte_swx_pipeline *p,
+			 struct thread *t,
+			 const struct instruction *ip)
+{
+	__instr_hdr_extract_many_exec(p, t, ip, 1);
+}
+
+static inline void
+__instr_hdr_extract2_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 2);
+}
+
+static inline void
+__instr_hdr_extract3_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 3);
+}
+
+static inline void
+__instr_hdr_extract4_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 4);
+}
+
+static inline void
+__instr_hdr_extract5_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 5);
+}
+
+static inline void
+__instr_hdr_extract6_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 6);
+}
+
+static inline void
+__instr_hdr_extract7_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 7);
+}
+
+static inline void
+__instr_hdr_extract8_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_extract_many_exec(p, t, ip, 8);
+}
+
+static inline void
+__instr_hdr_extract_m_exec(struct rte_swx_pipeline *p __rte_unused,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint8_t *ptr = t->ptr;
+	uint32_t offset = t->pkt.offset;
+	uint32_t length = t->pkt.length;
+
+	uint32_t n_bytes_last = METADATA_READ(t, ip->io.io.offset, ip->io.io.n_bits);
+	uint32_t header_id = ip->io.hdr.header_id[0];
+	uint32_t struct_id = ip->io.hdr.struct_id[0];
+	uint32_t n_bytes = ip->io.hdr.n_bytes[0];
+
+	struct header_runtime *h = &t->headers[header_id];
+
+	TRACE("[Thread %2u]: extract header %u (%u + %u bytes)\n",
+	      p->thread_id,
+	      header_id,
+	      n_bytes,
+	      n_bytes_last);
+
+	n_bytes += n_bytes_last;
+
+	/* Headers. */
+	t->structs[struct_id] = ptr;
+	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+	h->n_bytes = n_bytes;
+
+	/* Packet. */
+	t->pkt.offset = offset + n_bytes;
+	t->pkt.length = length - n_bytes;
+	t->ptr = ptr + n_bytes;
+}
+
+static inline void
+__instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint8_t *ptr = t->ptr;
+
+	uint32_t header_id = ip->io.hdr.header_id[0];
+	uint32_t struct_id = ip->io.hdr.struct_id[0];
+
+	TRACE("[Thread %2u]: lookahead header %u\n",
+	      p->thread_id,
+	      header_id);
+
+	/* Headers. */
+	t->structs[struct_id] = ptr;
+	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 06/24] pipeline: create inline functions for emit instruction
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (3 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
                       ` (20 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the emit instruction and its fused emit + tx variants.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 162 ++++++++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 170 +++++++++++++++++++++++
 2 files changed, 228 insertions(+), 104 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index fd7e31b709..80c5fb94bb 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1810,82 +1810,12 @@ instr_hdr_emit_translate(struct rte_swx_pipeline *p,
 }
 
 static inline void
-__instr_hdr_emit_exec(struct rte_swx_pipeline *p, uint32_t n_emit);
-
-static inline void
-__instr_hdr_emit_exec(struct rte_swx_pipeline *p, uint32_t n_emit)
+instr_hdr_emit_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t valid_headers = t->valid_headers;
-	uint32_t n_headers_out = t->n_headers_out;
-	struct header_out_runtime *ho = &t->headers_out[n_headers_out - 1];
-	uint8_t *ho_ptr = NULL;
-	uint32_t ho_nbytes = 0, first = 1, i;
-
-	for (i = 0; i < n_emit; i++) {
-		uint32_t header_id = ip->io.hdr.header_id[i];
-		uint32_t struct_id = ip->io.hdr.struct_id[i];
-
-		struct header_runtime *hi = &t->headers[header_id];
-		uint8_t *hi_ptr0 = hi->ptr0;
-		uint32_t n_bytes = hi->n_bytes;
-
-		uint8_t *hi_ptr = t->structs[struct_id];
-
-		if (!MASK64_BIT_GET(valid_headers, header_id))
-			continue;
-
-		TRACE("[Thread %2u]: emit header %u\n",
-		      p->thread_id,
-		      header_id);
-
-		/* Headers. */
-		if (first) {
-			first = 0;
-
-			if (!t->n_headers_out) {
-				ho = &t->headers_out[0];
-
-				ho->ptr0 = hi_ptr0;
-				ho->ptr = hi_ptr;
-
-				ho_ptr = hi_ptr;
-				ho_nbytes = n_bytes;
-
-				n_headers_out = 1;
-
-				continue;
-			} else {
-				ho_ptr = ho->ptr;
-				ho_nbytes = ho->n_bytes;
-			}
-		}
-
-		if (ho_ptr + ho_nbytes == hi_ptr) {
-			ho_nbytes += n_bytes;
-		} else {
-			ho->n_bytes = ho_nbytes;
-
-			ho++;
-			ho->ptr0 = hi_ptr0;
-			ho->ptr = hi_ptr;
 
-			ho_ptr = hi_ptr;
-			ho_nbytes = n_bytes;
-
-			n_headers_out++;
-		}
-	}
-
-	ho->n_bytes = ho_nbytes;
-	t->n_headers_out = n_headers_out;
-}
-
-static inline void
-instr_hdr_emit_exec(struct rte_swx_pipeline *p)
-{
-	__instr_hdr_emit_exec(p, 1);
+	__instr_hdr_emit_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1894,81 +1824,105 @@ instr_hdr_emit_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 1);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_emit_exec(p, 2);
-	instr_tx_exec(p);
+	__instr_hdr_emit2_tx_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit3_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 3);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit4_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 4);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_hdr_emit_exec(p, 5);
-	instr_tx_exec(p);
+	__instr_hdr_emit5_tx_exec(p, t, ip);
+
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit6_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 6);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit7_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 7);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 static inline void
 instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+
+	__instr_hdr_emit8_tx_exec(p, t, ip);
 
-	__instr_hdr_emit_exec(p, 8);
-	instr_tx_exec(p);
+	/* Thread. */
+	thread_ip_reset(p, t);
+	instr_rx_exec(p);
 }
 
 /*
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 1519bcc305..8b37a9812e 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1758,4 +1758,174 @@ __instr_hdr_lookahead_exec(struct rte_swx_pipeline *p __rte_unused,
 	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
 }
 
+/*
+ * emit.
+ */
+static inline void
+__instr_hdr_emit_many_exec(struct rte_swx_pipeline *p __rte_unused,
+			   struct thread *t,
+			   const struct instruction *ip,
+			   uint32_t n_emit)
+{
+	uint64_t valid_headers = t->valid_headers;
+	uint32_t n_headers_out = t->n_headers_out;
+	struct header_out_runtime *ho = &t->headers_out[n_headers_out - 1];
+	uint8_t *ho_ptr = NULL;
+	uint32_t ho_nbytes = 0, first = 1, i;
+
+	for (i = 0; i < n_emit; i++) {
+		uint32_t header_id = ip->io.hdr.header_id[i];
+		uint32_t struct_id = ip->io.hdr.struct_id[i];
+
+		struct header_runtime *hi = &t->headers[header_id];
+		uint8_t *hi_ptr0 = hi->ptr0;
+		uint32_t n_bytes = hi->n_bytes;
+
+		uint8_t *hi_ptr = t->structs[struct_id];
+
+		if (!MASK64_BIT_GET(valid_headers, header_id))
+			continue;
+
+		TRACE("[Thread %2u]: emit header %u\n",
+		      p->thread_id,
+		      header_id);
+
+		/* Headers. */
+		if (first) {
+			first = 0;
+
+			if (!t->n_headers_out) {
+				ho = &t->headers_out[0];
+
+				ho->ptr0 = hi_ptr0;
+				ho->ptr = hi_ptr;
+
+				ho_ptr = hi_ptr;
+				ho_nbytes = n_bytes;
+
+				n_headers_out = 1;
+
+				continue;
+			} else {
+				ho_ptr = ho->ptr;
+				ho_nbytes = ho->n_bytes;
+			}
+		}
+
+		if (ho_ptr + ho_nbytes == hi_ptr) {
+			ho_nbytes += n_bytes;
+		} else {
+			ho->n_bytes = ho_nbytes;
+
+			ho++;
+			ho->ptr0 = hi_ptr0;
+			ho->ptr = hi_ptr;
+
+			ho_ptr = hi_ptr;
+			ho_nbytes = n_bytes;
+
+			n_headers_out++;
+		}
+	}
+
+	ho->n_bytes = ho_nbytes;
+	t->n_headers_out = n_headers_out;
+}
+
+static inline void
+__instr_hdr_emit_exec(struct rte_swx_pipeline *p,
+		      struct thread *t,
+		      const struct instruction *ip)
+{
+	__instr_hdr_emit_many_exec(p, t, ip, 1);
+}
+
+static inline void
+__instr_hdr_emit_tx_exec(struct rte_swx_pipeline *p,
+			 struct thread *t,
+			 const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 1);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit2_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 2);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit3_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 3);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit4_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 4);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit5_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 5);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit6_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 6);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit7_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 7);
+	__instr_tx_exec(p, t, ip);
+}
+
+static inline void
+__instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 9 instructions are fused. ***\n", p->thread_id);
+
+	__instr_hdr_emit_many_exec(p, t, ip, 8);
+	__instr_tx_exec(p, t, ip);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 07/24] pipeline: create inline functions for validate instruction
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (4 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
                       ` (19 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the validate and invalidate instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 12 ++-------
 lib/pipeline/rte_swx_pipeline_internal.h | 32 ++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 80c5fb94bb..6c6d8e52a5 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1953,12 +1953,8 @@ instr_hdr_validate_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t header_id = ip->valid.header_id;
 
-	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
-
-	/* Headers. */
-	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
+	__instr_hdr_validate_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -1992,12 +1988,8 @@ instr_hdr_invalidate_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t header_id = ip->valid.header_id;
-
-	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);
 
-	/* Headers. */
-	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
+	__instr_hdr_invalidate_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 8b37a9812e..312490f11a 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1928,4 +1928,36 @@ __instr_hdr_emit8_tx_exec(struct rte_swx_pipeline *p,
 	__instr_tx_exec(p, t, ip);
 }
 
+/*
+ * validate.
+ */
+static inline void
+__instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
+			  struct thread *t,
+			  const struct instruction *ip)
+{
+	uint32_t header_id = ip->valid.header_id;
+
+	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
+
+	/* Headers. */
+	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
+}
+
+/*
+ * invalidate.
+ */
+static inline void
+__instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
+			    struct thread *t,
+			    const struct instruction *ip)
+{
+	uint32_t header_id = ip->valid.header_id;
+
+	TRACE("[Thread %2u] invalidate header %u\n", p->thread_id, header_id);
+
+	/* Headers. */
+	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 08/24] pipeline: create inline functions for learn instruction
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (5 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
                       ` (18 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the learn and forget instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 36 ++--------------
 lib/pipeline/rte_swx_pipeline_internal.h | 55 ++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 6c6d8e52a5..ca12f34b01 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2230,27 +2230,8 @@ instr_learn_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t action_id = ip->learn.action_id;
-	uint32_t learner_id = t->learner_id;
-	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
-		p->n_selectors + learner_id];
-	struct learner_runtime *l = &t->learners[learner_id];
-	struct learner_statistics *stats = &p->learner_stats[learner_id];
-	uint32_t status;
-
-	/* Table. */
-	status = rte_swx_table_learner_add(ts->obj,
-					   l->mailbox,
-					   t->time,
-					   action_id,
-					   l->action_data[action_id]);
-
-	TRACE("[Thread %2u] learner %u learn %s\n",
-	      p->thread_id,
-	      learner_id,
-	      status ? "ok" : "error");
 
-	stats->n_pkts_learn[status] += 1;
+	__instr_learn_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2279,20 +2260,9 @@ static inline void
 instr_forget_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
-	uint32_t learner_id = t->learner_id;
-	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
-		p->n_selectors + learner_id];
-	struct learner_runtime *l = &t->learners[learner_id];
-	struct learner_statistics *stats = &p->learner_stats[learner_id];
-
-	/* Table. */
-	rte_swx_table_learner_delete(ts->obj, l->mailbox);
-
-	TRACE("[Thread %2u] learner %u forget\n",
-	      p->thread_id,
-	      learner_id);
+	struct instruction *ip = t->ip;
 
-	stats->n_pkts_forget += 1;
+	__instr_forget_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 312490f11a..24096a23b6 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1960,4 +1960,59 @@ __instr_hdr_invalidate_exec(struct rte_swx_pipeline *p __rte_unused,
 	t->valid_headers = MASK64_BIT_CLR(t->valid_headers, header_id);
 }
 
+/*
+ * learn.
+ */
+static inline void
+__instr_learn_exec(struct rte_swx_pipeline *p,
+		   struct thread *t,
+		   const struct instruction *ip)
+{
+	uint64_t action_id = ip->learn.action_id;
+	uint32_t learner_id = t->learner_id;
+	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
+		p->n_selectors + learner_id];
+	struct learner_runtime *l = &t->learners[learner_id];
+	struct learner_statistics *stats = &p->learner_stats[learner_id];
+	uint32_t status;
+
+	/* Table. */
+	status = rte_swx_table_learner_add(ts->obj,
+					   l->mailbox,
+					   t->time,
+					   action_id,
+					   l->action_data[action_id]);
+
+	TRACE("[Thread %2u] learner %u learn %s\n",
+	      p->thread_id,
+	      learner_id,
+	      status ? "ok" : "error");
+
+	stats->n_pkts_learn[status] += 1;
+}
+
+/*
+ * forget.
+ */
+static inline void
+__instr_forget_exec(struct rte_swx_pipeline *p,
+		    struct thread *t,
+		    const struct instruction *ip __rte_unused)
+{
+	uint32_t learner_id = t->learner_id;
+	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
+		p->n_selectors + learner_id];
+	struct learner_runtime *l = &t->learners[learner_id];
+	struct learner_statistics *stats = &p->learner_stats[learner_id];
+
+	/* Table. */
+	rte_swx_table_learner_delete(ts->obj, l->mailbox);
+
+	TRACE("[Thread %2u] learner %u forget\n",
+	      p->thread_id,
+	      learner_id);
+
+	stats->n_pkts_forget += 1;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 09/24] pipeline: create inline functions for extern instruction
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (6 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
                       ` (17 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the extern instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 22 +++---------
 lib/pipeline/rte_swx_pipeline_internal.h | 43 ++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ca12f34b01..c9e29230c2 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2317,18 +2317,10 @@ instr_extern_obj_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t obj_id = ip->ext_obj.ext_obj_id;
-	uint32_t func_id = ip->ext_obj.func_id;
-	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
-	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
-
-	TRACE("[Thread %2u] extern obj %u member func %u\n",
-	      p->thread_id,
-	      obj_id,
-	      func_id);
+	uint32_t done;
 
 	/* Extern object member function execute. */
-	uint32_t done = func(obj->obj, obj->mailbox);
+	done = __instr_extern_obj_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc_cond(t, done);
@@ -2340,16 +2332,10 @@ instr_extern_func_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint32_t ext_func_id = ip->ext_func.ext_func_id;
-	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
-	rte_swx_extern_func_t func = ext_func->func;
-
-	TRACE("[Thread %2u] extern func %u\n",
-	      p->thread_id,
-	      ext_func_id);
+	uint32_t done;
 
 	/* Extern function execute. */
-	uint32_t done = func(ext_func->mailbox);
+	done = __instr_extern_func_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc_cond(t, done);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 24096a23b6..14d6d88344 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2015,4 +2015,47 @@ __instr_forget_exec(struct rte_swx_pipeline *p,
 	stats->n_pkts_forget += 1;
 }
 
+/*
+ * extern.
+ */
+static inline uint32_t
+__instr_extern_obj_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	uint32_t obj_id = ip->ext_obj.ext_obj_id;
+	uint32_t func_id = ip->ext_obj.func_id;
+	struct extern_obj_runtime *obj = &t->extern_objs[obj_id];
+	rte_swx_extern_type_member_func_t func = obj->funcs[func_id];
+	uint32_t done;
+
+	TRACE("[Thread %2u] extern obj %u member func %u\n",
+	      p->thread_id,
+	      obj_id,
+	      func_id);
+
+	done = func(obj->obj, obj->mailbox);
+
+	return done;
+}
+
+static inline uint32_t
+__instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
+			 struct thread *t,
+			 const struct instruction *ip)
+{
+	uint32_t ext_func_id = ip->ext_func.ext_func_id;
+	struct extern_func_runtime *ext_func = &t->extern_funcs[ext_func_id];
+	rte_swx_extern_func_t func = ext_func->func;
+	uint32_t done;
+
+	TRACE("[Thread %2u] extern func %u\n",
+	      p->thread_id,
+	      ext_func_id);
+
+	done = func(ext_func->mailbox);
+
+	return done;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 10/24] pipeline: create inline functions for move instruction
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (7 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
                       ` (16 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the move instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 26 +++---------
 lib/pipeline/rte_swx_pipeline_internal.h | 53 ++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index c9e29230c2..72606f1a06 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2407,10 +2407,7 @@ instr_mov_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov\n",
-	      p->thread_id);
-
-	MOV(t, ip);
+	__instr_mov_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2422,10 +2419,7 @@ instr_mov_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov (mh)\n",
-	      p->thread_id);
-
-	MOV_MH(t, ip);
+	__instr_mov_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2437,10 +2431,7 @@ instr_mov_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov (hm)\n",
-	      p->thread_id);
-
-	MOV_HM(t, ip);
+	__instr_mov_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2452,10 +2443,7 @@ instr_mov_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov (hh)\n",
-	      p->thread_id);
-
-	MOV_HH(t, ip);
+	__instr_mov_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2467,11 +2455,7 @@ instr_mov_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n",
-	      p->thread_id,
-	      ip->mov.src_val);
-
-	MOV_I(t, ip);
+	__instr_mov_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 14d6d88344..1bf94159a9 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2058,4 +2058,57 @@ __instr_extern_func_exec(struct rte_swx_pipeline *p __rte_unused,
 	return done;
 }
 
+/*
+ * mov.
+ */
+static inline void
+__instr_mov_exec(struct rte_swx_pipeline *p __rte_unused,
+		 struct thread *t,
+		 const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov\n", p->thread_id);
+
+	MOV(t, ip);
+}
+
+static inline void
+__instr_mov_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov (mh)\n", p->thread_id);
+
+	MOV_MH(t, ip);
+}
+
+static inline void
+__instr_mov_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov (hm)\n", p->thread_id);
+
+	MOV_HM(t, ip);
+}
+
+static inline void
+__instr_mov_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov (hh)\n", p->thread_id);
+
+	MOV_HH(t, ip);
+}
+
+static inline void
+__instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		   struct thread *t,
+		   const struct instruction *ip)
+{
+	TRACE("[Thread %2u] mov m.f %" PRIx64 "\n", p->thread_id, ip->mov.src_val);
+
+	MOV_I(t, ip);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 11/24] pipeline: create inline functions for DMA instruction
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (8 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
                       ` (15 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the DMA instruction.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          |  80 ++++++------------
 lib/pipeline/rte_swx_pipeline_internal.h | 100 +++++++++++++++++++++++
 2 files changed, 123 insertions(+), 57 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 72606f1a06..a06dc8d348 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2465,46 +2465,12 @@ instr_mov_i_exec(struct rte_swx_pipeline *p)
  * dma.
  */
 static inline void
-__instr_dma_ht_exec(struct rte_swx_pipeline *p, uint32_t n_dma);
-
-static inline void
-__instr_dma_ht_exec(struct rte_swx_pipeline *p, uint32_t n_dma)
+instr_dma_ht_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *action_data = t->structs[0];
-	uint64_t valid_headers = t->valid_headers;
-	uint32_t i;
-
-	for (i = 0; i < n_dma; i++) {
-		uint32_t header_id = ip->dma.dst.header_id[i];
-		uint32_t struct_id = ip->dma.dst.struct_id[i];
-		uint32_t offset = ip->dma.src.offset[i];
-		uint32_t n_bytes = ip->dma.n_bytes[i];
-
-		struct header_runtime *h = &t->headers[header_id];
-		uint8_t *h_ptr0 = h->ptr0;
-		uint8_t *h_ptr = t->structs[struct_id];
-
-		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
-			h_ptr : h_ptr0;
-		void *src = &action_data[offset];
-
-		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);
 
-		/* Headers. */
-		memcpy(dst, src, n_bytes);
-		t->structs[struct_id] = dst;
-		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
-	}
-
-	t->valid_headers = valid_headers;
-}
-
-static inline void
-instr_dma_ht_exec(struct rte_swx_pipeline *p)
-{
-	__instr_dma_ht_exec(p, 1);
+	__instr_dma_ht_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2513,10 +2479,10 @@ instr_dma_ht_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht2_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 2);
+	__instr_dma_ht2_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2525,10 +2491,10 @@ instr_dma_ht2_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht3_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 3);
+	__instr_dma_ht3_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2537,10 +2503,10 @@ instr_dma_ht3_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht4_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 4);
+	__instr_dma_ht4_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2549,10 +2515,10 @@ instr_dma_ht4_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht5_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 5);
+	__instr_dma_ht5_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2561,10 +2527,10 @@ instr_dma_ht5_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht6_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 6);
+	__instr_dma_ht6_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2573,10 +2539,10 @@ instr_dma_ht6_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht7_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 7);
+	__instr_dma_ht7_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -2585,10 +2551,10 @@ instr_dma_ht7_exec(struct rte_swx_pipeline *p)
 static inline void
 instr_dma_ht8_exec(struct rte_swx_pipeline *p)
 {
-	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n",
-	      p->thread_id);
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
 
-	__instr_dma_ht_exec(p, 8);
+	__instr_dma_ht8_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 1bf94159a9..ec8e342a5d 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2111,4 +2111,104 @@ __instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
 	MOV_I(t, ip);
 }
 
+/*
+ * dma.
+ */
+static inline void
+__instr_dma_ht_many_exec(struct rte_swx_pipeline *p __rte_unused,
+			 struct thread *t,
+			 const struct instruction *ip,
+			 uint32_t n_dma)
+{
+	uint8_t *action_data = t->structs[0];
+	uint64_t valid_headers = t->valid_headers;
+	uint32_t i;
+
+	for (i = 0; i < n_dma; i++) {
+		uint32_t header_id = ip->dma.dst.header_id[i];
+		uint32_t struct_id = ip->dma.dst.struct_id[i];
+		uint32_t offset = ip->dma.src.offset[i];
+		uint32_t n_bytes = ip->dma.n_bytes[i];
+
+		struct header_runtime *h = &t->headers[header_id];
+		uint8_t *h_ptr0 = h->ptr0;
+		uint8_t *h_ptr = t->structs[struct_id];
+
+		void *dst = MASK64_BIT_GET(valid_headers, header_id) ?
+			h_ptr : h_ptr0;
+		void *src = &action_data[offset];
+
+		TRACE("[Thread %2u] dma h.s t.f\n", p->thread_id);
+
+		/* Headers. */
+		memcpy(dst, src, n_bytes);
+		t->structs[struct_id] = dst;
+		valid_headers = MASK64_BIT_SET(valid_headers, header_id);
+	}
+
+	t->valid_headers = valid_headers;
+}
+
+static inline void
+__instr_dma_ht_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	__instr_dma_ht_many_exec(p, t, ip, 1);
+}
+
+static inline void
+__instr_dma_ht2_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 2 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 2);
+}
+
+static inline void
+__instr_dma_ht3_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 3 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 3);
+}
+
+static inline void
+__instr_dma_ht4_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 4 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 4);
+}
+
+static inline void
+__instr_dma_ht5_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 5 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 5);
+}
+
+static inline void
+__instr_dma_ht6_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 6 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 6);
+}
+
+static inline void
+__instr_dma_ht7_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 7 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 7);
+}
+
+static inline void
+__instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	TRACE("[Thread %2u] *** The next 8 instructions are fused. ***\n", p->thread_id);
+
+	__instr_dma_ht_many_exec(p, t, ip, 8);
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 12/24] pipeline: create inline functions for ALU instructions
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (9 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
                       ` (14 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the ALU instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 348 ++-----------
 lib/pipeline/rte_swx_pipeline_internal.h | 616 +++++++++++++++++++++++
 2 files changed, 660 insertions(+), 304 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index a06dc8d348..8956b6de27 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3044,10 +3044,8 @@ instr_alu_add_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add\n", p->thread_id);
-
-	/* Structs. */
-	ALU(t, ip, +);
+	/* Structs. */
+	__instr_alu_add_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3059,10 +3057,8 @@ instr_alu_add_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, +);
+	__instr_alu_add_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3074,10 +3070,8 @@ instr_alu_add_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, +);
+	__instr_alu_add_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3089,10 +3083,8 @@ instr_alu_add_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, +);
+	__instr_alu_add_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3104,10 +3096,8 @@ instr_alu_add_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, +);
+	__instr_alu_add_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3119,10 +3109,8 @@ instr_alu_add_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] add (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, +);
+	__instr_alu_add_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3134,10 +3122,8 @@ instr_alu_sub_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, -);
+	__instr_alu_sub_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3149,10 +3135,8 @@ instr_alu_sub_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, -);
+	__instr_alu_sub_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3164,10 +3148,8 @@ instr_alu_sub_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, -);
+	__instr_alu_sub_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3179,10 +3161,8 @@ instr_alu_sub_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, -);
+	__instr_alu_sub_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3194,10 +3174,8 @@ instr_alu_sub_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, -);
+	__instr_alu_sub_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3209,10 +3187,8 @@ instr_alu_sub_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, -);
+	__instr_alu_sub_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3224,10 +3200,8 @@ instr_alu_shl_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, <<);
+	__instr_alu_shl_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3239,10 +3213,8 @@ instr_alu_shl_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, <<);
+	__instr_alu_shl_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3254,10 +3226,8 @@ instr_alu_shl_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, <<);
+	__instr_alu_shl_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3269,10 +3239,8 @@ instr_alu_shl_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, <<);
+	__instr_alu_shl_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3284,10 +3252,8 @@ instr_alu_shl_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, <<);
+	__instr_alu_shl_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3299,10 +3265,8 @@ instr_alu_shl_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, <<);
+	__instr_alu_shl_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3314,10 +3278,8 @@ instr_alu_shr_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, >>);
+	__instr_alu_shr_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3329,10 +3291,8 @@ instr_alu_shr_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, >>);
+	__instr_alu_shr_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3344,10 +3304,8 @@ instr_alu_shr_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM(t, ip, >>);
+	__instr_alu_shr_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3359,10 +3317,8 @@ instr_alu_shr_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH(t, ip, >>);
+	__instr_alu_shr_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3374,10 +3330,8 @@ instr_alu_shr_mi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MI(t, ip, >>);
+	__instr_alu_shr_mi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3389,10 +3343,8 @@ instr_alu_shr_hi_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HI(t, ip, >>);
+	__instr_alu_shr_hi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3404,10 +3356,8 @@ instr_alu_and_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, &);
+	__instr_alu_and_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3419,10 +3369,8 @@ instr_alu_and_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, &);
+	__instr_alu_and_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3434,10 +3382,8 @@ instr_alu_and_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, &);
+	__instr_alu_and_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3449,10 +3395,8 @@ instr_alu_and_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, &);
+	__instr_alu_and_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3464,10 +3408,8 @@ instr_alu_and_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] and (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, &);
+	__instr_alu_and_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3479,10 +3421,8 @@ instr_alu_or_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, |);
+	__instr_alu_or_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3494,10 +3434,8 @@ instr_alu_or_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, |);
+	__instr_alu_or_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3509,10 +3447,8 @@ instr_alu_or_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, |);
+	__instr_alu_or_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3524,10 +3460,8 @@ instr_alu_or_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, |);
+	__instr_alu_or_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3539,10 +3473,8 @@ instr_alu_or_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] or (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, |);
+	__instr_alu_or_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3554,10 +3486,8 @@ instr_alu_xor_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor\n", p->thread_id);
-
 	/* Structs. */
-	ALU(t, ip, ^);
+	__instr_alu_xor_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3569,10 +3499,8 @@ instr_alu_xor_mh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_MH(t, ip, ^);
+	__instr_alu_xor_mh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3584,10 +3512,8 @@ instr_alu_xor_hm_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HM_FAST(t, ip, ^);
+	__instr_alu_xor_hm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3599,10 +3525,8 @@ instr_alu_xor_hh_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_HH_FAST(t, ip, ^);
+	__instr_alu_xor_hh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3614,10 +3538,8 @@ instr_alu_xor_i_exec(struct rte_swx_pipeline *p)
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
 
-	TRACE("[Thread %2u] xor (i)\n", p->thread_id);
-
 	/* Structs. */
-	ALU_I(t, ip, ^);
+	__instr_alu_xor_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3628,55 +3550,9 @@ instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr, dst;
-	uint64_t *src64_ptr, src64, src64_mask, src;
-	uint64_t r;
-
-	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-	dst = *dst16_ptr;
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
-	src64 = *src64_ptr;
-	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
-	src = src64 & src64_mask;
-
-	r = dst;
-	r = ~r & 0xFFFF;
-
-	/* The first input (r) is a 16-bit number. The second and the third
-	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
-	 * three numbers (output r) is a 34-bit number.
-	 */
-	r += (src >> 32) + (src & 0xFFFFFFFF);
-
-	/* The first input is a 16-bit number. The second input is an 18-bit
-	 * number. In the worst case scenario, the sum of the two numbers is a
-	 * 19-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
-	 * therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_ckadd_field_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3687,67 +3563,9 @@ instr_alu_cksub_field_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr, dst;
-	uint64_t *src64_ptr, src64, src64_mask, src;
-	uint64_t r;
-
-	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-	dst = *dst16_ptr;
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
-	src64 = *src64_ptr;
-	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
-	src = src64 & src64_mask;
-
-	r = dst;
-	r = ~r & 0xFFFF;
-
-	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
-	 * the following sequence of operations in 2's complement arithmetic:
-	 *    a '- b = (a - b) % 0xFFFF.
-	 *
-	 * In order to prevent an underflow for the below subtraction, in which
-	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
-	 * minuend), we first add a multiple of the 0xFFFF modulus to the
-	 * minuend. The number we add to the minuend needs to be a 34-bit number
-	 * or higher, so for readability reasons we picked the 36-bit multiple.
-	 * We are effectively turning the 16-bit minuend into a 36-bit number:
-	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
-	 */
-	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */
-
-	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
-	 * result (the output r) is a 36-bit number.
-	 */
-	r -= (src >> 32) + (src & 0xFFFFFFFF);
-
-	/* The first input is a 16-bit number. The second input is a 20-bit
-	 * number. Their sum is a 21-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_cksub_field_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3758,47 +3576,9 @@ instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
-	uint32_t *src32_ptr;
-	uint64_t r0, r1;
-
-	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src32_ptr = (uint32_t *)&src_struct[0];
-
-	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
-	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
-	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
-	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
-	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
-
-	/* The first input is a 16-bit number. The second input is a 19-bit
-	 * number. Their sum is a 20-bit number.
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r0 = (r0 & 0xFFFF) + (r0 >> 16);
-
-	r0 = ~r0 & 0xFFFF;
-	r0 = r0 ? r0 : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r0;
+	__instr_alu_ckadd_struct20_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -3809,49 +3589,9 @@ instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint8_t *dst_struct, *src_struct;
-	uint16_t *dst16_ptr;
-	uint32_t *src32_ptr;
-	uint64_t r = 0;
-	uint32_t i;
-
-	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
 
 	/* Structs. */
-	dst_struct = t->structs[ip->alu.dst.struct_id];
-	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
-
-	src_struct = t->structs[ip->alu.src.struct_id];
-	src32_ptr = (uint32_t *)&src_struct[0];
-
-	/* The max number of 32-bit words in a 256-byte header is 8 = 2^3.
-	 * Therefore, in the worst case scenario, a 35-bit number is added to a
-	 * 16-bit number (the input r), so the output r is 36-bit number.
-	 */
-	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
-		r += *src32_ptr;
-
-	/* The first input is a 16-bit number. The second input is a 20-bit
-	 * number. Their sum is a 21-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
-	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1000E).
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
-	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
-	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
-	 * generated, therefore the output r is always a 16-bit number.
-	 */
-	r = (r & 0xFFFF) + (r >> 16);
-
-	r = ~r & 0xFFFF;
-	r = r ? r : 0xFFFF;
-
-	*dst16_ptr = (uint16_t)r;
+	__instr_alu_ckadd_struct_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index ec8e342a5d..7c4a2c05ef 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2211,4 +2211,620 @@ __instr_dma_ht8_exec(struct rte_swx_pipeline *p, struct thread *t, const struct
 	__instr_dma_ht_many_exec(p, t, ip, 8);
 }
 
+/*
+ * alu.
+ */
+static inline void
+__instr_alu_add_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add\n", p->thread_id);
+
+	ALU(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, +);
+}
+
+static inline void
+__instr_alu_add_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] add (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, +);
+}
+
+static inline void
+__instr_alu_sub_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub\n", p->thread_id);
+
+	ALU(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, -);
+}
+
+static inline void
+__instr_alu_sub_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] sub (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, -);
+}
+
+static inline void
+__instr_alu_shl_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl\n", p->thread_id);
+
+	ALU(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (mi)\n", p->thread_id);
+
+	ALU_MI(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shl_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shl (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, <<);
+}
+
+static inline void
+__instr_alu_shr_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr\n", p->thread_id);
+
+	ALU(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hm)\n", p->thread_id);
+
+	ALU_HM(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hh)\n", p->thread_id);
+
+	ALU_HH(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_mi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (mi)\n", p->thread_id);
+
+	/* Structs. */
+	ALU_MI(t, ip, >>);
+}
+
+static inline void
+__instr_alu_shr_hi_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] shr (hi)\n", p->thread_id);
+
+	ALU_HI(t, ip, >>);
+}
+
+static inline void
+__instr_alu_and_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and\n", p->thread_id);
+
+	ALU(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, &);
+}
+
+static inline void
+__instr_alu_and_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] and (i)\n", p->thread_id);
+
+	ALU_I(t, ip, &);
+}
+
+static inline void
+__instr_alu_or_exec(struct rte_swx_pipeline *p __rte_unused,
+		    struct thread *t,
+		    const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or\n", p->thread_id);
+
+	ALU(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, |);
+}
+
+static inline void
+__instr_alu_or_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		      struct thread *t,
+		      const struct instruction *ip)
+{
+	TRACE("[Thread %2u] or (i)\n", p->thread_id);
+
+	ALU_I(t, ip, |);
+}
+
+static inline void
+__instr_alu_xor_exec(struct rte_swx_pipeline *p __rte_unused,
+		     struct thread *t,
+		     const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor\n", p->thread_id);
+
+	ALU(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_mh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (mh)\n", p->thread_id);
+
+	ALU_MH(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_hm_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (hm)\n", p->thread_id);
+
+	ALU_HM_FAST(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_hh_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (hh)\n", p->thread_id);
+
+	ALU_HH_FAST(t, ip, ^);
+}
+
+static inline void
+__instr_alu_xor_i_exec(struct rte_swx_pipeline *p __rte_unused,
+		       struct thread *t,
+		       const struct instruction *ip)
+{
+	TRACE("[Thread %2u] xor (i)\n", p->thread_id);
+
+	ALU_I(t, ip, ^);
+}
+
+static inline void
+__instr_alu_ckadd_field_exec(struct rte_swx_pipeline *p __rte_unused,
+			     struct thread *t,
+			     const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr, dst;
+	uint64_t *src64_ptr, src64, src64_mask, src;
+	uint64_t r;
+
+	TRACE("[Thread %2u] ckadd (field)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
+	src64 = *src64_ptr;
+	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
+	src = src64 & src64_mask;
+
+	r = dst;
+	r = ~r & 0xFFFF;
+
+	/* The first input (r) is a 16-bit number. The second and the third
+	 * inputs are 32-bit numbers. In the worst case scenario, the sum of the
+	 * three numbers (output r) is a 34-bit number.
+	 */
+	r += (src >> 32) + (src & 0xFFFFFFFF);
+
+	/* The first input is a 16-bit number. The second input is an 18-bit
+	 * number. In the worst case scenario, the sum of the two numbers is a
+	 * 19-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 3-bit number (0 .. 7). Their sum is a 17-bit number (0 .. 0x10006).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x10006), the output r is (0 .. 7). So no carry bit can be generated,
+	 * therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
+static inline void
+__instr_alu_cksub_field_exec(struct rte_swx_pipeline *p __rte_unused,
+			     struct thread *t,
+			     const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr, dst;
+	uint64_t *src64_ptr, src64, src64_mask, src;
+	uint64_t r;
+
+	TRACE("[Thread %2u] cksub (field)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+	dst = *dst16_ptr;
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src64_ptr = (uint64_t *)&src_struct[ip->alu.src.offset];
+	src64 = *src64_ptr;
+	src64_mask = UINT64_MAX >> (64 - ip->alu.src.n_bits);
+	src = src64 & src64_mask;
+
+	r = dst;
+	r = ~r & 0xFFFF;
+
+	/* Subtraction in 1's complement arithmetic (i.e. a '- b) is the same as
+	 * the following sequence of operations in 2's complement arithmetic:
+	 *    a '- b = (a - b) % 0xFFFF.
+	 *
+	 * In order to prevent an underflow for the below subtraction, in which
+	 * a 33-bit number (the subtrahend) is taken out of a 16-bit number (the
+	 * minuend), we first add a multiple of the 0xFFFF modulus to the
+	 * minuend. The number we add to the minuend needs to be a 34-bit number
+	 * or higher, so for readability reasons we picked the 36-bit multiple.
+	 * We are effectively turning the 16-bit minuend into a 36-bit number:
+	 *    (a - b) % 0xFFFF = (a + 0xFFFF00000 - b) % 0xFFFF.
+	 */
+	r += 0xFFFF00000ULL; /* The output r is a 36-bit number. */
+
+	/* A 33-bit number is subtracted from a 36-bit number (the input r). The
+	 * result (the output r) is a 36-bit number.
+	 */
+	r -= (src >> 32) + (src & 0xFFFFFFFF);
+
+	/* The first input is a 16-bit number. The second input is a 20-bit
+	 * number. Their sum is a 21-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
+static inline void
+__instr_alu_ckadd_struct20_exec(struct rte_swx_pipeline *p __rte_unused,
+				struct thread *t,
+				const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr;
+	uint32_t *src32_ptr;
+	uint64_t r0, r1;
+
+	TRACE("[Thread %2u] ckadd (struct of 20 bytes)\n", p->thread_id);
+
+	/* Structs. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src32_ptr = (uint32_t *)&src_struct[0];
+
+	r0 = src32_ptr[0]; /* r0 is a 32-bit number. */
+	r1 = src32_ptr[1]; /* r1 is a 32-bit number. */
+	r0 += src32_ptr[2]; /* The output r0 is a 33-bit number. */
+	r1 += src32_ptr[3]; /* The output r1 is a 33-bit number. */
+	r0 += r1 + src32_ptr[4]; /* The output r0 is a 35-bit number. */
+
+	/* The first input is a 16-bit number. The second input is a 19-bit
+	 * number. Their sum is a 20-bit number.
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 4-bit number (0 .. 15). The sum is a 17-bit number (0 .. 0x1000E).
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1000E), the output r is (0 .. 15). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r0 = (r0 & 0xFFFF) + (r0 >> 16);
+
+	r0 = ~r0 & 0xFFFF;
+	r0 = r0 ? r0 : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r0;
+}
+
+static inline void
+__instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
+			      struct thread *t,
+			      const struct instruction *ip)
+{
+	uint8_t *dst_struct, *src_struct;
+	uint16_t *dst16_ptr;
+	uint32_t *src32_ptr;
+	uint64_t r = 0;
+	uint32_t i;
+
+	TRACE("[Thread %2u] ckadd (struct)\n", p->thread_id);
+
+	/* Destination 16-bit field and first byte of the source struct. */
+	dst_struct = t->structs[ip->alu.dst.struct_id];
+	dst16_ptr = (uint16_t *)&dst_struct[ip->alu.dst.offset];
+
+	src_struct = t->structs[ip->alu.src.struct_id];
+	src32_ptr = (uint32_t *)&src_struct[0];
+
+	/* Sum the 32-bit source words into r (64-bit). NOTE(review): 8 words
+	 * are 256 bits, not 256 bytes (which hold 64 = 2^6 words); the bounds
+	 * below assume r fits in 36 bits -- confirm the maximum header size.
+	 */
+	for (i = 0; i < ip->alu.src.n_bits / 32; i++, src32_ptr++)
+		r += *src32_ptr;
+
+	/* The first input is a 16-bit number. The second input is a 20-bit
+	 * number. Their sum is a 21-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* The first input is a 16-bit number (0 .. 0xFFFF). The second input is
+	 * a 5-bit number (0 .. 31). The sum is a 17-bit number (0 .. 0x1001E).
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	/* When the input r is (0 .. 0xFFFF), the output r is equal to the input
+	 * r, so the output is (0 .. 0xFFFF). When the input r is (0x10000 ..
+	 * 0x1001E), the output r is (0 .. 31). So no carry bit can be
+	 * generated, therefore the output r is always a 16-bit number.
+	 */
+	r = (r & 0xFFFF) + (r >> 16);
+
+	r = ~r & 0xFFFF;
+	r = r ? r : 0xFFFF;
+
+	*dst16_ptr = (uint16_t)r;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 13/24] pipeline: create inline functions for register instructions
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (10 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
                       ` (13 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the register instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 320 ++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 475 +++++++++++++++++++++++
 2 files changed, 502 insertions(+), 293 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 8956b6de27..c7117bb6da 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3901,134 +3901,14 @@ instr_regadd_translate(struct rte_swx_pipeline *p,
 	return 0;
 }
 
-static inline uint64_t *
-instr_regarray_regarray(struct rte_swx_pipeline *p, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-	return r->regarray;
-}
-
-static inline uint64_t
-instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx64_mask = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
-	uint64_t idx = idx64 & idx64_mask & r->size_mask;
-
-	return idx;
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline uint64_t
-instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->regarray.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->regarray.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx = (ntoh64(idx64) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;
-
-	return idx;
-}
-
-#else
-
-#define instr_regarray_idx_nbo instr_regarray_idx_hbo
-
-#endif
-
-static inline uint64_t
-instr_regarray_idx_imm(struct rte_swx_pipeline *p, struct instruction *ip)
-{
-	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
-
-	uint64_t idx = ip->regarray.idx_val & r->size_mask;
-
-	return idx;
-}
-
-static inline uint64_t
-instr_regarray_src_hbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
-	uint64_t src = src64 & src64_mask;
-
-	return src;
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline uint64_t
-instr_regarray_src_nbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->regarray.dstsrc.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src = ntoh64(src64) >> (64 - ip->regarray.dstsrc.n_bits);
-
-	return src;
-}
-
-#else
-
-#define instr_regarray_src_nbo instr_regarray_src_hbo
-
-#endif
-
-static inline void
-instr_regarray_dst_hbo_src_hbo_set(struct thread *t, struct instruction *ip, uint64_t src)
-{
-	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
-	uint64_t dst64 = *dst64_ptr;
-	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
-
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
-
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline void
-instr_regarray_dst_nbo_src_hbo_set(struct thread *t, struct instruction *ip, uint64_t src)
-{
-	uint8_t *dst_struct = t->structs[ip->regarray.dstsrc.struct_id];
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->regarray.dstsrc.offset];
-	uint64_t dst64 = *dst64_ptr;
-	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
-
-	src = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
-}
-
-#else
-
-#define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
-
-#endif
-
 static inline void
 instr_regprefetch_rh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	rte_prefetch0(&regarray[idx]);
+	__instr_regprefetch_rh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4039,14 +3919,9 @@ instr_regprefetch_rm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	rte_prefetch0(&regarray[idx]);
+	__instr_regprefetch_rm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4057,14 +3932,9 @@ instr_regprefetch_ri_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	rte_prefetch0(&regarray[idx]);
+	__instr_regprefetch_ri_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4075,14 +3945,9 @@ instr_regrd_hrh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_hrh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4093,14 +3958,9 @@ instr_regrd_hrm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_hrm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4111,14 +3971,9 @@ instr_regrd_mrh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_mrh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4129,12 +3984,9 @@ instr_regrd_mrm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_mrm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4145,14 +3997,9 @@ instr_regrd_hri_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	instr_regarray_dst_nbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_hri_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4163,14 +4010,9 @@ instr_regrd_mri_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx;
-
-	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	instr_regarray_dst_hbo_src_hbo_set(t, ip, regarray[idx]);
+	__instr_regrd_mri_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4181,15 +4023,9 @@ instr_regwr_rhh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rhh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4200,15 +4036,9 @@ instr_regwr_rhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4219,15 +4049,9 @@ instr_regwr_rmh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rmh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4238,15 +4062,9 @@ instr_regwr_rmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4257,15 +4075,9 @@ instr_regwr_rhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] = src;
+	__instr_regwr_rhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4276,15 +4088,9 @@ instr_regwr_rmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] = src;
+	__instr_regwr_rmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4295,15 +4101,9 @@ instr_regwr_rih_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rih_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4314,15 +4114,9 @@ instr_regwr_rim_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] = src;
+	__instr_regwr_rim_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4333,15 +4127,9 @@ instr_regwr_rii_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] = src;
+	__instr_regwr_rii_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4352,15 +4140,9 @@ instr_regadd_rhh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rhh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4371,15 +4153,9 @@ instr_regadd_rhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4390,15 +4166,9 @@ instr_regadd_rmh_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rmh_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4409,15 +4179,9 @@ instr_regadd_rmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4428,15 +4192,9 @@ instr_regadd_rhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_nbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] += src;
+	__instr_regadd_rhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4447,15 +4205,9 @@ instr_regadd_rmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_hbo(p, t, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] += src;
+	__instr_regadd_rmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4466,15 +4218,9 @@ instr_regadd_rih_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_nbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rih_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4485,15 +4231,9 @@ instr_regadd_rim_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = instr_regarray_src_hbo(t, ip);
-	regarray[idx] += src;
+	__instr_regadd_rim_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4504,15 +4244,9 @@ instr_regadd_rii_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	uint64_t *regarray, idx, src;
-
-	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
 
 	/* Structs. */
-	regarray = instr_regarray_regarray(p, ip);
-	idx = instr_regarray_idx_imm(p, ip);
-	src = ip->regarray.dstsrc_val;
-	regarray[idx] += src;
+	__instr_regadd_rii_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 7c4a2c05ef..2526c2f4c7 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -2827,4 +2827,479 @@ __instr_alu_ckadd_struct_exec(struct rte_swx_pipeline *p __rte_unused,
 	*dst16_ptr = (uint16_t)r;
 }
 
+/*
+ * Register array.
+ */
+/* Return the regarray pointer of the runtime entry indexed by ip->regarray.regarray_id. */
+static inline uint64_t *
+instr_regarray_regarray(struct rte_swx_pipeline *p, const struct instruction *ip)
+{
+	return p->regarray_runtime[ip->regarray.regarray_id].regarray;
+}
+
+/*
+ * Index operand, no byte swap: low n_bits bits of the 64-bit load, ANDed with size_mask.
+ */
+static inline uint64_t
+instr_regarray_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
+	uint8_t *base = t->structs[ip->regarray.idx.struct_id];
+	uint64_t raw = *(uint64_t *)&base[ip->regarray.idx.offset];
+	uint64_t low_bits = UINT64_MAX >> (64 - ip->regarray.idx.n_bits);
+
+	return raw & low_bits & r->size_mask;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+/*
+ * Index operand with byte swap: top n_bits bits of ntoh64(load), ANDed with size_mask.
+ */
+static inline uint64_t
+instr_regarray_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct regarray_runtime *r = &p->regarray_runtime[ip->regarray.regarray_id];
+	uint8_t *base = t->structs[ip->regarray.idx.struct_id];
+	uint64_t raw = *(uint64_t *)&base[ip->regarray.idx.offset];
+
+	return (ntoh64(raw) >> (64 - ip->regarray.idx.n_bits)) & r->size_mask;
+}
+
+#else
+
+#define instr_regarray_idx_nbo instr_regarray_idx_hbo
+
+#endif
+
+/* Immediate index: ip->regarray.idx_val ANDed with the register array's size_mask. */
+static inline uint64_t
+instr_regarray_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
+{
+	const struct regarray_runtime *r;
+
+	r = &p->regarray_runtime[ip->regarray.regarray_id];
+	return ip->regarray.idx_val & r->size_mask;
+}
+
+/*
+ * Source operand, no byte swap: low n_bits bits of the 64-bit load at dstsrc.
+ */
+static inline uint64_t
+instr_regarray_src_hbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *base = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t raw = *(uint64_t *)&base[ip->regarray.dstsrc.offset];
+
+	return raw & (UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits));
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+/* Source operand read with a byte swap: ntoh64() of the 64-bit load at dstsrc. */
+static inline uint64_t
+instr_regarray_src_nbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *base = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t raw = *(uint64_t *)&base[ip->regarray.dstsrc.offset];
+
+	/* Keep the top n_bits bits of the swapped value, right-aligned. */
+	return ntoh64(raw) >> (64 - ip->regarray.dstsrc.n_bits);
+}
+
+#else
+
+#define instr_regarray_src_nbo instr_regarray_src_hbo
+
+#endif
+
+/* Overwrite the low n_bits bits of the 64-bit word at the dstsrc operand with src. */
+static inline void
+instr_regarray_dst_hbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
+{
+	uint8_t *base = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t *word = (uint64_t *)&base[ip->regarray.dstsrc.offset];
+	uint64_t keep = ~(UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits));
+
+	/* Bits of the word outside the field are preserved. */
+	*word = (*word & keep) | (src & ~keep);
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+/* Store hton64(src), shifted right by (64 - n_bits), into the n_bits-wide dstsrc field. */
+static inline void
+instr_regarray_dst_nbo_src_hbo_set(struct thread *t, const struct instruction *ip, uint64_t src)
+{
+	uint8_t *base = t->structs[ip->regarray.dstsrc.struct_id];
+	uint64_t *word = (uint64_t *)&base[ip->regarray.dstsrc.offset];
+	uint64_t field = UINT64_MAX >> (64 - ip->regarray.dstsrc.n_bits);
+	uint64_t swapped = hton64(src) >> (64 - ip->regarray.dstsrc.n_bits);
+
+	*word = (*word & ~field) | (swapped & field);
+}
+
+#else
+
+#define instr_regarray_dst_nbo_src_hbo_set instr_regarray_dst_hbo_src_hbo_set
+
+#endif
+
+/* regprefetch (r[h]): prefetch the register picked by instr_regarray_idx_nbo(). */
+static inline void
+__instr_regprefetch_rh_exec(struct rte_swx_pipeline *p,
+			    struct thread *t,
+			    const struct instruction *ip)
+{
+	uint64_t *regs;
+
+	TRACE("[Thread %2u] regprefetch (r[h])\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	rte_prefetch0(&regs[instr_regarray_idx_nbo(p, t, ip)]);
+}
+
+/* regprefetch (r[m]): prefetch the register picked by instr_regarray_idx_hbo(). */
+static inline void
+__instr_regprefetch_rm_exec(struct rte_swx_pipeline *p,
+			    struct thread *t,
+			    const struct instruction *ip)
+{
+	uint64_t *regs;
+
+	TRACE("[Thread %2u] regprefetch (r[m])\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	rte_prefetch0(&regs[instr_regarray_idx_hbo(p, t, ip)]);
+}
+
+/* regprefetch (r[i]): prefetch the register picked by instr_regarray_idx_imm(). */
+static inline void
+__instr_regprefetch_ri_exec(struct rte_swx_pipeline *p,
+			    struct thread *t __rte_unused,
+			    const struct instruction *ip)
+{
+	uint64_t *regs;
+
+	TRACE("[Thread %2u] regprefetch (r[i])\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	rte_prefetch0(&regs[instr_regarray_idx_imm(p, ip)]);
+}
+
+/* regrd (h = r[h]): idx_nbo() index, register stored with dst_nbo_src_hbo_set(). */
+static inline void
+__instr_regrd_hrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs;
+	uint64_t pos;
+
+	TRACE("[Thread %2u] regrd (h = r[h])\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	instr_regarray_dst_nbo_src_hbo_set(t, ip, regs[pos]);
+}
+
+/*
+ * regrd (h = r[m]): idx_hbo() index, register stored with dst_nbo_src_hbo_set().
+ */
+static inline void
+__instr_regrd_hrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regrd (h = r[m])\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	instr_regarray_dst_nbo_src_hbo_set(t, ip, regs[pos]);
+}
+
+/* regrd (m = r[h]): idx_nbo() index, register stored with dst_hbo_src_hbo_set(). */
+static inline void
+__instr_regrd_mrh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs;
+
+	TRACE("[Thread %2u] regrd (m = r[h])\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	instr_regarray_dst_hbo_src_hbo_set(t, ip, regs[instr_regarray_idx_nbo(p, t, ip)]);
+}
+
+/* regrd (m = r[m]): idx_hbo() index, register stored with dst_hbo_src_hbo_set(). */
+static inline void
+__instr_regrd_mrm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs;
+
+	TRACE("[Thread %2u] regrd (m = r[m])\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	instr_regarray_dst_hbo_src_hbo_set(t, ip, regs[instr_regarray_idx_hbo(p, t, ip)]);
+}
+
+/* regrd (h = r[i]): idx_imm() index, register stored with dst_nbo_src_hbo_set(). */
+static inline void
+__instr_regrd_hri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs;
+
+	TRACE("[Thread %2u] regrd (h = r[i])\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	instr_regarray_dst_nbo_src_hbo_set(t, ip, regs[instr_regarray_idx_imm(p, ip)]);
+}
+
+/* regrd (m = r[i]): idx_imm() index, register stored with dst_hbo_src_hbo_set(). */
+static inline void
+__instr_regrd_mri_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs;
+
+	TRACE("[Thread %2u] regrd (m = r[i])\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	instr_regarray_dst_hbo_src_hbo_set(t, ip, regs[instr_regarray_idx_imm(p, ip)]);
+}
+
+/* regwr (r[h] = h): register chosen by idx_nbo(), value read by src_nbo(). */
+static inline void
+__instr_regwr_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[h] = h)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] = instr_regarray_src_nbo(t, ip);
+}
+
+/* regwr (r[h] = m): register chosen by idx_nbo(), value read by src_hbo(). */
+static inline void
+__instr_regwr_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[h] = m)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] = instr_regarray_src_hbo(t, ip);
+}
+
+/* regwr (r[m] = h): register chosen by idx_hbo(), value read by src_nbo(). */
+static inline void
+__instr_regwr_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[m] = h)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] = instr_regarray_src_nbo(t, ip);
+}
+
+/* regwr (r[m] = m): register chosen by idx_hbo(), value read by src_hbo(). */
+static inline void
+__instr_regwr_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[m] = m)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] = instr_regarray_src_hbo(t, ip);
+}
+
+/* regwr (r[h] = i): register chosen by idx_nbo(), value is ip->regarray.dstsrc_val. */
+static inline void
+__instr_regwr_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[h] = i)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] = ip->regarray.dstsrc_val;
+}
+
+/* regwr (r[m] = i): register chosen by idx_hbo(), value is ip->regarray.dstsrc_val. */
+static inline void
+__instr_regwr_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[m] = i)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] = ip->regarray.dstsrc_val;
+}
+
+/* regwr (r[i] = h): register chosen by idx_imm(), value read by src_nbo(). */
+static inline void
+__instr_regwr_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[i] = h)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] = instr_regarray_src_nbo(t, ip);
+}
+
+/* regwr (r[i] = m): register chosen by idx_imm(), value read by src_hbo(). */
+static inline void
+__instr_regwr_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[i] = m)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] = instr_regarray_src_hbo(t, ip);
+}
+
+/* regwr (r[i] = i): register chosen by idx_imm(), value is ip->regarray.dstsrc_val. */
+static inline void
+__instr_regwr_rii_exec(struct rte_swx_pipeline *p,
+		       struct thread *t __rte_unused,
+		       const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regwr (r[i] = i)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] = ip->regarray.dstsrc_val;
+}
+
+/* regadd (r[h] += h): register chosen by idx_nbo(), addend read by src_nbo(). */
+static inline void
+__instr_regadd_rhh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[h] += h)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] += instr_regarray_src_nbo(t, ip);
+}
+
+/* regadd (r[h] += m): register chosen by idx_nbo(), addend read by src_hbo(). */
+static inline void
+__instr_regadd_rhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[h] += m)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] += instr_regarray_src_hbo(t, ip);
+}
+
+/* regadd (r[m] += h): register chosen by idx_hbo(), addend read by src_nbo(). */
+static inline void
+__instr_regadd_rmh_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[m] += h)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] += instr_regarray_src_nbo(t, ip);
+}
+
+/* regadd (r[m] += m): register chosen by idx_hbo(), addend read by src_hbo(). */
+static inline void
+__instr_regadd_rmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[m] += m)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] += instr_regarray_src_hbo(t, ip);
+}
+
+/* regadd (r[h] += i): register chosen by idx_nbo(), addend is ip->regarray.dstsrc_val. */
+static inline void
+__instr_regadd_rhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[h] += i)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_nbo(p, t, ip);
+	regs[pos] += ip->regarray.dstsrc_val;
+}
+
+/* regadd (r[m] += i): register chosen by idx_hbo(), addend is ip->regarray.dstsrc_val. */
+static inline void
+__instr_regadd_rmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[m] += i)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_hbo(p, t, ip);
+	regs[pos] += ip->regarray.dstsrc_val;
+}
+
+/* regadd (r[i] += h): register chosen by idx_imm(), addend read by src_nbo(). */
+static inline void
+__instr_regadd_rih_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[i] += h)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] += instr_regarray_src_nbo(t, ip);
+}
+
+/* regadd (r[i] += m): register chosen by idx_imm(), addend read by src_hbo(). */
+static inline void
+__instr_regadd_rim_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[i] += m)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] += instr_regarray_src_hbo(t, ip);
+}
+
+/* regadd (r[i] += i): register chosen by idx_imm(), addend is ip->regarray.dstsrc_val. */
+static inline void
+__instr_regadd_rii_exec(struct rte_swx_pipeline *p,
+			struct thread *t __rte_unused,
+			const struct instruction *ip)
+{
+	uint64_t *regs, pos;
+
+	TRACE("[Thread %2u] regadd (r[i] += i)\n", p->thread_id);
+
+	regs = instr_regarray_regarray(p, ip);
+	pos = instr_regarray_idx_imm(p, ip);
+	regs[pos] += ip->regarray.dstsrc_val;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 14/24] pipeline: create inline functions for meter instructions
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (11 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
                       ` (12 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions for the meter instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 457 +------------------
 lib/pipeline/rte_swx_pipeline_internal.h | 541 +++++++++++++++++++++++
 2 files changed, 558 insertions(+), 440 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index c7117bb6da..8b64c57652 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -4470,119 +4470,14 @@ instr_meter_translate(struct rte_swx_pipeline *p,
 	CHECK(0, EINVAL);
 }
 
-static inline struct meter *
-instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
-	uint64_t idx = idx64 & idx64_mask & r->size_mask;
-
-	return &r->metarray[idx];
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline struct meter *
-instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, struct instruction *ip)
-{
-	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
-
-	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
-	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
-	uint64_t idx64 = *idx64_ptr;
-	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
-
-	return &r->metarray[idx];
-}
-
-#else
-
-#define instr_meter_idx_nbo instr_meter_idx_hbo
-
-#endif
-
-static inline struct meter *
-instr_meter_idx_imm(struct rte_swx_pipeline *p, struct instruction *ip)
-{
-	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
-
-	uint64_t idx =  ip->meter.idx_val & r->size_mask;
-
-	return &r->metarray[idx];
-}
-
-static inline uint32_t
-instr_meter_length_hbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
-	uint64_t src = src64 & src64_mask;
-
-	return (uint32_t)src;
-}
-
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-
-static inline uint32_t
-instr_meter_length_nbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
-
-	return (uint32_t)src;
-}
-
-#else
-
-#define instr_meter_length_nbo instr_meter_length_hbo
-
-#endif
-
-static inline enum rte_color
-instr_meter_color_in_hbo(struct thread *t, struct instruction *ip)
-{
-	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
-	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
-	uint64_t src64 = *src64_ptr;
-	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
-	uint64_t src = src64 & src64_mask;
-
-	return (enum rte_color)src;
-}
-
-static inline void
-instr_meter_color_out_hbo_set(struct thread *t, struct instruction *ip, enum rte_color color_out)
-{
-	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
-	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
-	uint64_t dst64 = *dst64_ptr;
-	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
-
-	uint64_t src = (uint64_t)color_out;
-
-	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
-}
-
 static inline void
 instr_metprefetch_h_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-
-	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m);
+	__instr_metprefetch_h_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4593,13 +4488,9 @@ instr_metprefetch_m_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-
-	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m);
+	__instr_metprefetch_m_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4610,13 +4501,9 @@ instr_metprefetch_i_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-
-	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m);
+	__instr_metprefetch_i_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4627,35 +4514,9 @@ instr_meter_hhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4666,35 +4527,9 @@ instr_meter_hhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4705,73 +4540,22 @@ instr_meter_hmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
 }
+
 static inline void
 instr_meter_hmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_nbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_hmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4782,35 +4566,9 @@ instr_meter_mhm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mhm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4821,35 +4579,9 @@ instr_meter_mhi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mhi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4860,35 +4592,9 @@ instr_meter_mmm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mmm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4899,35 +4605,9 @@ instr_meter_mmi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_hbo(p, t, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_mmi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4938,35 +4618,9 @@ instr_meter_ihm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_ihm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -4977,35 +4631,9 @@ instr_meter_ihi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_nbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_ihi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
@@ -5016,73 +4644,22 @@ instr_meter_imm_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = instr_meter_color_in_hbo(t, ip);
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_imm_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
 }
+
 static inline void
 instr_meter_imi_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	struct meter *m;
-	uint64_t time, n_pkts, n_bytes;
-	uint32_t length;
-	enum rte_color color_in, color_out;
-
-	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
 
 	/* Structs. */
-	m = instr_meter_idx_imm(p, ip);
-	rte_prefetch0(m->n_pkts);
-	time = rte_get_tsc_cycles();
-	length = instr_meter_length_hbo(t, ip);
-	color_in = (enum rte_color)ip->meter.color_in_val;
-
-	color_out = rte_meter_trtcm_color_aware_check(&m->m,
-		&m->profile->profile,
-		time,
-		length,
-		color_in);
-
-	color_out &= m->color_mask;
-
-	n_pkts = m->n_pkts[color_out];
-	n_bytes = m->n_bytes[color_out];
-
-	instr_meter_color_out_hbo_set(t, ip, color_out);
-
-	m->n_pkts[color_out] = n_pkts + 1;
-	m->n_bytes[color_out] = n_bytes + length;
+	__instr_meter_imi_exec(p, t, ip);
 
 	/* Thread. */
 	thread_ip_inc(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 2526c2f4c7..791adfb471 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -3302,4 +3302,545 @@ __instr_regadd_rii_exec(struct rte_swx_pipeline *p,
 	regarray[idx] += src;
 }
 
+/*
+ * metarray.
+ */
+static inline struct meter *
+instr_meter_idx_hbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
+
+	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
+	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
+	uint64_t idx64 = *idx64_ptr;
+	uint64_t idx64_mask = UINT64_MAX >> (64 - (ip)->meter.idx.n_bits);
+	uint64_t idx = idx64 & idx64_mask & r->size_mask;
+
+	return &r->metarray[idx];
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline struct meter *
+instr_meter_idx_nbo(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
+
+	uint8_t *idx_struct = t->structs[ip->meter.idx.struct_id];
+	uint64_t *idx64_ptr = (uint64_t *)&idx_struct[ip->meter.idx.offset];
+	uint64_t idx64 = *idx64_ptr;
+	uint64_t idx = (ntoh64(idx64) >> (64 - ip->meter.idx.n_bits)) & r->size_mask;
+
+	return &r->metarray[idx];
+}
+
+#else
+
+#define instr_meter_idx_nbo instr_meter_idx_hbo
+
+#endif
+
+static inline struct meter *
+instr_meter_idx_imm(struct rte_swx_pipeline *p, const struct instruction *ip)
+{
+	struct metarray_runtime *r = &p->metarray_runtime[ip->meter.metarray_id];
+
+	uint64_t idx =  ip->meter.idx_val & r->size_mask;
+
+	return &r->metarray[idx];
+}
+
+static inline uint32_t
+instr_meter_length_hbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->meter.length.n_bits);
+	uint64_t src = src64 & src64_mask;
+
+	return (uint32_t)src;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint32_t
+instr_meter_length_nbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->meter.length.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.length.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src = ntoh64(src64) >> (64 - ip->meter.length.n_bits);
+
+	return (uint32_t)src;
+}
+
+#else
+
+#define instr_meter_length_nbo instr_meter_length_hbo
+
+#endif
+
+static inline enum rte_color
+instr_meter_color_in_hbo(struct thread *t, const struct instruction *ip)
+{
+	uint8_t *src_struct = t->structs[ip->meter.color_in.struct_id];
+	uint64_t *src64_ptr = (uint64_t *)&src_struct[ip->meter.color_in.offset];
+	uint64_t src64 = *src64_ptr;
+	uint64_t src64_mask = UINT64_MAX >> (64 - ip->meter.color_in.n_bits);
+	uint64_t src = src64 & src64_mask;
+
+	return (enum rte_color)src;
+}
+
+static inline void
+instr_meter_color_out_hbo_set(struct thread *t,
+			      const struct instruction *ip,
+			      enum rte_color color_out)
+{
+	uint8_t *dst_struct = t->structs[ip->meter.color_out.struct_id];
+	uint64_t *dst64_ptr = (uint64_t *)&dst_struct[ip->meter.color_out.offset];
+	uint64_t dst64 = *dst64_ptr;
+	uint64_t dst64_mask = UINT64_MAX >> (64 - ip->meter.color_out.n_bits);
+
+	uint64_t src = (uint64_t)color_out;
+
+	*dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask);
+}
+
+static inline void
+__instr_metprefetch_h_exec(struct rte_swx_pipeline *p,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	struct meter *m;
+
+	TRACE("[Thread %2u] metprefetch (h)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m);
+}
+
+static inline void
+__instr_metprefetch_m_exec(struct rte_swx_pipeline *p,
+			   struct thread *t,
+			   const struct instruction *ip)
+{
+	struct meter *m;
+
+	TRACE("[Thread %2u] metprefetch (m)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m);
+}
+
+static inline void
+__instr_metprefetch_i_exec(struct rte_swx_pipeline *p,
+			   struct thread *t __rte_unused,
+			   const struct instruction *ip)
+{
+	struct meter *m;
+
+	TRACE("[Thread %2u] metprefetch (i)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m);
+}
+
+static inline void
+__instr_meter_hhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hhm)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_hhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hhi)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_hmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hmm)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_hmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (hmi)\n", p->thread_id);
+
+	m = instr_meter_idx_nbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mhm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mhm)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mhi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mhi)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mmm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mmm)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_mmi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (mmi)\n", p->thread_id);
+
+	m = instr_meter_idx_hbo(p, t, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_ihm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (ihm)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_ihi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (ihi)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_nbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_imm_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (imm)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = instr_meter_color_in_hbo(t, ip);
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
+static inline void
+__instr_meter_imi_exec(struct rte_swx_pipeline *p, struct thread *t, const struct instruction *ip)
+{
+	struct meter *m;
+	uint64_t time, n_pkts, n_bytes;
+	uint32_t length;
+	enum rte_color color_in, color_out;
+
+	TRACE("[Thread %2u] meter (imi)\n", p->thread_id);
+
+	m = instr_meter_idx_imm(p, ip);
+	rte_prefetch0(m->n_pkts);
+	time = rte_get_tsc_cycles();
+	length = instr_meter_length_hbo(t, ip);
+	color_in = (enum rte_color)ip->meter.color_in_val;
+
+	color_out = rte_meter_trtcm_color_aware_check(&m->m,
+		&m->profile->profile,
+		time,
+		length,
+		color_in);
+
+	color_out &= m->color_mask;
+
+	n_pkts = m->n_pkts[color_out];
+	n_bytes = m->n_bytes[color_out];
+
+	instr_meter_color_out_hbo_set(t, ip, color_out);
+
+	m->n_pkts[color_out] = n_pkts + 1;
+	m->n_bytes[color_out] = n_bytes + length;
+}
+
 #endif
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 15/24] pipeline: create inline functions for instruction operands
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (12 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
                       ` (11 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Create inline functions to get the instruction operands.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline_internal.h | 29 ++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 791adfb471..efd136196f 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -928,6 +928,35 @@ struct thread {
 #define HEADER_VALID(thread, header_id) \
 	MASK64_BIT_GET((thread)->valid_headers, header_id)
 
+static inline uint64_t
+instr_operand_hbo(struct thread *t, const struct instr_operand *x)
+{
+	uint8_t *x_struct = t->structs[x->struct_id];
+	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
+	uint64_t x64 = *x64_ptr;
+	uint64_t x64_mask = UINT64_MAX >> (64 - x->n_bits);
+
+	return x64 & x64_mask;
+}
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+static inline uint64_t
+instr_operand_nbo(struct thread *t, const struct instr_operand *x)
+{
+	uint8_t *x_struct = t->structs[x->struct_id];
+	uint64_t *x64_ptr = (uint64_t *)&x_struct[x->offset];
+	uint64_t x64 = *x64_ptr;
+
+	return ntoh64(x64) >> (64 - x->n_bits);
+}
+
+#else
+
+#define instr_operand_nbo instr_operand_hbo
+
+#endif
+
 #define ALU(thread, ip, operator)  \
 {                                                                              \
 	uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id];      \
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 16/24] pipeline: enable persistent instruction meta-data
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (13 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 17/24] pipeline: introduce action functions Cristian Dumitrescu
                       ` (10 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Save the instruction meta-data for later use instead of freeing it up
once the instruction translation is completed.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 9 ++++++---
 lib/pipeline/rte_swx_pipeline_internal.h | 2 ++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 8b64c57652..4099e364f5 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -6456,13 +6456,14 @@ instruction_config(struct rte_swx_pipeline *p,
 
 	if (a) {
 		a->instructions = instr;
+		a->instruction_data = data;
 		a->n_instructions = n_instructions;
 	} else {
 		p->instructions = instr;
+		p->instruction_data = data;
 		p->n_instructions = n_instructions;
 	}
 
-	free(data);
 	return 0;
 
 error:
@@ -6811,8 +6812,8 @@ action_build(struct rte_swx_pipeline *p)
 {
 	struct action *action;
 
-	p->action_instructions = calloc(p->n_actions,
-					sizeof(struct instruction *));
+	/* p->action_instructions. */
+	p->action_instructions = calloc(p->n_actions, sizeof(struct instruction *));
 	CHECK(p->action_instructions, ENOMEM);
 
 	TAILQ_FOREACH(action, &p->actions, node)
@@ -6841,6 +6842,7 @@ action_free(struct rte_swx_pipeline *p)
 			break;
 
 		TAILQ_REMOVE(&p->actions, action, node);
+		free(action->instruction_data);
 		free(action->instructions);
 		free(action);
 	}
@@ -8777,6 +8779,7 @@ rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 	if (!p)
 		return;
 
+	free(p->instruction_data);
 	free(p->instructions);
 
 	metarray_free(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index efd136196f..7a02d6cb5f 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -693,6 +693,7 @@ struct action {
 	struct struct_type *st;
 	int *args_endianness; /* 0 = Host Byte Order (HBO); 1 = Network Byte Order (NBO). */
 	struct instruction *instructions;
+	struct instruction_data *instruction_data;
 	uint32_t n_instructions;
 	uint32_t id;
 };
@@ -1388,6 +1389,7 @@ struct rte_swx_pipeline {
 	struct regarray_runtime *regarray_runtime;
 	struct metarray_runtime *metarray_runtime;
 	struct instruction *instructions;
+	struct instruction_data *instruction_data;
 	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
 
 	uint32_t n_structs;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 17/24] pipeline: introduce action functions
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (14 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
                       ` (9 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

For better performance, the option to run a single function per action
is now provided, which requires a single function call per action that
can be better optimized by the C compiler, as opposed to one function
call per instruction. Special table lookup instructions are added to
support this feature.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 127 +++++++++++++++++++++++
 lib/pipeline/rte_swx_pipeline_internal.h |   6 ++
 2 files changed, 133 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 4099e364f5..0d02548137 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2097,6 +2097,62 @@ instr_table_exec(struct rte_swx_pipeline *p)
 	thread_ip_action_call(p, t, action_id);
 }
 
+static inline void
+instr_table_af_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+	uint32_t table_id = ip->table.table_id;
+	struct rte_swx_table_state *ts = &t->table_state[table_id];
+	struct table_runtime *table = &t->tables[table_id];
+	struct table_statistics *stats = &p->table_stats[table_id];
+	uint64_t action_id, n_pkts_hit, n_pkts_action;
+	uint8_t *action_data;
+	action_func_t action_func;
+	int done, hit;
+
+	/* Table. */
+	done = table->func(ts->obj,
+			   table->mailbox,
+			   table->key,
+			   &action_id,
+			   &action_data,
+			   &hit);
+	if (!done) {
+		/* Thread. */
+		TRACE("[Thread %2u] table %u (not finalized)\n",
+		      p->thread_id,
+		      table_id);
+
+		thread_yield(p);
+		return;
+	}
+
+	action_id = hit ? action_id : ts->default_action_id;
+	action_data = hit ? action_data : ts->default_action_data;
+	action_func = p->action_funcs[action_id];
+	n_pkts_hit = stats->n_pkts_hit[hit];
+	n_pkts_action = stats->n_pkts_action[action_id];
+
+	TRACE("[Thread %2u] table %u (%s, action %u)\n",
+	      p->thread_id,
+	      table_id,
+	      hit ? "hit" : "miss",
+	      (uint32_t)action_id);
+
+	t->action_id = action_id;
+	t->structs[0] = action_data;
+	t->hit = hit;
+	stats->n_pkts_hit[hit] = n_pkts_hit + 1;
+	stats->n_pkts_action[action_id] = n_pkts_action + 1;
+
+	/* Thread. */
+	thread_ip_inc(p);
+
+	/* Action. */
+	action_func(p);
+}
+
 static inline void
 instr_selector_exec(struct rte_swx_pipeline *p)
 {
@@ -2193,6 +2249,68 @@ instr_learner_exec(struct rte_swx_pipeline *p)
 	thread_ip_action_call(p, t, action_id);
 }
 
+static inline void
+instr_learner_af_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id];
+	struct instruction *ip = t->ip;
+	uint32_t learner_id = ip->table.table_id;
+	struct rte_swx_table_state *ts = &t->table_state[p->n_tables +
+		p->n_selectors + learner_id];
+	struct learner_runtime *l = &t->learners[learner_id];
+	struct learner_statistics *stats = &p->learner_stats[learner_id];
+	uint64_t action_id, n_pkts_hit, n_pkts_action, time;
+	uint8_t *action_data;
+	action_func_t action_func;
+	int done, hit;
+
+	/* Table: the lookup is stamped with the current TSC value. */
+	time = rte_get_tsc_cycles();
+
+	done = rte_swx_table_learner_lookup(ts->obj,
+					    l->mailbox,
+					    time,
+					    l->key,
+					    &action_id,
+					    &action_data,
+					    &hit);
+	if (!done) {
+		/* Thread: lookup not finished, yield and retry later. */
+		TRACE("[Thread %2u] learner %u (not finalized)\n",
+		      p->thread_id,
+		      learner_id);
+
+		thread_yield(p);
+		return;
+	}
+
+	action_id = hit ? action_id : ts->default_action_id;
+	action_data = hit ? action_data : ts->default_action_data;
+	action_func = p->action_funcs[action_id];
+	n_pkts_hit = stats->n_pkts_hit[hit];
+	n_pkts_action = stats->n_pkts_action[action_id];
+
+	TRACE("[Thread %2u] learner %u (%s, action %u)\n",
+	      p->thread_id,
+	      learner_id,
+	      hit ? "hit" : "miss",
+	      (uint32_t)action_id);
+
+	t->action_id = action_id;
+	t->structs[0] = action_data;
+	t->hit = hit;
+	t->learner_id = learner_id;
+	t->time = time;
+	stats->n_pkts_hit[hit] = n_pkts_hit + 1;
+	stats->n_pkts_action[action_id] = n_pkts_action + 1;
+
+	/* Thread: step past the lookup, same as instr_table_af_exec(). */
+	thread_ip_inc(p);
+
+	/* Action: a single call runs the whole action. */
+	action_func(p);
+}
+
 /*
  * learn.
  */
@@ -6618,8 +6736,10 @@ static instr_exec_t instruction_table[] = {
 	[INSTR_METER_IMI] = instr_meter_imi_exec,
 
 	[INSTR_TABLE] = instr_table_exec,
+	[INSTR_TABLE_AF] = instr_table_af_exec,
 	[INSTR_SELECTOR] = instr_selector_exec,
 	[INSTR_LEARNER] = instr_learner_exec,
+	[INSTR_LEARNER_AF] = instr_learner_af_exec,
 	[INSTR_LEARNER_LEARN] = instr_learn_exec,
 	[INSTR_LEARNER_FORGET] = instr_forget_exec,
 	[INSTR_EXTERN_OBJ] = instr_extern_obj_exec,
@@ -6819,12 +6939,19 @@ action_build(struct rte_swx_pipeline *p)
 	TAILQ_FOREACH(action, &p->actions, node)
 		p->action_instructions[action->id] = action->instructions;
 
+	/* p->action_funcs. */
+	p->action_funcs = calloc(p->n_actions, sizeof(action_func_t));
+	CHECK(p->action_funcs, ENOMEM);
+
 	return 0;
 }
 
 static void
 action_build_free(struct rte_swx_pipeline *p)
 {
+	free(p->action_funcs);
+	p->action_funcs = NULL;
+
 	free(p->action_instructions);
 	p->action_instructions = NULL;
 }
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 7a02d6cb5f..3578a10501 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -443,8 +443,10 @@ enum instruction_type {
 
 	/* table TABLE */
 	INSTR_TABLE,
+	INSTR_TABLE_AF,
 	INSTR_SELECTOR,
 	INSTR_LEARNER,
+	INSTR_LEARNER_AF,
 
 	/* learn LEARNER ACTION_NAME */
 	INSTR_LEARNER_LEARN,
@@ -687,6 +689,9 @@ struct instruction_data {
 /*
  * Action.
  */
+typedef void
+(*action_func_t)(struct rte_swx_pipeline *p);
+
 struct action {
 	TAILQ_ENTRY(action) node;
 	char name[RTE_SWX_NAME_SIZE];
@@ -1382,6 +1387,7 @@ struct rte_swx_pipeline {
 	struct port_in_runtime *in;
 	struct port_out_runtime *out;
 	struct instruction **action_instructions;
+	action_func_t *action_funcs;
 	struct rte_swx_table_state *table_state;
 	struct table_statistics *table_stats;
 	struct selector_statistics *selector_stats;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 18/24] pipeline: introduce custom instructions
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (15 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 17/24] pipeline: introduce action functions Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
                       ` (8 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

For better performance, the option to create custom instructions when
the program is translated and add them on-the-fly to the pipeline is
now provided. Multiple regular instructions can now be consolidated
into a single C function optimized by the C compiler directly.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
V3:
-created per pipeline instance instruction table

 lib/pipeline/rte_swx_pipeline.c          | 39 ++++++++++++++++++++++--
 lib/pipeline/rte_swx_pipeline_internal.h | 10 ++++++
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 0d02548137..9afe42ed25 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -6590,8 +6590,6 @@ instruction_config(struct rte_swx_pipeline *p,
 	return err;
 }
 
-typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
-
 static instr_exec_t instruction_table[] = {
 	[INSTR_RX] = instr_rx_exec,
 	[INSTR_TX] = instr_tx_exec,
@@ -6782,12 +6780,41 @@ static instr_exec_t instruction_table[] = {
 	[INSTR_RETURN] = instr_return_exec,
 };
 
+static int
+instruction_table_build(struct rte_swx_pipeline *p)
+{
+	p->instruction_table = calloc(RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX,
+				      sizeof(instr_exec_t));
+	if (!p->instruction_table)
+		return -ENOMEM;
+	/* Seed the per-pipeline table with the built-in instruction handlers. */
+	memcpy(p->instruction_table, instruction_table, sizeof(instruction_table));
+
+	return 0;
+}
+
+static void
+instruction_table_build_free(struct rte_swx_pipeline *p)
+{
+	if (!p->instruction_table)
+		return;
+
+	free(p->instruction_table);
+	p->instruction_table = NULL;
+}
+
+static void
+instruction_table_free(struct rte_swx_pipeline *p)
+{
+	instruction_table_build_free(p);
+}
+
 static inline void
 instr_exec(struct rte_swx_pipeline *p)
 {
 	struct thread *t = &p->threads[p->thread_id];
 	struct instruction *ip = t->ip;
-	instr_exec_t instr = instruction_table[ip->type];
+	instr_exec_t instr = p->instruction_table[ip->type];
 
 	instr(p);
 }
@@ -8916,6 +8943,7 @@ rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 	selector_free(p);
 	table_free(p);
 	action_free(p);
+	instruction_table_free(p);
 	metadata_free(p);
 	header_free(p);
 	extern_func_free(p);
@@ -8985,6 +9013,10 @@ rte_swx_pipeline_build(struct rte_swx_pipeline *p)
 	if (status)
 		goto error;
 
+	status = instruction_table_build(p);
+	if (status)
+		goto error;
+
 	status = action_build(p);
 	if (status)
 		goto error;
@@ -9024,6 +9056,7 @@ rte_swx_pipeline_build(struct rte_swx_pipeline *p)
 	selector_build_free(p);
 	table_build_free(p);
 	action_build_free(p);
+	instruction_table_build_free(p);
 	metadata_build_free(p);
 	header_build_free(p);
 	extern_func_build_free(p);
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 3578a10501..64625b40c6 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -541,6 +541,9 @@ enum instruction_type {
 	 * Return from action
 	 */
 	INSTR_RETURN,
+
+	/* Start of custom instructions. */
+	INSTR_CUSTOM_0,
 };
 
 struct instr_operand {
@@ -686,6 +689,8 @@ struct instruction_data {
 	int invalid;
 };
 
+typedef void (*instr_exec_t)(struct rte_swx_pipeline *);
+
 /*
  * Action.
  */
@@ -1363,6 +1368,10 @@ instr_operand_nbo(struct thread *t, const struct instr_operand *x)
 #define RTE_SWX_PIPELINE_THREADS_MAX 16
 #endif
 
+#ifndef RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX
+#define RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX 256
+#endif
+
 struct rte_swx_pipeline {
 	struct struct_type_tailq struct_types;
 	struct port_in_type_tailq port_in_types;
@@ -1396,6 +1405,7 @@ struct rte_swx_pipeline {
 	struct metarray_runtime *metarray_runtime;
 	struct instruction *instructions;
 	struct instruction_data *instruction_data;
+	instr_exec_t *instruction_table;
 	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
 
 	uint32_t n_structs;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 19/24] pipeline: introduce pipeline compilation
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (16 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
                       ` (7 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Lay the foundation to generate C code for the pipeline: C functions
for actions and custom instructions are generated, built as a shared
object library and loaded into the pipeline.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 44 +++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 9afe42ed25..392e2cf1bc 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -8977,6 +8977,9 @@ rte_swx_pipeline_instructions_config(struct rte_swx_pipeline *p,
 	return 0;
 }
 
+static int
+pipeline_compile(struct rte_swx_pipeline *p);
+
 int
 rte_swx_pipeline_build(struct rte_swx_pipeline *p)
 {
@@ -9046,6 +9049,9 @@ rte_swx_pipeline_build(struct rte_swx_pipeline *p)
 		goto error;
 
 	p->build_done = 1;
+
+	pipeline_compile(p);
+
 	return 0;
 
 error:
@@ -9789,3 +9795,41 @@ rte_swx_ctl_meter_stats_read(struct rte_swx_pipeline *p,
 
 	return 0;
 }
+
+/*
+ * Pipeline compilation.
+ */
+static int
+pipeline_codegen(struct rte_swx_pipeline *p)
+{
+	FILE *f = NULL;
+
+	if (!p)
+		return -EINVAL;
+
+	/* Create the .c file. */
+	f = fopen("/tmp/pipeline.c", "w");
+	if (!f)
+		return -EIO;
+
+	/* Include the .h file. */
+	fprintf(f, "#include \"rte_swx_pipeline_internal.h\"\n");
+
+	/* Close the .c file. */
+	fclose(f);
+
+	return 0;
+}
+
+static int
+pipeline_compile(struct rte_swx_pipeline *p)
+{
+	int status = 0;
+
+	/* Code generation. */
+	status = pipeline_codegen(p);
+	if (status)
+		return status;
+
+	return status;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 20/24] pipeline: export pipeline instructions to file
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (17 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 21/24] pipeline: generate action functions Cristian Dumitrescu
                       ` (6 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Export the array of translated instructions to a C file. There is one
such array per action and one for the pipeline.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 1093 +++++++++++++++++++++++++++++++
 1 file changed, 1093 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 392e2cf1bc..c962283fed 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -9799,9 +9799,1093 @@ rte_swx_ctl_meter_stats_read(struct rte_swx_pipeline *p,
 /*
  * Pipeline compilation.
  */
+static const char *
+instr_type_to_name(struct instruction *instr)
+{
+	switch (instr->type) {
+	case INSTR_RX: return "INSTR_RX";
+
+	case INSTR_TX: return "INSTR_TX";
+	case INSTR_TX_I: return "INSTR_TX_I";
+
+	case INSTR_HDR_EXTRACT: return "INSTR_HDR_EXTRACT";
+	case INSTR_HDR_EXTRACT2: return "INSTR_HDR_EXTRACT2";
+	case INSTR_HDR_EXTRACT3: return "INSTR_HDR_EXTRACT3";
+	case INSTR_HDR_EXTRACT4: return "INSTR_HDR_EXTRACT4";
+	case INSTR_HDR_EXTRACT5: return "INSTR_HDR_EXTRACT5";
+	case INSTR_HDR_EXTRACT6: return "INSTR_HDR_EXTRACT6";
+	case INSTR_HDR_EXTRACT7: return "INSTR_HDR_EXTRACT7";
+	case INSTR_HDR_EXTRACT8: return "INSTR_HDR_EXTRACT8";
+
+	case INSTR_HDR_EXTRACT_M: return "INSTR_HDR_EXTRACT_M";
+
+	case INSTR_HDR_LOOKAHEAD: return "INSTR_HDR_LOOKAHEAD";
+
+	case INSTR_HDR_EMIT: return "INSTR_HDR_EMIT";
+	case INSTR_HDR_EMIT_TX: return "INSTR_HDR_EMIT_TX";
+	case INSTR_HDR_EMIT2_TX: return "INSTR_HDR_EMIT2_TX";
+	case INSTR_HDR_EMIT3_TX: return "INSTR_HDR_EMIT3_TX";
+	case INSTR_HDR_EMIT4_TX: return "INSTR_HDR_EMIT4_TX";
+	case INSTR_HDR_EMIT5_TX: return "INSTR_HDR_EMIT5_TX";
+	case INSTR_HDR_EMIT6_TX: return "INSTR_HDR_EMIT6_TX";
+	case INSTR_HDR_EMIT7_TX: return "INSTR_HDR_EMIT7_TX";
+	case INSTR_HDR_EMIT8_TX: return "INSTR_HDR_EMIT8_TX";
+
+	case INSTR_HDR_VALIDATE: return "INSTR_HDR_VALIDATE";
+	case INSTR_HDR_INVALIDATE: return "INSTR_HDR_INVALIDATE";
+
+	case INSTR_MOV: return "INSTR_MOV";
+	case INSTR_MOV_MH: return "INSTR_MOV_MH";
+	case INSTR_MOV_HM: return "INSTR_MOV_HM";
+	case INSTR_MOV_HH: return "INSTR_MOV_HH";
+	case INSTR_MOV_I: return "INSTR_MOV_I";
+
+	case INSTR_DMA_HT: return "INSTR_DMA_HT";
+	case INSTR_DMA_HT2: return "INSTR_DMA_HT2";
+	case INSTR_DMA_HT3: return "INSTR_DMA_HT3";
+	case INSTR_DMA_HT4: return "INSTR_DMA_HT4";
+	case INSTR_DMA_HT5: return "INSTR_DMA_HT5";
+	case INSTR_DMA_HT6: return "INSTR_DMA_HT6";
+	case INSTR_DMA_HT7: return "INSTR_DMA_HT7";
+	case INSTR_DMA_HT8: return "INSTR_DMA_HT8";
+
+	case INSTR_ALU_ADD: return "INSTR_ALU_ADD";
+	case INSTR_ALU_ADD_MH: return "INSTR_ALU_ADD_MH";
+	case INSTR_ALU_ADD_HM: return "INSTR_ALU_ADD_HM";
+	case INSTR_ALU_ADD_HH: return "INSTR_ALU_ADD_HH";
+	case INSTR_ALU_ADD_MI: return "INSTR_ALU_ADD_MI";
+	case INSTR_ALU_ADD_HI: return "INSTR_ALU_ADD_HI";
+
+	case INSTR_ALU_SUB: return "INSTR_ALU_SUB";
+	case INSTR_ALU_SUB_MH: return "INSTR_ALU_SUB_MH";
+	case INSTR_ALU_SUB_HM: return "INSTR_ALU_SUB_HM";
+	case INSTR_ALU_SUB_HH: return "INSTR_ALU_SUB_HH";
+	case INSTR_ALU_SUB_MI: return "INSTR_ALU_SUB_MI";
+	case INSTR_ALU_SUB_HI: return "INSTR_ALU_SUB_HI";
+
+	case INSTR_ALU_CKADD_FIELD: return "INSTR_ALU_CKADD_FIELD";
+	case INSTR_ALU_CKADD_STRUCT20: return "INSTR_ALU_CKADD_STRUCT20";
+	case INSTR_ALU_CKADD_STRUCT: return "INSTR_ALU_CKADD_STRUCT";
+	case INSTR_ALU_CKSUB_FIELD: return "INSTR_ALU_CKSUB_FIELD";
+
+	case INSTR_ALU_AND: return "INSTR_ALU_AND";
+	case INSTR_ALU_AND_MH: return "INSTR_ALU_AND_MH";
+	case INSTR_ALU_AND_HM: return "INSTR_ALU_AND_HM";
+	case INSTR_ALU_AND_HH: return "INSTR_ALU_AND_HH";
+	case INSTR_ALU_AND_I: return "INSTR_ALU_AND_I";
+
+	case INSTR_ALU_OR: return "INSTR_ALU_OR";
+	case INSTR_ALU_OR_MH: return "INSTR_ALU_OR_MH";
+	case INSTR_ALU_OR_HM: return "INSTR_ALU_OR_HM";
+	case INSTR_ALU_OR_HH: return "INSTR_ALU_OR_HH";
+	case INSTR_ALU_OR_I: return "INSTR_ALU_OR_I";
+
+	case INSTR_ALU_XOR: return "INSTR_ALU_XOR";
+	case INSTR_ALU_XOR_MH: return "INSTR_ALU_XOR_MH";
+	case INSTR_ALU_XOR_HM: return "INSTR_ALU_XOR_HM";
+	case INSTR_ALU_XOR_HH: return "INSTR_ALU_XOR_HH";
+	case INSTR_ALU_XOR_I: return "INSTR_ALU_XOR_I";
+
+	case INSTR_ALU_SHL: return "INSTR_ALU_SHL";
+	case INSTR_ALU_SHL_MH: return "INSTR_ALU_SHL_MH";
+	case INSTR_ALU_SHL_HM: return "INSTR_ALU_SHL_HM";
+	case INSTR_ALU_SHL_HH: return "INSTR_ALU_SHL_HH";
+	case INSTR_ALU_SHL_MI: return "INSTR_ALU_SHL_MI";
+	case INSTR_ALU_SHL_HI: return "INSTR_ALU_SHL_HI";
+
+	case INSTR_ALU_SHR: return "INSTR_ALU_SHR";
+	case INSTR_ALU_SHR_MH: return "INSTR_ALU_SHR_MH";
+	case INSTR_ALU_SHR_HM: return "INSTR_ALU_SHR_HM";
+	case INSTR_ALU_SHR_HH: return "INSTR_ALU_SHR_HH";
+	case INSTR_ALU_SHR_MI: return "INSTR_ALU_SHR_MI";
+	case INSTR_ALU_SHR_HI: return "INSTR_ALU_SHR_HI";
+
+	case INSTR_REGPREFETCH_RH: return "INSTR_REGPREFETCH_RH";
+	case INSTR_REGPREFETCH_RM: return "INSTR_REGPREFETCH_RM";
+	case INSTR_REGPREFETCH_RI: return "INSTR_REGPREFETCH_RI";
+
+	case INSTR_REGRD_HRH: return "INSTR_REGRD_HRH";
+	case INSTR_REGRD_HRM: return "INSTR_REGRD_HRM";
+	case INSTR_REGRD_HRI: return "INSTR_REGRD_HRI";
+	case INSTR_REGRD_MRH: return "INSTR_REGRD_MRH";
+	case INSTR_REGRD_MRM: return "INSTR_REGRD_MRM";
+	case INSTR_REGRD_MRI: return "INSTR_REGRD_MRI";
+
+	case INSTR_REGWR_RHH: return "INSTR_REGWR_RHH";
+	case INSTR_REGWR_RHM: return "INSTR_REGWR_RHM";
+	case INSTR_REGWR_RHI: return "INSTR_REGWR_RHI";
+	case INSTR_REGWR_RMH: return "INSTR_REGWR_RMH";
+	case INSTR_REGWR_RMM: return "INSTR_REGWR_RMM";
+	case INSTR_REGWR_RMI: return "INSTR_REGWR_RMI";
+	case INSTR_REGWR_RIH: return "INSTR_REGWR_RIH";
+	case INSTR_REGWR_RIM: return "INSTR_REGWR_RIM";
+	case INSTR_REGWR_RII: return "INSTR_REGWR_RII";
+
+	case INSTR_REGADD_RHH: return "INSTR_REGADD_RHH";
+	case INSTR_REGADD_RHM: return "INSTR_REGADD_RHM";
+	case INSTR_REGADD_RHI: return "INSTR_REGADD_RHI";
+	case INSTR_REGADD_RMH: return "INSTR_REGADD_RMH";
+	case INSTR_REGADD_RMM: return "INSTR_REGADD_RMM";
+	case INSTR_REGADD_RMI: return "INSTR_REGADD_RMI";
+	case INSTR_REGADD_RIH: return "INSTR_REGADD_RIH";
+	case INSTR_REGADD_RIM: return "INSTR_REGADD_RIM";
+	case INSTR_REGADD_RII: return "INSTR_REGADD_RII";
+
+	case INSTR_METPREFETCH_H: return "INSTR_METPREFETCH_H";
+	case INSTR_METPREFETCH_M: return "INSTR_METPREFETCH_M";
+	case INSTR_METPREFETCH_I: return "INSTR_METPREFETCH_I";
+
+	case INSTR_METER_HHM: return "INSTR_METER_HHM";
+	case INSTR_METER_HHI: return "INSTR_METER_HHI";
+	case INSTR_METER_HMM: return "INSTR_METER_HMM";
+	case INSTR_METER_HMI: return "INSTR_METER_HMI";
+	case INSTR_METER_MHM: return "INSTR_METER_MHM";
+	case INSTR_METER_MHI: return "INSTR_METER_MHI";
+	case INSTR_METER_MMM: return "INSTR_METER_MMM";
+	case INSTR_METER_MMI: return "INSTR_METER_MMI";
+	case INSTR_METER_IHM: return "INSTR_METER_IHM";
+	case INSTR_METER_IHI: return "INSTR_METER_IHI";
+	case INSTR_METER_IMM: return "INSTR_METER_IMM";
+	case INSTR_METER_IMI: return "INSTR_METER_IMI";
+
+	case INSTR_TABLE: return "INSTR_TABLE";
+	case INSTR_TABLE_AF: return "INSTR_TABLE_AF";
+	case INSTR_SELECTOR: return "INSTR_SELECTOR";
+	case INSTR_LEARNER: return "INSTR_LEARNER";
+	case INSTR_LEARNER_AF: return "INSTR_LEARNER_AF";
+
+	case INSTR_LEARNER_LEARN: return "INSTR_LEARNER_LEARN";
+	case INSTR_LEARNER_FORGET: return "INSTR_LEARNER_FORGET";
+
+	case INSTR_EXTERN_OBJ: return "INSTR_EXTERN_OBJ";
+	case INSTR_EXTERN_FUNC: return "INSTR_EXTERN_FUNC";
+
+	case INSTR_JMP: return "INSTR_JMP";
+	case INSTR_JMP_VALID: return "INSTR_JMP_VALID";
+	case INSTR_JMP_INVALID: return "INSTR_JMP_INVALID";
+	case INSTR_JMP_HIT: return "INSTR_JMP_HIT";
+	case INSTR_JMP_MISS: return "INSTR_JMP_MISS";
+	case INSTR_JMP_ACTION_HIT: return "INSTR_JMP_ACTION_HIT";
+	case INSTR_JMP_ACTION_MISS: return "INSTR_JMP_ACTION_MISS";
+	case INSTR_JMP_EQ: return "INSTR_JMP_EQ";
+	case INSTR_JMP_EQ_MH: return "INSTR_JMP_EQ_MH";
+	case INSTR_JMP_EQ_HM: return "INSTR_JMP_EQ_HM";
+	case INSTR_JMP_EQ_HH: return "INSTR_JMP_EQ_HH";
+	case INSTR_JMP_EQ_I: return "INSTR_JMP_EQ_I";
+	case INSTR_JMP_NEQ: return "INSTR_JMP_NEQ";
+	case INSTR_JMP_NEQ_MH: return "INSTR_JMP_NEQ_MH";
+	case INSTR_JMP_NEQ_HM: return "INSTR_JMP_NEQ_HM";
+	case INSTR_JMP_NEQ_HH: return "INSTR_JMP_NEQ_HH";
+	case INSTR_JMP_NEQ_I: return "INSTR_JMP_NEQ_I";
+	case INSTR_JMP_LT: return "INSTR_JMP_LT";
+	case INSTR_JMP_LT_MH: return "INSTR_JMP_LT_MH";
+	case INSTR_JMP_LT_HM: return "INSTR_JMP_LT_HM";
+	case INSTR_JMP_LT_HH: return "INSTR_JMP_LT_HH";
+	case INSTR_JMP_LT_MI: return "INSTR_JMP_LT_MI";
+	case INSTR_JMP_LT_HI: return "INSTR_JMP_LT_HI";
+	case INSTR_JMP_GT: return "INSTR_JMP_GT";
+	case INSTR_JMP_GT_MH: return "INSTR_JMP_GT_MH";
+	case INSTR_JMP_GT_HM: return "INSTR_JMP_GT_HM";
+	case INSTR_JMP_GT_HH: return "INSTR_JMP_GT_HH";
+	case INSTR_JMP_GT_MI: return "INSTR_JMP_GT_MI";
+	case INSTR_JMP_GT_HI: return "INSTR_JMP_GT_HI";
+
+	case INSTR_RETURN: return "INSTR_RETURN";
+
+	default: return "INSTR_UNKNOWN";
+	}
+}
+
+typedef void
+(*instruction_export_t)(struct instruction *, FILE *);
+
+static void
+instr_io_export(struct instruction *instr, FILE *f)
+{
+	uint32_t n_io = 0, n_io_imm = 0, n_hdrs = 0, i;
+
+	/* n_io, n_io_imm, n_hdrs: which initializer fields this type needs. */
+	if (instr->type == INSTR_RX ||
+	    instr->type == INSTR_TX ||
+	    instr->type == INSTR_HDR_EXTRACT_M ||
+	    (instr->type >= INSTR_HDR_EMIT_TX && instr->type <= INSTR_HDR_EMIT8_TX))
+		n_io = 1;
+
+	if (instr->type == INSTR_TX_I)
+		n_io_imm = 1;
+
+	if (instr->type >= INSTR_HDR_EXTRACT && instr->type <= INSTR_HDR_EXTRACT8)
+		n_hdrs = 1 + (instr->type - INSTR_HDR_EXTRACT);
+
+	if (instr->type == INSTR_HDR_EXTRACT_M ||
+	    instr->type == INSTR_HDR_LOOKAHEAD ||
+	    instr->type == INSTR_HDR_EMIT)
+		n_hdrs = 1;
+
+	if (instr->type >= INSTR_HDR_EMIT_TX && instr->type <= INSTR_HDR_EMIT8_TX)
+		n_hdrs = 1 + (instr->type - INSTR_HDR_EMIT_TX);
+
+	/* instr. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n",
+		instr_type_to_name(instr));
+
+	/* instr.io. */
+	fprintf(f,
+		"\t\t.io = {\n");
+
+	/* instr.io.io. */
+	if (n_io)
+		fprintf(f,
+			"\t\t\t.io = {\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t},\n",
+			instr->io.io.offset,
+			instr->io.io.n_bits);
+
+	if (n_io_imm)
+		fprintf(f,
+			"\t\t\t.io = {\n"
+			"\t\t\t\t.val = %u,\n"
+			"\t\t\t},\n",
+			instr->io.io.val);
+
+	/* instr.io.hdr: nested one level below instr.io. */
+	if (n_hdrs) {
+		fprintf(f,
+			"\t\t\t.hdr = {\n");
+
+		/* instr.io.hdr.header_id. */
+		fprintf(f,
+			"\t\t\t\t.header_id = {");
+
+		for (i = 0; i < n_hdrs; i++)
+			fprintf(f,
+				"%u, ",
+				instr->io.hdr.header_id[i]);
+
+		fprintf(f,
+			"},\n");
+
+		/* instr.io.hdr.struct_id. */
+		fprintf(f,
+			"\t\t\t\t.struct_id = {");
+
+		for (i = 0; i < n_hdrs; i++)
+			fprintf(f,
+				"%u, ",
+				instr->io.hdr.struct_id[i]);
+
+		fprintf(f,
+			"},\n");
+
+		/* instr.io.hdr.n_bytes. */
+		fprintf(f,
+			"\t\t\t\t.n_bytes = {");
+
+		for (i = 0; i < n_hdrs; i++)
+			fprintf(f,
+				"%u, ",
+				instr->io.hdr.n_bytes[i]);
+
+		fprintf(f,
+			"},\n");
+
+		/* instr.io.hdr - closing curly brace. */
+		fprintf(f,
+			"\t\t\t},\n");
+	}
+
+	/* instr.io - closing curly brace. */
+	fprintf(f,
+		"\t\t},\n");
+
+	/* instr - closing curly brace. */
+	fprintf(f,
+		"\t},\n");
+}
+
+static void
+instr_hdr_validate_export(struct instruction *instr, FILE *f)
+{
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.valid = {\n"
+		"\t\t\t.header_id = %u,\n"
+		"\t\t},\n"
+		"\t},\n",
+		instr_type_to_name(instr),
+		instr->valid.header_id);
+}
+
+static void
+instr_mov_export(struct instruction *instr, FILE *f)
+{
+	if (instr->type != INSTR_MOV_I) /* Source is an operand. */
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.mov = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.src = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->mov.dst.struct_id,
+			instr->mov.dst.n_bits,
+			instr->mov.dst.offset,
+			instr->mov.src.struct_id,
+			instr->mov.src.n_bits,
+			instr->mov.src.offset);
+	else /* Source is the immediate value src_val. */
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.mov = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.src_val = %" PRIu64 ",\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->mov.dst.struct_id,
+			instr->mov.dst.n_bits,
+			instr->mov.dst.offset,
+			instr->mov.src_val);
+}
+
+static void
+instr_dma_ht_export(struct instruction *instr, FILE *f)
+{
+	uint32_t n_dma = 0, i;
+
+	/* n_dma. */
+	n_dma = 1 + (instr->type - INSTR_DMA_HT);
+
+	/* instr. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n",
+		instr_type_to_name(instr));
+
+	/* instr.dma. */
+	fprintf(f,
+		"\t\t.dma = {\n");
+
+	/* instr.dma.dst. */
+	fprintf(f,
+		"\t\t\t.dst = {\n");
+
+	/* instr.dma.dst.header_id. */
+	fprintf(f,
+		"\t\t\t\t.header_id = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.dst.header_id[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma.dst.struct_id. */
+	fprintf(f,
+		"\t\t\t\t.struct_id = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.dst.struct_id[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma.dst - closing curly brace. */
+	fprintf(f,
+		"\t\t\t},\n");
+
+	/* instr.dma.src. */
+	fprintf(f,
+		"\t\t\t.src = {\n");
+
+	/* instr.dma.src.offset. */
+	fprintf(f,
+		"\t\t\t\t.offset = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.src.offset[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma.src - closing curly brace. */
+	fprintf(f,
+		"\t\t\t},\n");
+
+	/* instr.dma.n_bytes. */
+	fprintf(f,
+		"\t\t\t.n_bytes = {");
+
+	for (i = 0; i < n_dma; i++)
+		fprintf(f,
+			"%u, ",
+			instr->dma.n_bytes[i]);
+
+	fprintf(f,
+		"},\n");
+
+	/* instr.dma - closing curly brace. */
+	fprintf(f,
+		"\t\t},\n");
+
+	/* instr - closing curly brace. */
+	fprintf(f,
+		"\t},\n");
+}
+
+static void
+instr_alu_export(struct instruction *instr, FILE *f)
+{
+	int imm = 0;
+
+	if (instr->type == INSTR_ALU_ADD_MI ||
+	    instr->type == INSTR_ALU_ADD_HI ||
+	    instr->type == INSTR_ALU_SUB_MI ||
+	    instr->type == INSTR_ALU_SUB_HI ||
+	    instr->type == INSTR_ALU_SHL_MI ||
+	    instr->type == INSTR_ALU_SHL_HI ||
+	    instr->type == INSTR_ALU_SHR_MI ||
+	    instr->type == INSTR_ALU_SHR_HI ||
+	    instr->type == INSTR_ALU_AND_I ||
+	    instr->type == INSTR_ALU_OR_I ||
+	    instr->type == INSTR_ALU_XOR_I)
+		imm = 1;
+
+	if (!imm) /* Source is an operand. */
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.alu = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.src = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->alu.dst.struct_id,
+			instr->alu.dst.n_bits,
+			instr->alu.dst.offset,
+			instr->alu.src.struct_id,
+			instr->alu.src.n_bits,
+			instr->alu.src.offset);
+	else /* Source is the immediate value src_val. */
+		fprintf(f,
+			"\t{\n"
+			"\t\t.type = %s,\n"
+			"\t\t.alu = {\n"
+			"\t\t\t.dst = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.src_val = %" PRIu64 ",\n"
+			"\t\t},\n"
+			"\t},\n",
+			instr_type_to_name(instr),
+			instr->alu.dst.struct_id,
+			instr->alu.dst.n_bits,
+			instr->alu.dst.offset,
+			instr->alu.src_val);
+}
+
+static void
+instr_reg_export(struct instruction *instr __rte_unused, FILE *f __rte_unused)
+{
+	int prefetch  = 0, idx_imm = 0, src_imm = 0;
+
+	if (instr->type == INSTR_REGPREFETCH_RH ||
+	    instr->type == INSTR_REGPREFETCH_RM ||
+	    instr->type == INSTR_REGPREFETCH_RI)
+		prefetch = 1;
+
+	/* index is the 3rd operand for the regrd instruction and the 2nd
+	 * operand for the regwr and regadd instructions.
+	 */
+	if (instr->type == INSTR_REGPREFETCH_RI ||
+	    instr->type == INSTR_REGRD_HRI ||
+	    instr->type == INSTR_REGRD_MRI ||
+	    instr->type == INSTR_REGWR_RIH ||
+	    instr->type == INSTR_REGWR_RIM ||
+	    instr->type == INSTR_REGWR_RII ||
+	    instr->type == INSTR_REGADD_RIH ||
+	    instr->type == INSTR_REGADD_RIM ||
+	    instr->type == INSTR_REGADD_RII)
+		idx_imm = 1;
+
+	/* src is the 3rd operand for the regwr and regadd instructions. */
+	if (instr->type == INSTR_REGWR_RHI ||
+	    instr->type == INSTR_REGWR_RMI ||
+	    instr->type == INSTR_REGWR_RII ||
+	    instr->type == INSTR_REGADD_RHI ||
+	    instr->type == INSTR_REGADD_RMI ||
+	    instr->type == INSTR_REGADD_RII)
+		src_imm = 1;
+
+	/* instr.regarray.regarray_id. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.regarray = {\n"
+		"\t\t\t.regarray_id = %u,\n",
+		instr_type_to_name(instr),
+		instr->regarray.regarray_id);
+
+	/* instr.regarray.idx / instr.regarray.idx_val. */
+	if (!idx_imm)
+		fprintf(f,
+			"\t\t\t\t.idx = {\n"
+			"\t\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t\t.offset = %u,\n"
+			"\t\t\t\t},\n",
+			instr->regarray.idx.struct_id,
+			instr->regarray.idx.n_bits,
+			instr->regarray.idx.offset);
+	else
+		fprintf(f,
+			"\t\t\t\t.idx_val = %u,\n",
+			instr->regarray.idx_val);
+
+	/* instr.regarray.dstsrc / instr.regarray.dstsrc_val. */
+	if (!prefetch) {
+		if (!src_imm)
+			fprintf(f,
+				"\t\t\t\t.dstsrc = {\n"
+				"\t\t\t\t\t.struct_id = %u,\n"
+				"\t\t\t\t\t.n_bits = %u,\n"
+				"\t\t\t\t\t.offset = %u,\n"
+				"\t\t\t\t},\n",
+				instr->regarray.dstsrc.struct_id,
+				instr->regarray.dstsrc.n_bits,
+				instr->regarray.dstsrc.offset);
+		else
+			fprintf(f,
+				"\t\t\t\t.dstsrc_val = %" PRIu64 ",\n",
+				instr->regarray.dstsrc_val);
+	}
+
+	/* instr.regarray and instr - closing curly braces. */
+	fprintf(f,
+		"\t\t},\n"
+		"\t},\n");
+}
+
+/*
+ * Export a meter-related instruction to file f as a C designated initializer
+ * of struct instruction. The prefetch types only carry the meter array ID and
+ * the index operand; the other types also carry the length, input color and
+ * output color operands. The index and the input color are written either as
+ * an operand descriptor or as an immediate value, depending on the type.
+ */
+static void
+instr_meter_export(struct instruction *instr, FILE *f)
+{
+	int prefetch = 0, idx_imm = 0, color_in_imm = 0;
+
+	/* prefetch. */
+	if (instr->type == INSTR_METPREFETCH_H ||
+	    instr->type == INSTR_METPREFETCH_M ||
+	    instr->type == INSTR_METPREFETCH_I)
+		prefetch = 1;
+
+	/* idx_imm. */
+	if (instr->type == INSTR_METPREFETCH_I ||
+	    instr->type == INSTR_METER_IHM ||
+	    instr->type == INSTR_METER_IHI ||
+	    instr->type == INSTR_METER_IMM ||
+	    instr->type == INSTR_METER_IMI)
+		idx_imm = 1;
+
+	/* color_in_imm. */
+	if (instr->type == INSTR_METER_HHI ||
+	    instr->type == INSTR_METER_HMI ||
+	    instr->type == INSTR_METER_MHI ||
+	    instr->type == INSTR_METER_MMI ||
+	    instr->type == INSTR_METER_IHI ||
+	    instr->type == INSTR_METER_IMI)
+		color_in_imm = 1;
+
+	/* instr.meter.metarray_id. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.meter = {\n"
+		"\t\t\t.metarray_id = %u,\n",
+		instr_type_to_name(instr),
+		instr->meter.metarray_id);
+
+	/* instr.meter.idx / instr.meter.idx_val. */
+	if (!idx_imm)
+		fprintf(f,
+			"\t\t\t.idx = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->meter.idx.struct_id,
+			instr->meter.idx.n_bits,
+			instr->meter.idx.offset);
+	else
+		fprintf(f,
+			"\t\t\t.idx_val = %u,\n",
+			instr->meter.idx_val);
+
+	/* The remaining fields are only written for non-prefetch types. */
+	if (!prefetch) {
+		/* instr.meter.length. */
+		fprintf(f,
+			"\t\t\t.length = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->meter.length.struct_id,
+			instr->meter.length.n_bits,
+			instr->meter.length.offset);
+
+		/* instr.meter.color_in / instr.meter.color_in_val. */
+		if (!color_in_imm)
+			fprintf(f,
+				"\t\t\t.color_in = {\n"
+				"\t\t\t\t.struct_id = %u,\n"
+				"\t\t\t\t.n_bits = %u,\n"
+				"\t\t\t\t.offset = %u,\n"
+				"\t\t\t},\n",
+				instr->meter.color_in.struct_id,
+				instr->meter.color_in.n_bits,
+				instr->meter.color_in.offset);
+		else
+			fprintf(f,
+				"\t\t\t.color_in_val = %u,\n",
+				(uint32_t)instr->meter.color_in_val);
+
+		/* instr.meter.color_out. */
+		fprintf(f,
+			"\t\t\t.color_out = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->meter.color_out.struct_id,
+			instr->meter.color_out.n_bits,
+			instr->meter.color_out.offset);
+	}
+
+	/* instr.meter and instr - closing curly braces. */
+	fprintf(f,
+		"\t\t},\n"
+		"\t},\n");
+}
+
+/* Export the instruction type and the table ID as a C initializer to file f. */
+static void
+instr_table_export(struct instruction *instr,
+		FILE *f)
+{
+	const char *type_name = instr_type_to_name(instr);
+	unsigned int table_id = instr->table.table_id;
+
+	/* instr - opening curly brace and type. */
+	fprintf(f, "\t{\n\t\t.type = %s,\n", type_name);
+
+	/* instr.table. */
+	fprintf(f, "\t\t.table = {\n\t\t\t.table_id = %u,\n\t\t},\n", table_id);
+
+	/* instr - closing curly brace. */
+	fputs("\t},\n", f);
+}
+
+/* Export the instruction type and the action ID as a C initializer to file f. */
+static void
+instr_learn_export(struct instruction *instr, FILE *f)
+{
+	const char *type_name = instr_type_to_name(instr);
+	unsigned int action_id = instr->learn.action_id;
+
+	/* instr - opening curly brace and type. */
+	fprintf(f, "\t{\n\t\t.type = %s,\n", type_name);
+
+	/* instr.learn. */
+	fprintf(f, "\t\t.learn = {\n\t\t\t\t.action_id = %u,\n\t\t},\n", action_id);
+
+	/* instr - closing curly brace. */
+	fputs("\t},\n", f);
+}
+
+/* Export an instruction that has no fields besides its type to file f. */
+static void
+instr_forget_export(struct instruction *instr, FILE *f)
+{
+	const char *type_name = instr_type_to_name(instr);
+
+	fputs("\t{\n", f);
+	fprintf(f, "\t\t.type = %s,\n", type_name);
+	fputs("\t},\n", f);
+}
+
+/*
+ * Export an extern instruction as a C initializer to file f: the ext_obj
+ * fields for INSTR_EXTERN_OBJ, the ext_func fields for any other type.
+ */
+static void
+instr_extern_export(struct instruction *instr, FILE *f)
+{
+	const char *type_name = instr_type_to_name(instr);
+
+	/* instr - opening curly brace and type, common to both flavors. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n",
+		type_name);
+
+	if (instr->type != INSTR_EXTERN_OBJ) {
+		/* instr.ext_func. */
+		fprintf(f,
+			"\t\t.ext_func = {\n"
+			"\t\t\t.ext_func_id = %u,\n"
+			"\t\t},\n",
+			instr->ext_func.ext_func_id);
+	} else {
+		/* instr.ext_obj. */
+		fprintf(f,
+			"\t\t.ext_obj = {\n"
+			"\t\t\t.ext_obj_id = %u,\n"
+			"\t\t\t.func_id = %u,\n"
+			"\t\t},\n",
+			instr->ext_obj.ext_obj_id,
+			instr->ext_obj.func_id);
+	}
+
+	/* instr - closing curly brace. */
+	fputs("\t},\n", f);
+}
+
+/*
+ * Export a jump instruction to file f as a C designated initializer of
+ * struct instruction. The .ip field is always written as NULL. The other
+ * fields depend on the instruction type: header ID, action ID, operands a
+ * and b, or operand a plus the immediate value b_val. Types not listed in
+ * the switch below get no additional fields.
+ */
+static void
+instr_jmp_export(struct instruction *instr, FILE *f)
+{
+	/* instr.jmp.ip. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.jmp = {\n"
+		"\t\t\t.ip = NULL,\n",
+		instr_type_to_name(instr));
+
+	switch (instr->type) {
+	case INSTR_JMP_VALID:
+	case INSTR_JMP_INVALID:
+		/* instr.jmp.header_id. */
+		fprintf(f,
+			"\t\t\t.header_id = %u,\n",
+			instr->jmp.header_id);
+		break;
+
+	case INSTR_JMP_ACTION_HIT:
+	case INSTR_JMP_ACTION_MISS:
+		/* instr.jmp.action_id. */
+		fprintf(f,
+			"\t\t\t.action_id = %u,\n",
+			instr->jmp.action_id);
+		break;
+
+	case INSTR_JMP_EQ:
+	case INSTR_JMP_EQ_MH:
+	case INSTR_JMP_EQ_HM:
+	case INSTR_JMP_EQ_HH:
+	case INSTR_JMP_NEQ:
+	case INSTR_JMP_NEQ_MH:
+	case INSTR_JMP_NEQ_HM:
+	case INSTR_JMP_NEQ_HH:
+	case INSTR_JMP_LT:
+	case INSTR_JMP_LT_MH:
+	case INSTR_JMP_LT_HM:
+	case INSTR_JMP_LT_HH:
+	case INSTR_JMP_GT:
+	case INSTR_JMP_GT_MH:
+	case INSTR_JMP_GT_HM:
+	case INSTR_JMP_GT_HH:
+		/* instr.jmp.a and instr.jmp.b. */
+		fprintf(f,
+			"\t\t\t.a = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.b = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n",
+			instr->jmp.a.struct_id,
+			instr->jmp.a.n_bits,
+			instr->jmp.a.offset,
+			instr->jmp.b.struct_id,
+			instr->jmp.b.n_bits,
+			instr->jmp.b.offset);
+		break;
+
+	case INSTR_JMP_EQ_I:
+	case INSTR_JMP_NEQ_I:
+	case INSTR_JMP_LT_MI:
+	case INSTR_JMP_LT_HI:
+	case INSTR_JMP_GT_MI:
+	case INSTR_JMP_GT_HI:
+		/* instr.jmp.a and instr.jmp.b_val. */
+		fprintf(f,
+			"\t\t\t.a = {\n"
+			"\t\t\t\t.struct_id = %u,\n"
+			"\t\t\t\t.n_bits = %u,\n"
+			"\t\t\t\t.offset = %u,\n"
+			"\t\t\t},\n"
+			"\t\t\t.b_val = %" PRIu64 ",\n",
+			instr->jmp.a.struct_id,
+			instr->jmp.a.n_bits,
+			instr->jmp.a.offset,
+			instr->jmp.b_val);
+		break;
+
+	default:
+		break;
+	}
+
+	/* instr.jmp and instr - closing curly braces. */
+	fprintf(f,
+		"\t\t},\n"
+		"\t},\n");
+}
+
+/* Export an instruction that has no fields besides its type to file f. */
+static void
+instr_return_export(struct instruction *instr,
+		FILE *f)
+{
+	const char *type_name = instr_type_to_name(instr);
+
+	/* instr - opening curly brace, type and closing curly brace. */
+	fprintf(f,
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t},\n",
+		type_name);
+}
+
+/*
+ * Export function for each instruction type, indexed by the instruction type.
+ * The table is only ever read, hence const. Any index that is not explicitly
+ * initialized below holds NULL.
+ */
+static const instruction_export_t export_table[] = {
+	[INSTR_RX] = instr_io_export,
+
+	[INSTR_TX] = instr_io_export,
+	[INSTR_TX_I] = instr_io_export,
+
+	[INSTR_HDR_EXTRACT] = instr_io_export,
+	[INSTR_HDR_EXTRACT2] = instr_io_export,
+	[INSTR_HDR_EXTRACT3] = instr_io_export,
+	[INSTR_HDR_EXTRACT4] = instr_io_export,
+	[INSTR_HDR_EXTRACT5] = instr_io_export,
+	[INSTR_HDR_EXTRACT6] = instr_io_export,
+	[INSTR_HDR_EXTRACT7] = instr_io_export,
+	[INSTR_HDR_EXTRACT8] = instr_io_export,
+
+	[INSTR_HDR_EXTRACT_M] = instr_io_export,
+
+	[INSTR_HDR_LOOKAHEAD] = instr_io_export,
+
+	[INSTR_HDR_EMIT] = instr_io_export,
+	[INSTR_HDR_EMIT_TX] = instr_io_export,
+	[INSTR_HDR_EMIT2_TX] = instr_io_export,
+	[INSTR_HDR_EMIT3_TX] = instr_io_export,
+	[INSTR_HDR_EMIT4_TX] = instr_io_export,
+	[INSTR_HDR_EMIT5_TX] = instr_io_export,
+	[INSTR_HDR_EMIT6_TX] = instr_io_export,
+	[INSTR_HDR_EMIT7_TX] = instr_io_export,
+	[INSTR_HDR_EMIT8_TX] = instr_io_export,
+
+	[INSTR_HDR_VALIDATE] = instr_hdr_validate_export,
+	[INSTR_HDR_INVALIDATE] = instr_hdr_validate_export,
+
+	[INSTR_MOV] = instr_mov_export,
+	[INSTR_MOV_MH] = instr_mov_export,
+	[INSTR_MOV_HM] = instr_mov_export,
+	[INSTR_MOV_HH] = instr_mov_export,
+	[INSTR_MOV_I] = instr_mov_export,
+
+	[INSTR_DMA_HT]  = instr_dma_ht_export,
+	[INSTR_DMA_HT2] = instr_dma_ht_export,
+	[INSTR_DMA_HT3] = instr_dma_ht_export,
+	[INSTR_DMA_HT4] = instr_dma_ht_export,
+	[INSTR_DMA_HT5] = instr_dma_ht_export,
+	[INSTR_DMA_HT6] = instr_dma_ht_export,
+	[INSTR_DMA_HT7] = instr_dma_ht_export,
+	[INSTR_DMA_HT8] = instr_dma_ht_export,
+
+	[INSTR_ALU_ADD] = instr_alu_export,
+	[INSTR_ALU_ADD_MH] = instr_alu_export,
+	[INSTR_ALU_ADD_HM] = instr_alu_export,
+	[INSTR_ALU_ADD_HH] = instr_alu_export,
+	[INSTR_ALU_ADD_MI] = instr_alu_export,
+	[INSTR_ALU_ADD_HI] = instr_alu_export,
+
+	[INSTR_ALU_SUB] = instr_alu_export,
+	[INSTR_ALU_SUB_MH] = instr_alu_export,
+	[INSTR_ALU_SUB_HM] = instr_alu_export,
+	[INSTR_ALU_SUB_HH] = instr_alu_export,
+	[INSTR_ALU_SUB_MI] = instr_alu_export,
+	[INSTR_ALU_SUB_HI] = instr_alu_export,
+
+	[INSTR_ALU_CKADD_FIELD] = instr_alu_export,
+	[INSTR_ALU_CKADD_STRUCT] = instr_alu_export,
+	[INSTR_ALU_CKADD_STRUCT20] = instr_alu_export,
+	[INSTR_ALU_CKSUB_FIELD] = instr_alu_export,
+
+	[INSTR_ALU_AND] = instr_alu_export,
+	[INSTR_ALU_AND_MH] = instr_alu_export,
+	[INSTR_ALU_AND_HM] = instr_alu_export,
+	[INSTR_ALU_AND_HH] = instr_alu_export,
+	[INSTR_ALU_AND_I] = instr_alu_export,
+
+	[INSTR_ALU_OR] = instr_alu_export,
+	[INSTR_ALU_OR_MH] = instr_alu_export,
+	[INSTR_ALU_OR_HM] = instr_alu_export,
+	[INSTR_ALU_OR_HH] = instr_alu_export,
+	[INSTR_ALU_OR_I] = instr_alu_export,
+
+	[INSTR_ALU_XOR] = instr_alu_export,
+	[INSTR_ALU_XOR_MH] = instr_alu_export,
+	[INSTR_ALU_XOR_HM] = instr_alu_export,
+	[INSTR_ALU_XOR_HH] = instr_alu_export,
+	[INSTR_ALU_XOR_I] = instr_alu_export,
+
+	[INSTR_ALU_SHL] = instr_alu_export,
+	[INSTR_ALU_SHL_MH] = instr_alu_export,
+	[INSTR_ALU_SHL_HM] = instr_alu_export,
+	[INSTR_ALU_SHL_HH] = instr_alu_export,
+	[INSTR_ALU_SHL_MI] = instr_alu_export,
+	[INSTR_ALU_SHL_HI] = instr_alu_export,
+
+	[INSTR_ALU_SHR] = instr_alu_export,
+	[INSTR_ALU_SHR_MH] = instr_alu_export,
+	[INSTR_ALU_SHR_HM] = instr_alu_export,
+	[INSTR_ALU_SHR_HH] = instr_alu_export,
+	[INSTR_ALU_SHR_MI] = instr_alu_export,
+	[INSTR_ALU_SHR_HI] = instr_alu_export,
+
+	[INSTR_REGPREFETCH_RH] = instr_reg_export,
+	[INSTR_REGPREFETCH_RM] = instr_reg_export,
+	[INSTR_REGPREFETCH_RI] = instr_reg_export,
+
+	[INSTR_REGRD_HRH] = instr_reg_export,
+	[INSTR_REGRD_HRM] = instr_reg_export,
+	[INSTR_REGRD_MRH] = instr_reg_export,
+	[INSTR_REGRD_MRM] = instr_reg_export,
+	[INSTR_REGRD_HRI] = instr_reg_export,
+	[INSTR_REGRD_MRI] = instr_reg_export,
+
+	[INSTR_REGWR_RHH] = instr_reg_export,
+	[INSTR_REGWR_RHM] = instr_reg_export,
+	[INSTR_REGWR_RMH] = instr_reg_export,
+	[INSTR_REGWR_RMM] = instr_reg_export,
+	[INSTR_REGWR_RHI] = instr_reg_export,
+	[INSTR_REGWR_RMI] = instr_reg_export,
+	[INSTR_REGWR_RIH] = instr_reg_export,
+	[INSTR_REGWR_RIM] = instr_reg_export,
+	[INSTR_REGWR_RII] = instr_reg_export,
+
+	[INSTR_REGADD_RHH] = instr_reg_export,
+	[INSTR_REGADD_RHM] = instr_reg_export,
+	[INSTR_REGADD_RMH] = instr_reg_export,
+	[INSTR_REGADD_RMM] = instr_reg_export,
+	[INSTR_REGADD_RHI] = instr_reg_export,
+	[INSTR_REGADD_RMI] = instr_reg_export,
+	[INSTR_REGADD_RIH] = instr_reg_export,
+	[INSTR_REGADD_RIM] = instr_reg_export,
+	[INSTR_REGADD_RII] = instr_reg_export,
+
+	[INSTR_METPREFETCH_H] = instr_meter_export,
+	[INSTR_METPREFETCH_M] = instr_meter_export,
+	[INSTR_METPREFETCH_I] = instr_meter_export,
+
+	[INSTR_METER_HHM] = instr_meter_export,
+	[INSTR_METER_HHI] = instr_meter_export,
+	[INSTR_METER_HMM] = instr_meter_export,
+	[INSTR_METER_HMI] = instr_meter_export,
+	[INSTR_METER_MHM] = instr_meter_export,
+	[INSTR_METER_MHI] = instr_meter_export,
+	[INSTR_METER_MMM] = instr_meter_export,
+	[INSTR_METER_MMI] = instr_meter_export,
+	[INSTR_METER_IHM] = instr_meter_export,
+	[INSTR_METER_IHI] = instr_meter_export,
+	[INSTR_METER_IMM] = instr_meter_export,
+	[INSTR_METER_IMI] = instr_meter_export,
+
+	[INSTR_TABLE] = instr_table_export,
+	[INSTR_TABLE_AF] = instr_table_export,
+	[INSTR_SELECTOR] = instr_table_export,
+	[INSTR_LEARNER] = instr_table_export,
+	[INSTR_LEARNER_AF] = instr_table_export,
+
+	[INSTR_LEARNER_LEARN] = instr_learn_export,
+	[INSTR_LEARNER_FORGET] = instr_forget_export,
+
+	[INSTR_EXTERN_OBJ] = instr_extern_export,
+	[INSTR_EXTERN_FUNC] = instr_extern_export,
+
+	[INSTR_JMP] = instr_jmp_export,
+	[INSTR_JMP_VALID] = instr_jmp_export,
+	[INSTR_JMP_INVALID] = instr_jmp_export,
+	[INSTR_JMP_HIT] = instr_jmp_export,
+	[INSTR_JMP_MISS] = instr_jmp_export,
+	[INSTR_JMP_ACTION_HIT] = instr_jmp_export,
+	[INSTR_JMP_ACTION_MISS] = instr_jmp_export,
+
+	[INSTR_JMP_EQ] = instr_jmp_export,
+	[INSTR_JMP_EQ_MH] = instr_jmp_export,
+	[INSTR_JMP_EQ_HM] = instr_jmp_export,
+	[INSTR_JMP_EQ_HH] = instr_jmp_export,
+	[INSTR_JMP_EQ_I] = instr_jmp_export,
+
+	[INSTR_JMP_NEQ] = instr_jmp_export,
+	[INSTR_JMP_NEQ_MH] = instr_jmp_export,
+	[INSTR_JMP_NEQ_HM] = instr_jmp_export,
+	[INSTR_JMP_NEQ_HH] = instr_jmp_export,
+	[INSTR_JMP_NEQ_I] = instr_jmp_export,
+
+	[INSTR_JMP_LT] = instr_jmp_export,
+	[INSTR_JMP_LT_MH] = instr_jmp_export,
+	[INSTR_JMP_LT_HM] = instr_jmp_export,
+	[INSTR_JMP_LT_HH] = instr_jmp_export,
+	[INSTR_JMP_LT_MI] = instr_jmp_export,
+	[INSTR_JMP_LT_HI] = instr_jmp_export,
+
+	[INSTR_JMP_GT] = instr_jmp_export,
+	[INSTR_JMP_GT_MH] = instr_jmp_export,
+	[INSTR_JMP_GT_HM] = instr_jmp_export,
+	[INSTR_JMP_GT_HH] = instr_jmp_export,
+	[INSTR_JMP_GT_MI] = instr_jmp_export,
+	[INSTR_JMP_GT_HI] = instr_jmp_export,
+
+	[INSTR_RETURN] = instr_return_export,
+};
+
+/*
+ * Write to file f the definition of the action_<name>_instructions array,
+ * with one initializer per instruction of action a, each produced by the
+ * export function registered for the instruction type.
+ */
+static void
+action_data_codegen(struct action *a, FILE *f)
+{
+	uint32_t pos;
+
+	/* Array - opening. */
+	fprintf(f,
+		"static const struct instruction action_%s_instructions[] = {\n",
+		a->name);
+
+	/* Array - one element per instruction. */
+	for (pos = 0; pos < a->n_instructions; pos++) {
+		struct instruction *current = &a->instructions[pos];
+
+		export_table[current->type](current, f);
+	}
+
+	/* Array - closing. */
+	fputs("};\n", f);
+}
+
 static int
 pipeline_codegen(struct rte_swx_pipeline *p)
 {
+	struct action *a;
 	FILE *f = NULL;
 
 	if (!p)
@@ -9815,6 +10899,15 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 	/* Include the .h file. */
 	fprintf(f, "#include \"rte_swx_pipeline_internal.h\"\n");
 
+	/* Add the code for each action. */
+	TAILQ_FOREACH(a, &p->actions, node) {
+		fprintf(f, "/**\n * Action %s\n */\n\n", a->name);
+
+		action_data_codegen(a, f);
+
+		fprintf(f, "\n");
+	}
+
 	/* Close the .c file. */
 	fclose(f);
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 21/24] pipeline: generate action functions
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (18 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
                       ` (5 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Generate a C function for each action. For most instructions, the
associated inline function is called directly. Special care is taken
for TX, jump and return instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 662 ++++++++++++++++++++++++++++++++
 1 file changed, 662 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index c962283fed..ccd26d0f3a 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1376,6 +1376,26 @@ instruction_is_tx(enum instruction_type type)
 	}
 }
 
+/* Return 1 when the instruction type is one of the TX types, 0 otherwise. */
+static int
+instruction_does_tx(struct instruction *instr)
+{
+	static const enum instruction_type tx_types[] = {
+		INSTR_TX,
+		INSTR_TX_I,
+		INSTR_HDR_EMIT_TX,
+		INSTR_HDR_EMIT2_TX,
+		INSTR_HDR_EMIT3_TX,
+		INSTR_HDR_EMIT4_TX,
+		INSTR_HDR_EMIT5_TX,
+		INSTR_HDR_EMIT6_TX,
+		INSTR_HDR_EMIT7_TX,
+		INSTR_HDR_EMIT8_TX,
+	};
+	uint32_t i;
+
+	for (i = 0; i < sizeof(tx_types) / sizeof(tx_types[0]); i++)
+		if (instr->type == tx_types[i])
+			return 1;
+
+	return 0;
+}
+
 static int
 instruction_is_jmp(struct instruction *instr)
 {
@@ -10882,6 +10902,644 @@ action_data_codegen(struct action *a, FILE *f)
 	fprintf(f, "};\n");
 }
 
+/*
+ * Return the name of the function to be called by the generated code for the
+ * given instruction, or NULL when the instruction type has no entry in the
+ * table below (e.g. RX, table lookup, extern, jump and return instructions).
+ */
+static const char *
+instr_type_to_func(struct instruction *instr)
+{
+	/* Indexed by instruction type; unlisted types are implicitly NULL. */
+	static const char * const func_names[] = {
+		[INSTR_TX] = "__instr_tx_exec",
+		[INSTR_TX_I] = "__instr_tx_i_exec",
+
+		[INSTR_HDR_EXTRACT] = "__instr_hdr_extract_exec",
+		[INSTR_HDR_EXTRACT2] = "__instr_hdr_extract2_exec",
+		[INSTR_HDR_EXTRACT3] = "__instr_hdr_extract3_exec",
+		[INSTR_HDR_EXTRACT4] = "__instr_hdr_extract4_exec",
+		[INSTR_HDR_EXTRACT5] = "__instr_hdr_extract5_exec",
+		[INSTR_HDR_EXTRACT6] = "__instr_hdr_extract6_exec",
+		[INSTR_HDR_EXTRACT7] = "__instr_hdr_extract7_exec",
+		[INSTR_HDR_EXTRACT8] = "__instr_hdr_extract8_exec",
+
+		[INSTR_HDR_EXTRACT_M] = "__instr_hdr_extract_m_exec",
+
+		[INSTR_HDR_LOOKAHEAD] = "__instr_hdr_lookahead_exec",
+
+		[INSTR_HDR_EMIT] = "__instr_hdr_emit_exec",
+		[INSTR_HDR_EMIT_TX] = "__instr_hdr_emit_tx_exec",
+		[INSTR_HDR_EMIT2_TX] = "__instr_hdr_emit2_tx_exec",
+		[INSTR_HDR_EMIT3_TX] = "__instr_hdr_emit3_tx_exec",
+		[INSTR_HDR_EMIT4_TX] = "__instr_hdr_emit4_tx_exec",
+		[INSTR_HDR_EMIT5_TX] = "__instr_hdr_emit5_tx_exec",
+		[INSTR_HDR_EMIT6_TX] = "__instr_hdr_emit6_tx_exec",
+		[INSTR_HDR_EMIT7_TX] = "__instr_hdr_emit7_tx_exec",
+		[INSTR_HDR_EMIT8_TX] = "__instr_hdr_emit8_tx_exec",
+
+		[INSTR_HDR_VALIDATE] = "__instr_hdr_validate_exec",
+		[INSTR_HDR_INVALIDATE] = "__instr_hdr_invalidate_exec",
+
+		[INSTR_MOV] = "__instr_mov_exec",
+		[INSTR_MOV_MH] = "__instr_mov_mh_exec",
+		[INSTR_MOV_HM] = "__instr_mov_hm_exec",
+		[INSTR_MOV_HH] = "__instr_mov_hh_exec",
+		[INSTR_MOV_I] = "__instr_mov_i_exec",
+
+		[INSTR_DMA_HT] = "__instr_dma_ht_exec",
+		[INSTR_DMA_HT2] = "__instr_dma_ht2_exec",
+		[INSTR_DMA_HT3] = "__instr_dma_ht3_exec",
+		[INSTR_DMA_HT4] = "__instr_dma_ht4_exec",
+		[INSTR_DMA_HT5] = "__instr_dma_ht5_exec",
+		[INSTR_DMA_HT6] = "__instr_dma_ht6_exec",
+		[INSTR_DMA_HT7] = "__instr_dma_ht7_exec",
+		[INSTR_DMA_HT8] = "__instr_dma_ht8_exec",
+
+		[INSTR_ALU_ADD] = "__instr_alu_add_exec",
+		[INSTR_ALU_ADD_MH] = "__instr_alu_add_mh_exec",
+		[INSTR_ALU_ADD_HM] = "__instr_alu_add_hm_exec",
+		[INSTR_ALU_ADD_HH] = "__instr_alu_add_hh_exec",
+		[INSTR_ALU_ADD_MI] = "__instr_alu_add_mi_exec",
+		[INSTR_ALU_ADD_HI] = "__instr_alu_add_hi_exec",
+
+		[INSTR_ALU_SUB] = "__instr_alu_sub_exec",
+		[INSTR_ALU_SUB_MH] = "__instr_alu_sub_mh_exec",
+		[INSTR_ALU_SUB_HM] = "__instr_alu_sub_hm_exec",
+		[INSTR_ALU_SUB_HH] = "__instr_alu_sub_hh_exec",
+		[INSTR_ALU_SUB_MI] = "__instr_alu_sub_mi_exec",
+		[INSTR_ALU_SUB_HI] = "__instr_alu_sub_hi_exec",
+
+		[INSTR_ALU_CKADD_FIELD] = "__instr_alu_ckadd_field_exec",
+		[INSTR_ALU_CKADD_STRUCT20] = "__instr_alu_ckadd_struct20_exec",
+		[INSTR_ALU_CKADD_STRUCT] = "__instr_alu_ckadd_struct_exec",
+		[INSTR_ALU_CKSUB_FIELD] = "__instr_alu_cksub_field_exec",
+
+		[INSTR_ALU_AND] = "__instr_alu_and_exec",
+		[INSTR_ALU_AND_MH] = "__instr_alu_and_mh_exec",
+		[INSTR_ALU_AND_HM] = "__instr_alu_and_hm_exec",
+		[INSTR_ALU_AND_HH] = "__instr_alu_and_hh_exec",
+		[INSTR_ALU_AND_I] = "__instr_alu_and_i_exec",
+
+		[INSTR_ALU_OR] = "__instr_alu_or_exec",
+		[INSTR_ALU_OR_MH] = "__instr_alu_or_mh_exec",
+		[INSTR_ALU_OR_HM] = "__instr_alu_or_hm_exec",
+		[INSTR_ALU_OR_HH] = "__instr_alu_or_hh_exec",
+		[INSTR_ALU_OR_I] = "__instr_alu_or_i_exec",
+
+		[INSTR_ALU_XOR] = "__instr_alu_xor_exec",
+		[INSTR_ALU_XOR_MH] = "__instr_alu_xor_mh_exec",
+		[INSTR_ALU_XOR_HM] = "__instr_alu_xor_hm_exec",
+		[INSTR_ALU_XOR_HH] = "__instr_alu_xor_hh_exec",
+		[INSTR_ALU_XOR_I] = "__instr_alu_xor_i_exec",
+
+		[INSTR_ALU_SHL] = "__instr_alu_shl_exec",
+		[INSTR_ALU_SHL_MH] = "__instr_alu_shl_mh_exec",
+		[INSTR_ALU_SHL_HM] = "__instr_alu_shl_hm_exec",
+		[INSTR_ALU_SHL_HH] = "__instr_alu_shl_hh_exec",
+		[INSTR_ALU_SHL_MI] = "__instr_alu_shl_mi_exec",
+		[INSTR_ALU_SHL_HI] = "__instr_alu_shl_hi_exec",
+
+		[INSTR_ALU_SHR] = "__instr_alu_shr_exec",
+		[INSTR_ALU_SHR_MH] = "__instr_alu_shr_mh_exec",
+		[INSTR_ALU_SHR_HM] = "__instr_alu_shr_hm_exec",
+		[INSTR_ALU_SHR_HH] = "__instr_alu_shr_hh_exec",
+		[INSTR_ALU_SHR_MI] = "__instr_alu_shr_mi_exec",
+		[INSTR_ALU_SHR_HI] = "__instr_alu_shr_hi_exec",
+
+		[INSTR_REGPREFETCH_RH] = "__instr_regprefetch_rh_exec",
+		[INSTR_REGPREFETCH_RM] = "__instr_regprefetch_rm_exec",
+		[INSTR_REGPREFETCH_RI] = "__instr_regprefetch_ri_exec",
+
+		[INSTR_REGRD_HRH] = "__instr_regrd_hrh_exec",
+		[INSTR_REGRD_HRM] = "__instr_regrd_hrm_exec",
+		[INSTR_REGRD_HRI] = "__instr_regrd_hri_exec",
+		[INSTR_REGRD_MRH] = "__instr_regrd_mrh_exec",
+		[INSTR_REGRD_MRM] = "__instr_regrd_mrm_exec",
+		[INSTR_REGRD_MRI] = "__instr_regrd_mri_exec",
+
+		[INSTR_REGWR_RHH] = "__instr_regwr_rhh_exec",
+		[INSTR_REGWR_RHM] = "__instr_regwr_rhm_exec",
+		[INSTR_REGWR_RHI] = "__instr_regwr_rhi_exec",
+		[INSTR_REGWR_RMH] = "__instr_regwr_rmh_exec",
+		[INSTR_REGWR_RMM] = "__instr_regwr_rmm_exec",
+		[INSTR_REGWR_RMI] = "__instr_regwr_rmi_exec",
+		[INSTR_REGWR_RIH] = "__instr_regwr_rih_exec",
+		[INSTR_REGWR_RIM] = "__instr_regwr_rim_exec",
+		[INSTR_REGWR_RII] = "__instr_regwr_rii_exec",
+
+		[INSTR_REGADD_RHH] = "__instr_regadd_rhh_exec",
+		[INSTR_REGADD_RHM] = "__instr_regadd_rhm_exec",
+		[INSTR_REGADD_RHI] = "__instr_regadd_rhi_exec",
+		[INSTR_REGADD_RMH] = "__instr_regadd_rmh_exec",
+		[INSTR_REGADD_RMM] = "__instr_regadd_rmm_exec",
+		[INSTR_REGADD_RMI] = "__instr_regadd_rmi_exec",
+		[INSTR_REGADD_RIH] = "__instr_regadd_rih_exec",
+		[INSTR_REGADD_RIM] = "__instr_regadd_rim_exec",
+		[INSTR_REGADD_RII] = "__instr_regadd_rii_exec",
+
+		[INSTR_METPREFETCH_H] = "__instr_metprefetch_h_exec",
+		[INSTR_METPREFETCH_M] = "__instr_metprefetch_m_exec",
+		[INSTR_METPREFETCH_I] = "__instr_metprefetch_i_exec",
+
+		[INSTR_METER_HHM] = "__instr_meter_hhm_exec",
+		[INSTR_METER_HHI] = "__instr_meter_hhi_exec",
+		[INSTR_METER_HMM] = "__instr_meter_hmm_exec",
+		[INSTR_METER_HMI] = "__instr_meter_hmi_exec",
+		[INSTR_METER_MHM] = "__instr_meter_mhm_exec",
+		[INSTR_METER_MHI] = "__instr_meter_mhi_exec",
+		[INSTR_METER_MMM] = "__instr_meter_mmm_exec",
+		[INSTR_METER_MMI] = "__instr_meter_mmi_exec",
+		[INSTR_METER_IHM] = "__instr_meter_ihm_exec",
+		[INSTR_METER_IHI] = "__instr_meter_ihi_exec",
+		[INSTR_METER_IMM] = "__instr_meter_imm_exec",
+		[INSTR_METER_IMI] = "__instr_meter_imi_exec",
+
+		[INSTR_LEARNER_LEARN] = "__instr_learn_exec",
+		[INSTR_LEARNER_FORGET] = "__instr_forget_exec",
+	};
+	uint64_t type = (uint64_t)instr->type;
+
+	/* Types beyond the last table entry have no function either. */
+	if (type >= sizeof(func_names) / sizeof(func_names[0]))
+		return NULL;
+
+	return func_names[type];
+}
+
+/*
+ * Write to file f the statements generated for a TX instruction located at
+ * position instr_pos of action a: the call to the instruction function,
+ * followed by thread_ip_reset(), instr_rx_exec() and a return statement.
+ */
+static void
+action_instr_does_tx_codegen(struct action *a,
+			uint32_t instr_pos,
+			struct instruction *instr,
+			FILE *f)
+{
+	const char *func_name = instr_type_to_func(instr);
+
+	fprintf(f,
+		"%s(p, t, &action_%s_instructions[%u]);\n",
+		func_name,
+		a->name,
+		instr_pos);
+
+	fputs("\tthread_ip_reset(p, t);\n", f);
+	fputs("\tinstr_rx_exec(p);\n", f);
+	fputs("\treturn;\n", f);
+}
+
+/*
+ * Write to file f the statement generated for the extern object instruction
+ * at position instr_pos of action a: a loop that calls
+ * __instr_extern_obj_exec() for as long as it returns zero.
+ */
+static void
+action_instr_extern_obj_codegen(struct action *a,
+				uint32_t instr_pos,
+				FILE *f)
+{
+	const char *action_name = a->name;
+
+	fprintf(f,
+		"while (!__instr_extern_obj_exec(p, t, &action_%s_instructions[%u]));\n",
+		action_name,
+		instr_pos);
+}
+
+/*
+ * Write to file f the statement generated for the extern function instruction
+ * at position instr_pos of action a: a loop that calls
+ * __instr_extern_func_exec() for as long as it returns zero.
+ */
+static void
+action_instr_extern_func_codegen(struct action *a,
+				 uint32_t instr_pos,
+				 FILE *f)
+{
+	const char *action_name = a->name;
+
+	fprintf(f,
+		"while (!__instr_extern_func_exec(p, t, &action_%s_instructions[%u]));\n",
+		action_name,
+		instr_pos);
+}
+
+/*
+ * Write to file f the statement generated for the jump instruction located at
+ * position instr_pos of action a: either an unconditional "goto <label>" or
+ * an "if (<condition>) goto <label>", with the label taken from
+ * data->jmp_label. Nothing is written for any other instruction type.
+ */
+static void
+action_instr_jmp_codegen(struct action *a,
+			 uint32_t instr_pos,
+			 struct instruction *instr,
+			 struct instruction_data *data,
+			 FILE *f)
+{
+	/*
+	 * Jumps that compare operand a against operand b. The a and b columns
+	 * hold the suffix of the instr_operand_<suffix>() function used to read
+	 * the operand; a NULL b means the immediate value jmp.b_val is used.
+	 */
+	static const struct {
+		enum instruction_type type;
+		const char *a;
+		const char *op;
+		const char *b;
+	} cmp[] = {
+		{INSTR_JMP_EQ, "hbo", "==", "hbo"},
+		{INSTR_JMP_EQ_MH, "hbo", "==", "nbo"},
+		{INSTR_JMP_EQ_HM, "nbo", "==", "hbo"},
+		{INSTR_JMP_EQ_HH, "nbo", "==", "nbo"},
+		{INSTR_JMP_EQ_I, "hbo", "==", NULL},
+
+		{INSTR_JMP_NEQ, "hbo", "!=", "hbo"},
+		{INSTR_JMP_NEQ_MH, "hbo", "!=", "nbo"},
+		{INSTR_JMP_NEQ_HM, "nbo", "!=", "hbo"},
+		{INSTR_JMP_NEQ_HH, "nbo", "!=", "nbo"},
+		{INSTR_JMP_NEQ_I, "hbo", "!=", NULL},
+
+		{INSTR_JMP_LT, "hbo", "<", "hbo"},
+		{INSTR_JMP_LT_MH, "hbo", "<", "nbo"},
+		{INSTR_JMP_LT_HM, "nbo", "<", "hbo"},
+		{INSTR_JMP_LT_HH, "nbo", "<", "nbo"},
+		{INSTR_JMP_LT_MI, "hbo", "<", NULL},
+		{INSTR_JMP_LT_HI, "nbo", "<", NULL},
+
+		{INSTR_JMP_GT, "hbo", ">", "hbo"},
+		{INSTR_JMP_GT_MH, "hbo", ">", "nbo"},
+		{INSTR_JMP_GT_HM, "nbo", ">", "hbo"},
+		{INSTR_JMP_GT_HH, "nbo", ">", "nbo"},
+		{INSTR_JMP_GT_MI, "hbo", ">", NULL},
+		{INSTR_JMP_GT_HI, "nbo", ">", NULL},
+	};
+	const char *name = a->name;
+	const char *label = data->jmp_label;
+	uint32_t i;
+
+	/* Jumps that do not compare two operands. */
+	switch (instr->type) {
+	case INSTR_JMP:
+		fprintf(f, "goto %s;\n", label);
+		return;
+
+	case INSTR_JMP_VALID:
+	case INSTR_JMP_INVALID:
+		fprintf(f,
+			"if (%sHEADER_VALID(t, action_%s_instructions[%u].jmp.header_id))\n"
+			"\t\tgoto %s;\n",
+			(instr->type == INSTR_JMP_VALID) ? "" : "!",
+			name,
+			instr_pos,
+			label);
+		return;
+
+	case INSTR_JMP_HIT:
+	case INSTR_JMP_MISS:
+		fprintf(f,
+			"if (%st->hit)\n"
+			"\t\tgoto %s;\n",
+			(instr->type == INSTR_JMP_HIT) ? "" : "!",
+			label);
+		return;
+
+	case INSTR_JMP_ACTION_HIT:
+	case INSTR_JMP_ACTION_MISS:
+		fprintf(f,
+			"if (t->action_id %s action_%s_instructions[%u].jmp.action_id)\n"
+			"\t\tgoto %s;\n",
+			(instr->type == INSTR_JMP_ACTION_HIT) ? "==" : "!=",
+			name,
+			instr_pos,
+			label);
+		return;
+
+	default:
+		break;
+	}
+
+	/* Jumps that compare two operands. */
+	for (i = 0; i < sizeof(cmp) / sizeof(cmp[0]); i++) {
+		if (instr->type != cmp[i].type)
+			continue;
+
+		if (cmp[i].b)
+			fprintf(f,
+				"if (instr_operand_%s(t, &action_%s_instructions[%u].jmp.a) %s "
+				"instr_operand_%s(t, &action_%s_instructions[%u].jmp.b))\n"
+				"\t\tgoto %s;\n",
+				cmp[i].a,
+				name,
+				instr_pos,
+				cmp[i].op,
+				cmp[i].b,
+				name,
+				instr_pos,
+				label);
+		else
+			fprintf(f,
+				"if (instr_operand_%s(t, &action_%s_instructions[%u].jmp.a) %s "
+				"action_%s_instructions[%u].jmp.b_val)\n"
+				"\t\tgoto %s;\n",
+				cmp[i].a,
+				name,
+				instr_pos,
+				cmp[i].op,
+				name,
+				instr_pos,
+				label);
+
+		return;
+	}
+}
+
+/* Write to file f the statement generated for the return instruction. */
+static void
+action_instr_return_codegen(FILE *f)
+{
+	fputs("return;\n", f);
+}
+
+/*
+ * Write to file f the definition of the action_<name>_run() function for
+ * action a, with one statement (or group of statements) generated for each
+ * instruction of the action. An instruction that has a label gets the label
+ * written in front of its statement.
+ */
+static void
+action_instr_codegen(struct action *a, FILE *f)
+{
+	uint32_t pos;
+
+	/* Function - opening. */
+	fprintf(f,
+		"void\n"
+		"action_%s_run(struct rte_swx_pipeline *p)\n"
+		"{\n"
+		"\tstruct thread *t = &p->threads[p->thread_id];\n"
+		"\n",
+		a->name);
+
+	/* Function - body. */
+	for (pos = 0; pos < a->n_instructions; pos++) {
+		struct instruction *instr = &a->instructions[pos];
+		struct instruction_data *data = &a->instruction_data[pos];
+
+		/* Label when present, plain indentation otherwise. */
+		if (!data->label[0])
+			fputs("\n\t", f);
+		else
+			fprintf(f, "\n%s : ", data->label);
+
+		/* The first matching instruction category is the one used. */
+		if (instruction_does_tx(instr))
+			action_instr_does_tx_codegen(a, pos, instr, f);
+		else if (instr->type == INSTR_EXTERN_OBJ)
+			action_instr_extern_obj_codegen(a, pos, f);
+		else if (instr->type == INSTR_EXTERN_FUNC)
+			action_instr_extern_func_codegen(a, pos, f);
+		else if (instruction_is_jmp(instr))
+			action_instr_jmp_codegen(a, pos, instr, data, f);
+		else if (instr->type == INSTR_RETURN)
+			action_instr_return_codegen(f);
+		else
+			/* Any other instruction type: direct function call. */
+			fprintf(f,
+				"%s(p, t, &action_%s_instructions[%u]);\n",
+				instr_type_to_func(instr),
+				a->name,
+				pos);
+	}
+
+	/* Function - closing. */
+	fputs("}\n\n", f);
+}
+
 static int
 pipeline_codegen(struct rte_swx_pipeline *p)
 {
@@ -10906,6 +11564,10 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 		action_data_codegen(a, f);
 
 		fprintf(f, "\n");
+
+		action_instr_codegen(a, f);
+
+		fprintf(f, "\n");
 	}
 
 	/* Close the .c file. */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 22/24] pipeline: generate custom instruction functions
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (19 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 21/24] pipeline: generate action functions Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
                       ` (4 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Generate a C function for each custom instruction, which essentially
consolidates multiple regular instructions into a single function call.
The pipeline program is split into groups of instructions, and a
custom instruction is generated for each group that has more than one
instruction. Special care is taken for the instructions that can do
thread yield (RX, extern) and for those that can change the instruction
pointer (TX, near/far jump).

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c | 651 +++++++++++++++++++++++++++++++-
 1 file changed, 645 insertions(+), 6 deletions(-)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index ccd26d0f3a..e669dd09d2 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -1436,6 +1436,24 @@ instruction_is_jmp(struct instruction *instr)
 	}
 }
 
+/* Return 1 when the instruction type is one of the thread yield instructions (RX, table,
+ * selector, learner, extern object/function), 0 otherwise.
+ */
+static int
+instruction_does_thread_yield(struct instruction *instr)
+{
+	int does_yield = 0;
+
+	switch (instr->type) {
+	case INSTR_RX:
+	case INSTR_TABLE:
+	case INSTR_TABLE_AF:
+	case INSTR_SELECTOR:
+	case INSTR_LEARNER:
+	case INSTR_LEARNER_AF:
+	case INSTR_EXTERN_OBJ:
+	case INSTR_EXTERN_FUNC:
+		does_yield = 1;
+		break;
+
+	default:
+		break;
+	}
+
+	return does_yield;
+}
+
 static struct field *
 action_field_parse(struct action *action, const char *name);
 
@@ -11540,15 +11558,623 @@ action_instr_codegen(struct action *a, FILE *f)
 	fprintf(f, "}\n\n");
 }
 
+/* Group of consecutive pipeline instructions, kept as a node of an instruction group list. */
+struct instruction_group {
+	/* Linkage into the instruction group list. */
+	TAILQ_ENTRY(instruction_group) node;
+
+	/* Group ID. */
+	uint32_t group_id;
+
+	/* ID of the first instruction of the group (inclusive). */
+	uint32_t first_instr_id;
+
+	/* ID of the last instruction of the group (inclusive). */
+	uint32_t last_instr_id;
+
+	/* Function associated with this group. */
+	instr_exec_t func;
+};
+
+/* List of instruction groups. */
+TAILQ_HEAD(instruction_group_list, instruction_group);
+
+/* Return the group whose [first_instr_id, last_instr_id] range contains instruction_id, or
+ * NULL when no group of the list contains this instruction.
+ */
+static struct instruction_group *
+instruction_group_list_group_find(struct instruction_group_list *igl, uint32_t instruction_id)
+{
+	struct instruction_group *grp;
+
+	TAILQ_FOREACH(grp, igl, node) {
+		/* Instruction located before this group. */
+		if (instruction_id < grp->first_instr_id)
+			continue;
+
+		/* Instruction located after this group. */
+		if (instruction_id > grp->last_instr_id)
+			continue;
+
+		return grp;
+	}
+
+	return NULL;
+}
+
+/* Free all the groups of the list, then the list itself. A NULL list is accepted. */
+static void
+instruction_group_list_free(struct instruction_group_list *igl)
+{
+	struct instruction_group *grp;
+
+	if (!igl)
+		return;
+
+	/* Drain the list from its head until it is empty. */
+	while ((grp = TAILQ_FIRST(igl)) != NULL) {
+		TAILQ_REMOVE(igl, grp, node);
+		free(grp);
+	}
+
+	free(igl);
+}
+
+/* Split the pipeline instructions into a list of instruction groups. Returns the newly allocated
+ * list, to be released with instruction_group_list_free(), or NULL on invalid arguments, memory
+ * allocation failure or group lookup failure.
+ */
+static struct instruction_group_list *
+instruction_group_list_create(struct rte_swx_pipeline *p)
+{
+	struct instruction_group_list *igl = NULL;
+	struct instruction_group *g = NULL;
+	uint32_t n_groups = 0, i;
+
+	if (!p || !p->instructions || !p->instruction_data || !p->n_instructions)
+		goto error;
+
+	/* List init. */
+	igl = calloc(1, sizeof(struct instruction_group_list));
+	if (!igl)
+		goto error;
+
+	TAILQ_INIT(igl);
+
+	/* Allocate the first group. */
+	g = calloc(1, sizeof(struct instruction_group));
+	if (!g)
+		goto error;
+
+	/* Iteration 1: Separate the instructions into groups based on the thread yield
+	 * instructions. Do not worry about the jump instructions at this point.
+	 */
+	for (i = 0; i < p->n_instructions; i++) {
+		struct instruction *instr = &p->instructions[i];
+
+		/* Check for thread yield instructions. */
+		if (!instruction_does_thread_yield(instr))
+			continue;
+
+		/* If the current group contains at least one instruction, then finalize it (with
+		 * the previous instruction), add it to the list and allocate a new group (that
+		 * starts with the current instruction).
+		 */
+		if (i - g->first_instr_id) {
+			/* Finalize the group. */
+			g->last_instr_id = i - 1;
+
+			/* Add the group to the list. Advance the number of groups. */
+			TAILQ_INSERT_TAIL(igl, g, node);
+			n_groups++;
+
+			/* Allocate a new group. */
+			g = calloc(1, sizeof(struct instruction_group));
+			if (!g)
+				goto error;
+
+			/* Initialize the new group. */
+			g->group_id = n_groups;
+			g->first_instr_id = i;
+		}
+
+		/* Finalize the current group (with the current instruction, therefore this group
+		 * contains just the current thread yield instruction), add it to the list and
+		 * allocate a new group (that starts with the next instruction).
+		 */
+
+		/* Finalize the group. */
+		g->last_instr_id = i;
+
+		/* Add the group to the list. Advance the number of groups. */
+		TAILQ_INSERT_TAIL(igl, g, node);
+		n_groups++;
+
+		/* Allocate a new group. */
+		g = calloc(1, sizeof(struct instruction_group));
+		if (!g)
+			goto error;
+
+		/* Initialize the new group. */
+		g->group_id = n_groups;
+		g->first_instr_id = i + 1;
+	}
+
+	/* Handle the last group: it is either added to the list (when not empty) or freed, so
+	 * from this point on the list owns all the groups.
+	 */
+	if (i - g->first_instr_id) {
+		/* Finalize the group. */
+		g->last_instr_id = i - 1;
+
+		/* Add the group to the list. Advance the number of groups. */
+		TAILQ_INSERT_TAIL(igl, g, node);
+		n_groups++;
+	} else
+		free(g);
+
+	g = NULL;
+
+	/* Iteration 2: Handle jumps. If the current group contains an instruction which represents
+	 * the destination of a jump instruction located in a different group ("far jump"), then the
+	 * current group has to be split, so that the instruction representing the far jump
+	 * destination is at the start of its group.
+	 */
+	for ( ; ; ) {
+		int is_modified = 0;
+
+		for (i = 0; i < p->n_instructions; i++) {
+			struct instruction_data *data = &p->instruction_data[i];
+			/* Note: this declaration shadows the function-level g, which is NULL at
+			 * this point, so the free(g) on the error path has nothing to release.
+			 */
+			struct instruction_group *g;
+			uint32_t j;
+
+			/* Continue when the current instruction is not a jump destination. */
+			if (!data->n_users)
+				continue;
+
+			g = instruction_group_list_group_find(igl, i);
+			if (!g)
+				goto error;
+
+			/* Find out all the jump instructions with this destination. */
+			for (j = 0; j < p->n_instructions; j++) {
+				struct instruction *jmp_instr = &p->instructions[j];
+				struct instruction_data *jmp_data = &p->instruction_data[j];
+				struct instruction_group *jmp_g, *new_g;
+
+				/* Continue when not a jump instruction. Even when jump instruction,
+				 * continue when the jump destination is not this instruction.
+				 */
+				if (!instruction_is_jmp(jmp_instr) ||
+				    strcmp(jmp_data->jmp_label, data->label))
+					continue;
+
+				jmp_g = instruction_group_list_group_find(igl, j);
+				if (!jmp_g)
+					goto error;
+
+				/* Continue when both the jump instruction and the jump destination
+				 * instruction are in the same group. Even when in different groups,
+				 * still continue if the jump destination instruction is already the
+				 * first instruction of its group.
+				 */
+				if ((jmp_g->group_id == g->group_id) || (g->first_instr_id == i))
+					continue;
+
+				/* Split the group of the current jump destination instruction to
+				 * make this instruction the first instruction of a new group.
+				 */
+				new_g = calloc(1, sizeof(struct instruction_group));
+				if (!new_g)
+					goto error;
+
+				/* The new group takes over the tail [i, last] of the split group. */
+				new_g->group_id = n_groups;
+				new_g->first_instr_id = i;
+				new_g->last_instr_id = g->last_instr_id;
+
+				g->last_instr_id = i - 1;
+
+				TAILQ_INSERT_AFTER(igl, g, new_g, node);
+				n_groups++;
+				is_modified = 1;
+
+				/* The decision to split this group (to make the current instruction
+				 * the first instruction of a new group) is already taken and fully
+				 * implemented, so no need to search for more reasons to do it.
+				 */
+				break;
+			}
+		}
+
+		/* Re-evaluate everything, as at least one group got split, so some jumps that were
+		 * previously considered local (i.e. the jump destination is in the same group as
+		 * the jump instruction) can now be "far jumps" (i.e. the jump destination is in a
+		 * different group than the jump instruction). Worst case scenario: each instruction
+		 * that is a jump destination ends up as the first instruction of its group.
+		 */
+		if (!is_modified)
+			break;
+	}
+
+	/* Re-assign the group IDs to be in incremental order. */
+	i = 0;
+	TAILQ_FOREACH(g, igl, node) {
+		g->group_id = i;
+
+		i++;
+	}
+
+	return igl;
+
+error:
+	instruction_group_list_free(igl);
+
+	free(g);
+
+	return NULL;
+}
+
+/* Write to file f the generated C code for the TX-type pipeline instruction located at position
+ * instr_pos: the call to the instruction function, followed by the thread instruction pointer
+ * reset, the RX instruction execution and the return from the generated function.
+ */
+static void
+pipeline_instr_does_tx_codegen(struct rte_swx_pipeline *p __rte_unused,
+			       uint32_t instr_pos,
+			       struct instruction *instr,
+			       FILE *f)
+{
+	const char *func_name = instr_type_to_func(instr);
+
+	/* The TX instruction itself. */
+	fprintf(f, "%s(p, t, &pipeline_instructions[%u]);\n", func_name, instr_pos);
+
+	/* What always follows a TX instruction. */
+	fputs("\tthread_ip_reset(p, t);\n"
+	      "\tinstr_rx_exec(p);\n"
+	      "\treturn;\n",
+	      f);
+}
+
+/* Write to file f the "if (<operand a> <op> <operand b>)" condition of a compare-and-jump
+ * pipeline instruction. The a_bo and b_bo strings ("hbo" or "nbo") select the
+ * instr_operand_hbo() or instr_operand_nbo() accessor for each operand. When b_bo is NULL, the
+ * second operand is the immediate value jmp.b_val.
+ */
+static void
+pipeline_instr_jmp_cmp_codegen(FILE *f,
+			       uint32_t jmp_instr_id,
+			       const char *a_bo,
+			       const char *op,
+			       const char *b_bo)
+{
+	fprintf(f,
+		"if (instr_operand_%s(t, &pipeline_instructions[%u].jmp.a) %s ",
+		a_bo,
+		jmp_instr_id,
+		op);
+
+	if (b_bo)
+		fprintf(f,
+			"instr_operand_%s(t, &pipeline_instructions[%u].jmp.b))",
+			b_bo,
+			jmp_instr_id);
+	else
+		fprintf(f,
+			"pipeline_instructions[%u].jmp.b_val)",
+			jmp_instr_id);
+}
+
+/* Write to file f the generated C code for the pipeline jump instruction jmp_instr located at
+ * position jmp_instr_id: the jump condition (none for the unconditional jump), followed by either
+ * a goto to the destination label ("near" jump, destination in the same instruction group) or by
+ * setting the thread instruction pointer and returning ("far" jump, destination in a different
+ * instruction group).
+ *
+ * Returns 0 on success, or -EINVAL when the group of the jump instruction, the jump destination
+ * label or the group of the jump destination cannot be found; in this case nothing is written
+ * to file f.
+ */
+static int
+pipeline_instr_jmp_codegen(struct rte_swx_pipeline *p,
+			   struct instruction_group_list *igl,
+			   uint32_t jmp_instr_id,
+			   struct instruction *jmp_instr,
+			   struct instruction_data *jmp_data,
+			   FILE *f)
+{
+	struct instruction_group *jmp_g, *g;
+	struct instruction_data *data;
+	uint32_t instr_id;
+
+	/* Resolve everything that can fail before any code is generated, so that an error never
+	 * leaves behind a condition without its body in the generated file.
+	 */
+
+	/* Find the instruction group of the jump instruction. */
+	jmp_g = instruction_group_list_group_find(igl, jmp_instr_id);
+	if (!jmp_g)
+		return -EINVAL;
+
+	/* Find the instruction group of the jump destination instruction. */
+	data = label_find(p->instruction_data, p->n_instructions, jmp_data->jmp_label);
+	if (!data)
+		return -EINVAL;
+
+	instr_id = data - p->instruction_data;
+
+	g = instruction_group_list_group_find(igl, instr_id);
+	if (!g)
+		return -EINVAL;
+
+	/* Code generation for the jump condition. */
+	switch (jmp_instr->type) {
+	case INSTR_JMP:
+		/* Unconditional jump: no condition. */
+		break;
+
+	case INSTR_JMP_VALID:
+		fprintf(f,
+			"if (HEADER_VALID(t, pipeline_instructions[%u].jmp.header_id))",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_INVALID:
+		fprintf(f,
+			"if (!HEADER_VALID(t, pipeline_instructions[%u].jmp.header_id))",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_HIT:
+		fprintf(f,
+			"if (t->hit)\n");
+		break;
+
+	case INSTR_JMP_MISS:
+		fprintf(f,
+			"if (!t->hit)\n");
+		break;
+
+	case INSTR_JMP_ACTION_HIT:
+		fprintf(f,
+			"if (t->action_id == pipeline_instructions[%u].jmp.action_id)",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_ACTION_MISS:
+		fprintf(f,
+			"if (t->action_id != pipeline_instructions[%u].jmp.action_id)",
+			jmp_instr_id);
+		break;
+
+	case INSTR_JMP_EQ:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", "==", "hbo");
+		break;
+
+	case INSTR_JMP_EQ_MH:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", "==", "nbo");
+		break;
+
+	case INSTR_JMP_EQ_HM:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", "==", "hbo");
+		break;
+
+	case INSTR_JMP_EQ_HH:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", "==", "nbo");
+		break;
+
+	case INSTR_JMP_EQ_I:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", "==", NULL);
+		break;
+
+	case INSTR_JMP_NEQ:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", "!=", "hbo");
+		break;
+
+	case INSTR_JMP_NEQ_MH:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", "!=", "nbo");
+		break;
+
+	case INSTR_JMP_NEQ_HM:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", "!=", "hbo");
+		break;
+
+	case INSTR_JMP_NEQ_HH:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", "!=", "nbo");
+		break;
+
+	case INSTR_JMP_NEQ_I:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", "!=", NULL);
+		break;
+
+	case INSTR_JMP_LT:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", "<", "hbo");
+		break;
+
+	case INSTR_JMP_LT_MH:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", "<", "nbo");
+		break;
+
+	case INSTR_JMP_LT_HM:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", "<", "hbo");
+		break;
+
+	case INSTR_JMP_LT_HH:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", "<", "nbo");
+		break;
+
+	case INSTR_JMP_LT_MI:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", "<", NULL);
+		break;
+
+	case INSTR_JMP_LT_HI:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", "<", NULL);
+		break;
+
+	case INSTR_JMP_GT:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", ">", "hbo");
+		break;
+
+	case INSTR_JMP_GT_MH:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", ">", "nbo");
+		break;
+
+	case INSTR_JMP_GT_HM:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", ">", "hbo");
+		break;
+
+	case INSTR_JMP_GT_HH:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", ">", "nbo");
+		break;
+
+	case INSTR_JMP_GT_MI:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "hbo", ">", NULL);
+		break;
+
+	case INSTR_JMP_GT_HI:
+		pipeline_instr_jmp_cmp_codegen(f, jmp_instr_id, "nbo", ">", NULL);
+		break;
+
+	default:
+		break;
+	}
+
+	/* Code generation for "near" jump (same instruction group) or "far" jump (different
+	 * instruction group). The far jump uses the ID of the destination group as the index of
+	 * the instruction to jump to.
+	 */
+	if (g->group_id == jmp_g->group_id)
+		fprintf(f,
+			"\n\t\tgoto %s;\n",
+			jmp_data->jmp_label);
+	else
+		fprintf(f,
+			" {\n"
+			"\t\tthread_ip_set(t, &p->instructions[%u]);\n"
+			"\t\treturn;\n"
+			"\t}\n\n",
+			g->group_id);
+
+	return 0;
+}
+
+/* Write to file f the generated C code for the pipeline: the pipeline instruction array, followed
+ * by one function "pipeline_func_<group_id>" for each instruction group that contains more than
+ * one instruction. Nothing is written when all the groups contain a single instruction.
+ */
+static void
+instruction_group_list_codegen(struct instruction_group_list *igl,
+			       struct rte_swx_pipeline *p,
+			       FILE *f)
+{
+	struct instruction_group *grp;
+	uint32_t pos;
+	int has_multi_instr_group = 0;
+
+	/* Code generation is only required when at least one group has several instructions. */
+	TAILQ_FOREACH(grp, igl, node) {
+		if (grp->first_instr_id < grp->last_instr_id) {
+			has_multi_instr_group = 1;
+			break;
+		}
+	}
+
+	if (!has_multi_instr_group)
+		return;
+
+	/* Pipeline instruction array: every instruction is exported by the function registered
+	 * for its type.
+	 */
+	fprintf(f,
+		"static const struct instruction pipeline_instructions[] = {\n");
+
+	for (pos = 0; pos < p->n_instructions; pos++) {
+		struct instruction *instr = &p->instructions[pos];
+
+		export_table[instr->type](instr, f);
+	}
+
+	fprintf(f, "};\n\n");
+
+	/* Pipeline functions: one for each group with more than one instruction. */
+	TAILQ_FOREACH(grp, igl, node) {
+		struct instruction *tail;
+
+		/* Single instruction group: no function gets generated. */
+		if (grp->first_instr_id == grp->last_instr_id)
+			continue;
+
+		/* Function header. */
+		fprintf(f,
+			"void\n"
+			"pipeline_func_%u(struct rte_swx_pipeline *p)\n"
+			"{\n"
+			"\tstruct thread *t = &p->threads[p->thread_id];\n"
+			"\n",
+			grp->group_id);
+
+		/* One statement for each instruction of the group. */
+		for (pos = grp->first_instr_id; pos <= grp->last_instr_id; pos++) {
+			struct instruction *instr = &p->instructions[pos];
+			struct instruction_data *data = &p->instruction_data[pos];
+
+			/* Every statement starts with its label, if any, otherwise with a tab. */
+			if (data->label[0])
+				fprintf(f, "\n%s : ", data->label);
+			else
+				fprintf(f, "\n\t");
+
+			if (instruction_does_tx(instr))
+				pipeline_instr_does_tx_codegen(p, pos, instr, f);
+			else if (instruction_is_jmp(instr))
+				pipeline_instr_jmp_codegen(p, igl, pos, instr, data, f);
+			else
+				fprintf(f,
+					"%s(p, t, &pipeline_instructions[%u]);\n",
+					instr_type_to_func(instr),
+					pos);
+		}
+
+		/* Function footer. When the group ends with a TX-type instruction or with an
+		 * unconditional jump, the generated code for that instruction has already decided
+		 * the next instruction of the thread; in all the other cases, the thread
+		 * instruction pointer has to be incremented at the end of the function.
+		 */
+		tail = &p->instructions[grp->last_instr_id];
+
+		if (!(instruction_does_tx(tail) || (tail->type == INSTR_JMP)))
+			fprintf(f,
+				"thread_ip_inc(p);\n");
+
+		fprintf(f,
+			"}\n"
+			"\n");
+	}
+}
+
 static int
-pipeline_codegen(struct rte_swx_pipeline *p)
+pipeline_codegen(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 {
 	struct action *a;
 	FILE *f = NULL;
 
-	if (!p)
-		return -EINVAL;
-
 	/* Create the .c file. */
 	f = fopen("/tmp/pipeline.c", "w");
 	if (!f)
@@ -11570,6 +12196,9 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 		fprintf(f, "\n");
 	}
 
+	/* Add the pipeline code. */
+	instruction_group_list_codegen(igl, p, f);
+
 	/* Close the .c file. */
 	fclose(f);
 
@@ -11579,12 +12208,22 @@ pipeline_codegen(struct rte_swx_pipeline *p)
 static int
 pipeline_compile(struct rte_swx_pipeline *p)
 {
+	struct instruction_group_list *igl = NULL;
 	int status = 0;
 
+	igl = instruction_group_list_create(p);
+	if (!igl) {
+		status = -ENOMEM;
+		goto free;
+	}
+
 	/* Code generation. */
-	status = pipeline_codegen(p);
+	status = pipeline_codegen(p, igl);
 	if (status)
-		return status;
+		goto free;
+
+free:
+	instruction_group_list_free(igl);
 
 	return status;
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 23/24] pipeline: build shared object for pipeline
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (20 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
                       ` (3 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev; +Cc: Cunming Liang

Build the generated C file into a shared object library.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 131 +++++++++++++++++++++++
 lib/pipeline/rte_swx_pipeline_internal.h |   1 +
 2 files changed, 132 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index e669dd09d2..02519a05a9 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -5,6 +5,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <arpa/inet.h>
+#include <dlfcn.h>
 
 #include "rte_swx_pipeline_internal.h"
 
@@ -8968,9 +8969,13 @@ rte_swx_pipeline_config(struct rte_swx_pipeline **p, int numa_node)
 void
 rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 {
+	void *lib;
+
 	if (!p)
 		return;
 
+	lib = p->lib;
+
 	free(p->instruction_data);
 	free(p->instructions);
 
@@ -8991,6 +8996,9 @@ rte_swx_pipeline_free(struct rte_swx_pipeline *p)
 	struct_free(p);
 
 	free(p);
+
+	if (lib)
+		dlclose(lib);
 }
 
 int
@@ -12205,6 +12213,124 @@ pipeline_codegen(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 	return 0;
 }
 
+#ifndef RTE_SWX_PIPELINE_CMD_MAX_SIZE
+#define RTE_SWX_PIPELINE_CMD_MAX_SIZE 4096
+#endif
+
+/* Build the generated /tmp/pipeline.c file into the /tmp/libpipeline.so shared object library
+ * (the build output goes to /tmp/pipeline.log), load the library into the pipeline (p->lib) and
+ * resolve its symbols: function "action_<name>_run" for each pipeline action (stored into
+ * p->action_funcs) and function "pipeline_func_<group_id>" for each instruction group with more
+ * than one instruction (stored into the group).
+ *
+ * The include paths of the build command are relative to the directory specified by the
+ * RTE_INSTALL_DIR environment variable.
+ *
+ * Returns 0 on success or a negative error code otherwise: -EINVAL when RTE_INSTALL_DIR is not
+ * set or a symbol is not found, -ENOMEM on memory allocation failure, -ENOSPC when the build
+ * command does not fit into RTE_SWX_PIPELINE_CMD_MAX_SIZE bytes, -EIO when the build or the
+ * library load fails. On error, the library is closed and p->lib is set to NULL.
+ */
+static int
+pipeline_libload(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
+{
+	struct action *a;
+	struct instruction_group *g;
+	char *dir_in, *buffer = NULL;
+	const char *dir_out;
+	int status = 0, n;
+
+	/* Get the environment variables. */
+	dir_in = getenv("RTE_INSTALL_DIR");
+	if (!dir_in) {
+		status = -EINVAL;
+		goto free;
+	}
+
+	dir_out = "/tmp";
+
+	/* Memory allocation for the command buffer. */
+	buffer = malloc(RTE_SWX_PIPELINE_CMD_MAX_SIZE);
+	if (!buffer) {
+		status = -ENOMEM;
+		goto free;
+	}
+
+	/* NOTE(review): the command is executed by the shell and embeds the unquoted value of the
+	 * RTE_INSTALL_DIR environment variable, and the output files have fixed names under /tmp.
+	 * This is only safe when the environment and the /tmp directory are trusted.
+	 */
+	n = snprintf(buffer,
+		     RTE_SWX_PIPELINE_CMD_MAX_SIZE,
+		     "gcc -c -O3 -fpic -Wno-deprecated-declarations -o %s/pipeline.o %s/pipeline.c "
+		     "-I %s/lib/pipeline "
+		     "-I %s/lib/eal/include "
+		     "-I %s/lib/eal/x86/include "
+		     "-I %s/lib/eal/include/generic "
+		     "-I %s/lib/meter "
+		     "-I %s/lib/port "
+		     "-I %s/lib/table "
+		     "-I %s/lib/pipeline "
+		     "-I %s/config "
+		     "-I %s/build "
+		     "-I %s/lib/eal/linux/include "
+		     ">%s/pipeline.log 2>&1 "
+		     "&& "
+		     "gcc -shared %s/pipeline.o -o %s/libpipeline.so "
+		     ">>%s/pipeline.log 2>&1",
+		     dir_out,
+		     dir_out,
+		     dir_in,
+		     dir_in,
+		     dir_in,
+		     dir_in,
+		     dir_in,
+		     dir_in,
+		     dir_in,
+		     dir_in,
+		     dir_in,
+		     dir_in,
+		     dir_in,
+		     dir_out,
+		     dir_out,
+		     dir_out,
+		     dir_out);
+
+	/* Never run a truncated command: a long RTE_INSTALL_DIR could otherwise cut the command
+	 * at an arbitrary position.
+	 */
+	if ((n < 0) || (n >= RTE_SWX_PIPELINE_CMD_MAX_SIZE)) {
+		status = -ENOSPC;
+		goto free;
+	}
+
+	/* Build the shared object library. The value returned by system() is either -1 or the
+	 * termination status of the shell, so it is not returned to the caller as is.
+	 */
+	status = system(buffer);
+	if (status) {
+		status = -EIO;
+		goto free;
+	}
+
+	/* Open library. */
+	snprintf(buffer,
+		 RTE_SWX_PIPELINE_CMD_MAX_SIZE,
+		 "%s/libpipeline.so",
+		 dir_out);
+
+	p->lib = dlopen(buffer, RTLD_LAZY);
+	if (!p->lib) {
+		status = -EIO;
+		goto free;
+	}
+
+	/* Get the action function symbols. */
+	TAILQ_FOREACH(a, &p->actions, node) {
+		snprintf(buffer, RTE_SWX_PIPELINE_CMD_MAX_SIZE, "action_%s_run", a->name);
+
+		p->action_funcs[a->id] = dlsym(p->lib, buffer);
+		if (!p->action_funcs[a->id]) {
+			status = -EINVAL;
+			goto free;
+		}
+	}
+
+	/* Get the pipeline function symbols. */
+	TAILQ_FOREACH(g, igl, node) {
+		/* No function is generated for the groups with a single instruction. */
+		if (g->first_instr_id == g->last_instr_id)
+			continue;
+
+		snprintf(buffer, RTE_SWX_PIPELINE_CMD_MAX_SIZE, "pipeline_func_%u", g->group_id);
+
+		g->func = dlsym(p->lib, buffer);
+		if (!g->func) {
+			status = -EINVAL;
+			goto free;
+		}
+	}
+
+free:
+	/* NOTE(review): any function pointers already resolved from the library are left in
+	 * place when the library is closed here, so they must not be used after an error.
+	 */
+	if (status && p->lib) {
+		dlclose(p->lib);
+		p->lib = NULL;
+	}
+
+	free(buffer);
+
+	return status;
+}
+
 static int
 pipeline_compile(struct rte_swx_pipeline *p)
 {
@@ -12222,6 +12348,11 @@ pipeline_compile(struct rte_swx_pipeline *p)
 	if (status)
 		goto free;
 
+	/* Build and load the shared object library. */
+	status = pipeline_libload(p, igl);
+	if (status)
+		goto free;
+
 free:
 	instruction_group_list_free(igl);
 
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 64625b40c6..3baae55737 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -1407,6 +1407,7 @@ struct rte_swx_pipeline {
 	struct instruction_data *instruction_data;
 	instr_exec_t *instruction_table;
 	struct thread threads[RTE_SWX_PIPELINE_THREADS_MAX];
+	void *lib;
 
 	uint32_t n_structs;
 	uint32_t n_ports_in;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* [dpdk-dev] [PATCH V3 24/24] pipeline: enable pipeline compilation
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (21 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
@ 2021-09-13 16:44     ` Cristian Dumitrescu
  2021-09-13 16:51     ` [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file Stephen Hemminger
                       ` (2 subsequent siblings)
  25 siblings, 0 replies; 79+ messages in thread
From: Cristian Dumitrescu @ 2021-09-13 16:44 UTC (permalink / raw)
  To: dev

Commit the pipeline changes when the compilation process is
successful: change the table lookup instructions to execute the action
function for each action, replace the regular pipeline instructions
with the custom instructions.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
V3:
-added more checks

 lib/pipeline/rte_swx_pipeline.c | 94 +++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 02519a05a9..31f0029404 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -12177,6 +12177,26 @@ instruction_group_list_codegen(struct instruction_group_list *igl,
 	}
 }
 
+/* Return the number of custom instructions required by the instruction group list, i.e. the
+ * number of groups that contain more than one instruction.
+ */
+static uint32_t
+instruction_group_list_custom_instructions_count(struct instruction_group_list *igl)
+{
+	struct instruction_group *g;
+	uint32_t n_custom_instr = 0;
+
+	/* Groups with a single instruction: no function is generated for this group, the group
+	 * keeps its current instruction. Groups with more than one instruction: one function and
+	 * the associated custom instruction get generated for each such group.
+	 */
+	TAILQ_FOREACH(g, igl, node) {
+		if (g->first_instr_id == g->last_instr_id)
+			continue;
+
+		n_custom_instr++;
+	}
+
+	return n_custom_instr;
+}
+
 static int
 pipeline_codegen(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 {
@@ -12331,6 +12351,73 @@ pipeline_libload(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
 	return status;
 }
 
+/* Check that the pipeline instruction table has enough room, starting from INSTR_CUSTOM_0, for
+ * all the custom instructions required by the instruction group list. Returns 0 when this is the
+ * case and -ENOSPC otherwise.
+ */
+static int
+pipeline_adjust_check(struct rte_swx_pipeline *p __rte_unused,
+		      struct instruction_group_list *igl)
+{
+	uint32_t n_required = instruction_group_list_custom_instructions_count(igl);
+
+	return (INSTR_CUSTOM_0 + n_required > RTE_SWX_PIPELINE_INSTRUCTION_TABLE_SIZE_MAX) ?
+		-ENOSPC : 0;
+}
+
+/* Commit the result of the pipeline compilation to the pipeline instructions: switch the table
+ * and learner instructions to their _AF type, replace each group that has more than one
+ * instruction with a single custom instruction that runs the function of the group, then compact
+ * the instruction array and resolve the jump destinations again.
+ */
+static void
+pipeline_adjust(struct rte_swx_pipeline *p, struct instruction_group_list *igl)
+{
+	struct instruction_group *grp;
+	uint32_t pos, n_custom = 0;
+
+	/* Pipeline table instructions. */
+	for (pos = 0; pos < p->n_instructions; pos++) {
+		struct instruction *instr = &p->instructions[pos];
+
+		switch (instr->type) {
+		case INSTR_TABLE:
+			instr->type = INSTR_TABLE_AF;
+			break;
+
+		case INSTR_LEARNER:
+			instr->type = INSTR_LEARNER_AF;
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	/* Pipeline custom instructions: one for each group with more than one instruction. */
+	TAILQ_FOREACH(grp, igl, node) {
+		uint32_t k;
+
+		if (grp->first_instr_id == grp->last_instr_id)
+			continue;
+
+		/* The next free custom instruction runs the function of this group. */
+		p->instruction_table[INSTR_CUSTOM_0 + n_custom] = grp->func;
+
+		/* The first instruction of the group becomes the custom instruction. */
+		p->instructions[grp->first_instr_id].type = INSTR_CUSTOM_0 + n_custom;
+
+		/* The remaining instructions of the group are marked as invalid. */
+		for (k = grp->first_instr_id + 1; k <= grp->last_instr_id; k++)
+			p->instruction_data[k].invalid = 1;
+
+		n_custom++;
+	}
+
+	/* Remove the invalidated instructions. */
+	p->n_instructions = instr_compact(p->instructions, p->instruction_data, p->n_instructions);
+
+	/* The jump instructions that are alone in their group were left unmodified, so their
+	 * jump destinations have to be resolved again.
+	 */
+	instr_jmp_resolve(p->instructions, p->instruction_data, p->n_instructions);
+}
+
 static int
 pipeline_compile(struct rte_swx_pipeline *p)
 {
@@ -12353,6 +12440,13 @@ pipeline_compile(struct rte_swx_pipeline *p)
 	if (status)
 		goto free;
 
+	/* Adjust instructions. */
+	status = pipeline_adjust_check(p, igl);
+	if (status)
+		goto free;
+
+	pipeline_adjust(p, igl);
+
 free:
 	instruction_group_list_free(igl);
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 79+ messages in thread

* Re: [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (22 preceding siblings ...)
  2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
@ 2021-09-13 16:51     ` Stephen Hemminger
  2021-09-13 18:42       ` Dumitrescu, Cristian
  2021-09-20 15:24     ` Dumitrescu, Cristian
  2021-09-27 10:11     ` Thomas Monjalon
  25 siblings, 1 reply; 79+ messages in thread
From: Stephen Hemminger @ 2021-09-13 16:51 UTC (permalink / raw)
  To: Cristian Dumitrescu; +Cc: dev

On Mon, 13 Sep 2021 17:44:20 +0100
Cristian Dumitrescu <cristian.dumitrescu@intel.com> wrote:

> Start to consolidate the data structures and inline functions required
> by the pipeline instructions into an internal header file.
> 
> Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> ---
> Depends-on: series-18297 ("[V4,1/4] table: add support learner tables")

Won't this change make future changes to the API/ABI harder, because more
of the pipeline internals are exposed to the application?

^ permalink raw reply	[flat|nested] 79+ messages in thread

* Re: [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file
  2021-09-10 14:09   ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Bruce Richardson
@ 2021-09-13 17:07     ` Dumitrescu, Cristian
  0 siblings, 0 replies; 79+ messages in thread
From: Dumitrescu, Cristian @ 2021-09-13 17:07 UTC (permalink / raw)
  To: Richardson, Bruce; +Cc: dev



> -----Original Message-----
> From: Richardson, Bruce <bruce.richardson@intel.com>
> Sent: Friday, September 10, 2021 3:09 PM
> To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to
> internal header file
> 
> On Fri, Sep 10, 2021 at 02:36:50PM +0100, Cristian Dumitrescu wrote:
> > Start to consolidate the data structures and inline functions required
> > by the pipeline instructions into an internal header file.
> >
> > Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> > ---
> > Depends-on: series-18297 ("[V4,1/4] table: add support learner tables")
> >
> >  lib/pipeline/meson.build                 |    4 +
> >  lib/pipeline/rte_swx_pipeline.c          | 1373 +--------------------
> >  lib/pipeline/rte_swx_pipeline_internal.h | 1383
> ++++++++++++++++++++++
> >  3 files changed, 1388 insertions(+), 1372 deletions(-)
> >  create mode 100644 lib/pipeline/rte_swx_pipeline_internal.h
> >
> > diff --git a/lib/pipeline/meson.build b/lib/pipeline/meson.build
> > index 9132bb517a..ec009631bf 100644
> > --- a/lib/pipeline/meson.build
> > +++ b/lib/pipeline/meson.build
> > @@ -18,3 +18,7 @@ headers = files(
> >          'rte_swx_ctl.h',
> >  )
> >  deps += ['port', 'table', 'meter', 'sched', 'cryptodev']
> > +
> > +indirect_headers += files(
> > +        'rte_swx_pipeline_internal.h',
> > +)
> 
> internal headers should not appear in meson.build at all, as they are not
> for installation. "indirect_headers" is for non-internal, public headers
> which are not directly included by apps, but included via another public
> header. For example, rte_ethdev_core.h should not be included directly, but
> by including rte_ethdev.h which will then pull it in. [The reason these are
> separated out in meson.build is for header sanity checking, to check that
> normal, non-indirect public headers include all their dependent headers]

Yes, my bad, fixed in V3 just sent. Thanks, Bruce.

^ permalink raw reply	[flat|nested] 79+ messages in thread

* Re: [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file
  2021-09-13 16:51     ` [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file Stephen Hemminger
@ 2021-09-13 18:42       ` Dumitrescu, Cristian
  2021-09-13 19:02         ` Stephen Hemminger
  0 siblings, 1 reply; 79+ messages in thread
From: Dumitrescu, Cristian @ 2021-09-13 18:42 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev



> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Monday, September 13, 2021 5:51 PM
> To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to
> internal header file
> 
> On Mon, 13 Sep 2021 17:44:20 +0100
> Cristian Dumitrescu <cristian.dumitrescu@intel.com> wrote:
> 
> > Start to consolidate the data structures and inline functions required
> > by the pipeline instructions into an internal header file.
> >
> > Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> > ---
> > Depends-on: series-18297 ("[V4,1/4] table: add support learner tables")
> 
> Won't this change make future changes to API/ABI harder because more
> of the pipeline internals are exposed to the application?

Not at all, this header file is internal to the library and not accessible to the application.

^ permalink raw reply	[flat|nested] 79+ messages in thread

* Re: [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file
  2021-09-13 18:42       ` Dumitrescu, Cristian
@ 2021-09-13 19:02         ` Stephen Hemminger
  0 siblings, 0 replies; 79+ messages in thread
From: Stephen Hemminger @ 2021-09-13 19:02 UTC (permalink / raw)
  To: Dumitrescu, Cristian; +Cc: dev

On Mon, 13 Sep 2021 18:42:39 +0000
"Dumitrescu, Cristian" <cristian.dumitrescu@intel.com> wrote:

> > -----Original Message-----
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > Sent: Monday, September 13, 2021 5:51 PM
> > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to
> > internal header file
> > 
> > On Mon, 13 Sep 2021 17:44:20 +0100
> > Cristian Dumitrescu <cristian.dumitrescu@intel.com> wrote:
> >   
> > > Start to consolidate the data structures and inline functions required
> > > by the pipeline instructions into an internal header file.
> > >
> > > Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> > > ---
> > > Depends-on: series-18297 ("[V4,1/4] table: add support learner tables")  
> > 
> > Won't this change make future changes to API/ABI harder because more
> > of the pipeline internals are exposed to the application?
> 
> Not at all, this header file is internal to the library and not accessible to the application.

Good

^ permalink raw reply	[flat|nested] 79+ messages in thread

* Re: [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (23 preceding siblings ...)
  2021-09-13 16:51     ` [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file Stephen Hemminger
@ 2021-09-20 15:24     ` Dumitrescu, Cristian
  2021-09-27 10:11     ` Thomas Monjalon
  25 siblings, 0 replies; 79+ messages in thread
From: Dumitrescu, Cristian @ 2021-09-20 15:24 UTC (permalink / raw)
  To: Dumitrescu, Cristian, dev


> Depends-on: series-18297 ("[V4,1/4] table: add support learner tables")

Just sent an updated version for the learner table series, which does not result in any code changes for this patch set. Therefore, the updated dependency list of this patch set is:

Depends-on: series-19048 ("[V5,1/4] table: add support learner tables")

Thanks,
Cristian

^ permalink raw reply	[flat|nested] 79+ messages in thread

* Re: [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file
  2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
                       ` (24 preceding siblings ...)
  2021-09-20 15:24     ` Dumitrescu, Cristian
@ 2021-09-27 10:11     ` Thomas Monjalon
  25 siblings, 0 replies; 79+ messages in thread
From: Thomas Monjalon @ 2021-09-27 10:11 UTC (permalink / raw)
  To: Cristian Dumitrescu; +Cc: dev

13/09/2021 18:44, Cristian Dumitrescu:
> Start to consolidate the data structures and inline functions required
> by the pipeline instructions into an internal header file.
> 
> Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

Series applied, thanks.




^ permalink raw reply	[flat|nested] 79+ messages in thread

end of thread, other threads:[~2021-09-27 10:11 UTC | newest]

Thread overview: 79+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-10 12:29 [dpdk-dev] [PATCH 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 17/24] pipeline: introduce action functions Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
2021-09-10 12:29 ` [dpdk-dev] [PATCH 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
2021-09-10 12:30 ` [dpdk-dev] [PATCH 21/24] pipeline: generate action functions Cristian Dumitrescu
2021-09-10 12:30 ` [dpdk-dev] [PATCH 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
2021-09-10 12:30 ` [dpdk-dev] [PATCH 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
2021-09-10 12:30 ` [dpdk-dev] [PATCH 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
2021-09-10 13:36 ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
2021-09-10 13:36   ` [dpdk-dev] [PATCH V2 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 17/24] pipeline: introduce action functions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 21/24] pipeline: generate action functions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
2021-09-10 13:37   ` [dpdk-dev] [PATCH V2 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
2021-09-10 14:09   ` [dpdk-dev] [PATCH V2 01/24] pipeline: move data structures to internal header file Bruce Richardson
2021-09-13 17:07     ` Dumitrescu, Cristian
2021-09-13 16:44   ` [dpdk-dev] [PATCH V3 " Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 02/24] pipeline: move thread inline functions to " Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 03/24] pipeline: create inline functions for RX instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 04/24] pipeline: create inline functions for TX instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 05/24] pipeline: create inline functions for extract instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 06/24] pipeline: create inline functions for emit instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 07/24] pipeline: create inline functions for validate instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 08/24] pipeline: create inline functions for learn instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 09/24] pipeline: create inline functions for extern instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 10/24] pipeline: create inline functions for move instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 11/24] pipeline: create inline functions for DMA instruction Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 12/24] pipeline: create inline functions for ALU instructions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 13/24] pipeline: create inline functions for register instructions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 14/24] pipeline: create inline functions for meter instructions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 15/24] pipeline: create inline functions for instruction operands Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 16/24] pipeline: enable persistent instruction meta-data Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 17/24] pipeline: introduce action functions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 18/24] pipeline: introduce custom instructions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 19/24] pipeline: introduce pipeline compilation Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 20/24] pipeline: export pipeline instructions to file Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 21/24] pipeline: generate action functions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 22/24] pipeline: generate custom instruction functions Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 23/24] pipeline: build shared object for pipeline Cristian Dumitrescu
2021-09-13 16:44     ` [dpdk-dev] [PATCH V3 24/24] pipeline: enable pipeline compilation Cristian Dumitrescu
2021-09-13 16:51     ` [dpdk-dev] [PATCH V3 01/24] pipeline: move data structures to internal header file Stephen Hemminger
2021-09-13 18:42       ` Dumitrescu, Cristian
2021-09-13 19:02         ` Stephen Hemminger
2021-09-20 15:24     ` Dumitrescu, Cristian
2021-09-27 10:11     ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).