* [PATCH 1/3] pipeline: add new instruction for upper half of IPv6 address
  2024-02-13 16:57 [PATCH 0/3] pipeline: extend the IPv6 support Cristian Dumitrescu
@ 2024-02-13 16:57 ` Cristian Dumitrescu
  2024-02-13 16:57 ` [PATCH 2/3] pipeline: optimize conversion between IPv4 and IPv6 addresses Cristian Dumitrescu
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Cristian Dumitrescu @ 2024-02-13 16:57 UTC (permalink / raw)
  To: dev
Added new instruction called "movh" to read/write the upper half of an
IPv6 address, i.e. bits 127-64 of a 128-bit field.
Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 99 ++++++++++++++++++++++++
 lib/pipeline/rte_swx_pipeline_internal.h | 52 ++++++++++++-
 2 files changed, 147 insertions(+), 4 deletions(-)
diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index da37eda231..12f335005d 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3359,6 +3359,61 @@ instr_mov_i_exec(struct rte_swx_pipeline *p)
 	thread_ip_inc(p);
 }
 
+/*
+ * movh.
+ */
+static int
+instr_movh_translate(struct rte_swx_pipeline *p,
+		     struct action *action,
+		     char **tokens,
+		     int n_tokens,
+		     struct instruction *instr,
+		     struct instruction_data *data __rte_unused)
+{
+	char *dst = tokens[1], *src = tokens[2];
+	struct field *fdst, *fsrc;
+	uint32_t dst_struct_id = 0, src_struct_id = 0;
+
+	CHECK(n_tokens == 3, EINVAL); /* Syntax: movh dst src. */
+
+	fdst = struct_field_parse(p, NULL, dst, &dst_struct_id);
+	CHECK(fdst, EINVAL);
+	CHECK(!fdst->var_size, EINVAL); /* Variable-size dst is not accepted. */
+
+	fsrc = struct_field_parse(p, action, src, &src_struct_id);
+	CHECK(fsrc, EINVAL);
+	CHECK(!fsrc->var_size, EINVAL); /* Variable-size src is not accepted. */
+
+	/* Either 64-bit header dst <- 128-bit src, or 128-bit dst <- 64-bit header src. */
+	if ((dst[0] == 'h' && fdst->n_bits == 64 && fsrc->n_bits == 128) ||
+	    (fdst->n_bits == 128 && src[0] == 'h' && fsrc->n_bits == 64)) {
+		instr->type = INSTR_MOVH;
+
+		instr->mov.dst.struct_id = (uint8_t)dst_struct_id;
+		instr->mov.dst.n_bits = fdst->n_bits;
+		instr->mov.dst.offset = fdst->offset / 8; /* Presumably bits -> bytes. */
+
+		instr->mov.src.struct_id = (uint8_t)src_struct_id;
+		instr->mov.src.n_bits = fsrc->n_bits;
+		instr->mov.src.offset = fsrc->offset / 8; /* Presumably bits -> bytes. */
+		return 0;
+	}
+
+	CHECK(0, EINVAL); /* Unsupported operand combination. */
+}
+
+static inline void
+instr_movh_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id]; /* Thread selected by p->thread_id. */
+	struct instruction *ip = t->ip; /* Instruction to execute. */
+
+	__instr_movh_exec(p, t, ip); /* 64-bit copy, see rte_swx_pipeline_internal.h. */
+
+	/* Thread: presumably advances the thread to its next instruction. */
+	thread_ip_inc(p);
+}
+
 /*
  * dma.
  */
@@ -6427,6 +6482,14 @@ instr_translate(struct rte_swx_pipeline *p,
 					   instr,
 					   data);
 
+	if (!strcmp(tokens[tpos], "movh"))
+		return instr_movh_translate(p,
+					    action,
+					    &tokens[tpos],
+					    n_tokens - tpos,
+					    instr,
+					    data);
+
 	if (!strcmp(tokens[tpos], "add"))
 		return instr_alu_add_translate(p,
 					       action,
@@ -7463,6 +7526,8 @@ static instr_exec_t instruction_table[] = {
 	[INSTR_MOV_128_32] = instr_mov_128_32_exec,
 	[INSTR_MOV_I] = instr_mov_i_exec,
 
+	[INSTR_MOVH] = instr_movh_exec,
+
 	[INSTR_DMA_HT] = instr_dma_ht_exec,
 	[INSTR_DMA_HT2] = instr_dma_ht2_exec,
 	[INSTR_DMA_HT3] = instr_dma_ht3_exec,
@@ -11788,6 +11853,8 @@ instr_type_to_name(struct instruction *instr)
 	case INSTR_MOV_128_32: return "INSTR_MOV_128_32";
 	case INSTR_MOV_I: return "INSTR_MOV_I";
 
+	case INSTR_MOVH: return "INSTR_MOVH";
+
 	case INSTR_DMA_HT: return "INSTR_DMA_HT";
 	case INSTR_DMA_HT2: return "INSTR_DMA_HT2";
 	case INSTR_DMA_HT3: return "INSTR_DMA_HT3";
@@ -12181,6 +12248,34 @@ instr_mov_export(struct instruction *instr, FILE *f)
 			instr->mov.src_val);
 }
 
+static void
+instr_movh_export(struct instruction *instr, FILE *f)
+{
+	fprintf(f, /* Write the instruction to f as a C designated initializer. */
+		"\t{\n"
+		"\t\t.type = %s,\n"
+		"\t\t.mov = {\n"
+		"\t\t\t.dst = {\n"
+		"\t\t\t\t.struct_id = %u,\n"
+		"\t\t\t\t.n_bits = %u,\n"
+		"\t\t\t\t.offset = %u,\n"
+		"\t\t\t},\n"
+		"\t\t\t.src = {\n"
+		"\t\t\t\t.struct_id = %u,\n"
+		"\t\t\t\t.n_bits = %u,\n"
+		"\t\t\t\t.offset = %u,\n"
+		"\t\t\t},\n"
+		"\t\t},\n"
+		"\t},\n",
+		instr_type_to_name(instr), /* Enum name as text, e.g. "INSTR_MOVH". */
+		instr->mov.dst.struct_id,
+		instr->mov.dst.n_bits,
+		instr->mov.dst.offset,
+		instr->mov.src.struct_id,
+		instr->mov.src.n_bits,
+		instr->mov.src.offset);
+}
+
 static void
 instr_dma_ht_export(struct instruction *instr, FILE *f)
 {
@@ -12829,6 +12924,8 @@ static instruction_export_t export_table[] = {
 	[INSTR_MOV_128_32] = instr_mov_export,
 	[INSTR_MOV_I] = instr_mov_export,
 
+	[INSTR_MOVH] = instr_movh_export,
+
 	[INSTR_DMA_HT]  = instr_dma_ht_export,
 	[INSTR_DMA_HT2] = instr_dma_ht_export,
 	[INSTR_DMA_HT3] = instr_dma_ht_export,
@@ -13058,6 +13155,8 @@ instr_type_to_func(struct instruction *instr)
 	case INSTR_MOV_128_32: return "__instr_mov_128_32_exec";
 	case INSTR_MOV_I: return "__instr_mov_i_exec";
 
+	case INSTR_MOVH: return "__instr_movh_exec";
+
 	case INSTR_DMA_HT: return "__instr_dma_ht_exec";
 	case INSTR_DMA_HT2: return "__instr_dma_ht2_exec";
 	case INSTR_DMA_HT3: return "__instr_dma_ht3_exec";
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 8ec12263b9..7ae7622329 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -244,20 +244,34 @@ struct header_out_runtime {
  * Instruction.
  */
 
-/* Packet headers are always in Network Byte Order (NBO), i.e. big endian.
+/* Operand endianness conventions:
+ *
+ * Case 1: Small fields (i.e. fields with size <= 64 bits)
+ *
+ * Packet headers are always in Network Byte Order (NBO), i.e. big endian.
  * Packet meta-data fields are always assumed to be in Host Byte Order (HBO).
  * Table entry fields can be in either NBO or HBO; they are assumed to be in HBO
  * when transferred to packet meta-data and in NBO when transferred to packet
  * headers.
- */
-
-/* Notation conventions:
+ *
+ * Notation conventions:
  *    -Header field: H = h.header.field (dst/src)
  *    -Meta-data field: M = m.field (dst/src)
  *    -Extern object mailbox field: E = e.field (dst/src)
  *    -Extern function mailbox field: F = f.field (dst/src)
  *    -Table action data field: T = t.field (src only)
  *    -Immediate value: I = 32-bit unsigned value (src only)
+ *
+ * Case 2: Big fields (i.e. fields with size > 64 bits)
+ *
+ * The big fields are allowed in both headers and meta-data, but they are always
+ * stored in NBO. This is why the few instructions that accept a big field
+ * operand require that the other operand, in case it is a small operand, be
+ * stored in NBO as well, i.e. the small operand must be a header field
+ * (i.e. meta-data field not allowed in this case).
+ *
+ * Notation conventions:
+ *    -Header or meta-data big field: HM-NBO.
  */
 
 enum instruction_type {
@@ -333,6 +347,17 @@ enum instruction_type {
 	INSTR_MOV_128_32, /* dst and src in NBO format, size(dst) = 128 bits, size(src) = 32 b. */
 	INSTR_MOV_I,   /* dst = HMEF, src = I; size(dst) <= 64 bits. */
 
+	/* movh dst src
+	 * Read/write the upper half (i.e. bits 127 .. 64) of a 128-bit field into/from a 64-bit
+	 * header field:
+	 *
+	 *    dst64 = src128[127:64], where: dst64 = H, src128 = HM-NBO.
+	 *    dst128[127:64] = src64, where: dst128 = HM-NBO, src64 = H.
+	 *
+	 * Typically required for operations involving IPv6 addresses.
+	 */
+	INSTR_MOVH,
+
 	/* dma h.header t.field
 	 * memcpy(h.header, t.field, sizeof(h.header))
 	 */
@@ -2686,6 +2711,25 @@ __instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
 	MOV_I(t, ip);
 }
 
+/*
+ * movh: copy the first 8 bytes at the src operand offset to the dst operand offset.
+ */
+static inline void
+__instr_movh_exec(struct rte_swx_pipeline *p __rte_unused,
+		  struct thread *t,
+		  const struct instruction *ip)
+{
+	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
+	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
+
+	uint64_t *dst64 = (uint64_t *)dst;
+	uint64_t *src64 = (uint64_t *)src;
+
+	TRACE("[Thread %2u] movh\n", p->thread_id);
+
+	dst64[0] = src64[0]; /* Bytes 0..7 = bits 127..64 of the 128-bit NBO operand. */
+}
+
 /*
  * dma.
  */
-- 
2.34.1
^ permalink raw reply	[flat|nested] 5+ messages in thread
* [PATCH 2/3] pipeline: optimize conversion between IPv4 and IPv6 addresses
  2024-02-13 16:57 [PATCH 0/3] pipeline: extend the IPv6 support Cristian Dumitrescu
  2024-02-13 16:57 ` [PATCH 1/3] pipeline: add new instruction for upper half of IPv6 address Cristian Dumitrescu
@ 2024-02-13 16:57 ` Cristian Dumitrescu
  2024-02-13 16:57 ` [PATCH 3/3] examples/pipeline: add example for IPv6 address swap Cristian Dumitrescu
  2024-02-19  0:46 ` [PATCH 0/3] pipeline: extend the IPv6 support Thomas Monjalon
  3 siblings, 0 replies; 5+ messages in thread
From: Cristian Dumitrescu @ 2024-02-13 16:57 UTC (permalink / raw)
  To: dev
Enhanced the move instruction to detect and optimize the conversion
between 128-bit numbers (IPv6 addresses) and 64-bit numbers (upper or
lower part of IPv6 addresses) or 32-bit numbers (IPv4 addresses).
Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 lib/pipeline/rte_swx_pipeline.c          | 56 ++++++++++++++++++++++++
 lib/pipeline/rte_swx_pipeline_internal.h | 52 ++++++++++++++++++++++
 2 files changed, 108 insertions(+)
diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 12f335005d..147c1c2ad4 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -3234,8 +3234,16 @@ instr_mov_translate(struct rte_swx_pipeline *p,
 			instr->type = INSTR_MOV_DMA;
 			if (fdst->n_bits == 128 && fsrc->n_bits == 128)
 				instr->type = INSTR_MOV_128;
+
+			if (fdst->n_bits == 128 && fsrc->n_bits == 64)
+				instr->type = INSTR_MOV_128_64;
+			if (fdst->n_bits == 64 && fsrc->n_bits == 128)
+				instr->type = INSTR_MOV_64_128;
+
 			if (fdst->n_bits == 128 && fsrc->n_bits == 32)
 				instr->type = INSTR_MOV_128_32;
+			if (fdst->n_bits == 32 && fsrc->n_bits == 128)
+				instr->type = INSTR_MOV_32_128;
 		}
 
 		instr->mov.dst.struct_id = (uint8_t)dst_struct_id;
@@ -3335,6 +3343,30 @@ instr_mov_128_exec(struct rte_swx_pipeline *p)
 	thread_ip_inc(p);
 }
 
+static inline void
+instr_mov_128_64_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id]; /* Thread selected by p->thread_id. */
+	struct instruction *ip = t->ip; /* Instruction to execute. */
+
+	__instr_mov_128_64_exec(p, t, ip); /* 128-bit dst <- 64-bit src. */
+
+	/* Thread: presumably advances the thread to its next instruction. */
+	thread_ip_inc(p);
+}
+
+static inline void
+instr_mov_64_128_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id]; /* Thread selected by p->thread_id. */
+	struct instruction *ip = t->ip; /* Instruction to execute. */
+
+	__instr_mov_64_128_exec(p, t, ip); /* 64-bit dst <- 128-bit src. */
+
+	/* Thread: presumably advances the thread to its next instruction. */
+	thread_ip_inc(p);
+}
+
 static inline void
 instr_mov_128_32_exec(struct rte_swx_pipeline *p)
 {
@@ -3347,6 +3379,18 @@ instr_mov_128_32_exec(struct rte_swx_pipeline *p)
 	thread_ip_inc(p);
 }
 
+static inline void
+instr_mov_32_128_exec(struct rte_swx_pipeline *p)
+{
+	struct thread *t = &p->threads[p->thread_id]; /* Thread selected by p->thread_id. */
+	struct instruction *ip = t->ip; /* Instruction to execute. */
+
+	__instr_mov_32_128_exec(p, t, ip); /* 32-bit dst <- 128-bit src. */
+
+	/* Thread: presumably advances the thread to its next instruction. */
+	thread_ip_inc(p);
+}
+
 static inline void
 instr_mov_i_exec(struct rte_swx_pipeline *p)
 {
@@ -7523,7 +7567,10 @@ static instr_exec_t instruction_table[] = {
 	[INSTR_MOV_HH] = instr_mov_hh_exec,
 	[INSTR_MOV_DMA] = instr_mov_dma_exec,
 	[INSTR_MOV_128] = instr_mov_128_exec,
+	[INSTR_MOV_128_64] = instr_mov_128_64_exec,
+	[INSTR_MOV_64_128] = instr_mov_64_128_exec,
 	[INSTR_MOV_128_32] = instr_mov_128_32_exec,
+	[INSTR_MOV_32_128] = instr_mov_32_128_exec,
 	[INSTR_MOV_I] = instr_mov_i_exec,
 
 	[INSTR_MOVH] = instr_movh_exec,
@@ -11850,7 +11897,10 @@ instr_type_to_name(struct instruction *instr)
 	case INSTR_MOV_HH: return "INSTR_MOV_HH";
 	case INSTR_MOV_DMA: return "INSTR_MOV_DMA";
 	case INSTR_MOV_128: return "INSTR_MOV_128";
+	case INSTR_MOV_128_64: return "INSTR_MOV_128_64";
+	case INSTR_MOV_64_128: return "INSTR_MOV_64_128";
 	case INSTR_MOV_128_32: return "INSTR_MOV_128_32";
+	case INSTR_MOV_32_128: return "INSTR_MOV_32_128";
 	case INSTR_MOV_I: return "INSTR_MOV_I";
 
 	case INSTR_MOVH: return "INSTR_MOVH";
@@ -12921,7 +12971,10 @@ static instruction_export_t export_table[] = {
 	[INSTR_MOV_HH] = instr_mov_export,
 	[INSTR_MOV_DMA] = instr_mov_export,
 	[INSTR_MOV_128] = instr_mov_export,
+	[INSTR_MOV_128_64] = instr_mov_export,
+	[INSTR_MOV_64_128] = instr_mov_export,
 	[INSTR_MOV_128_32] = instr_mov_export,
+	[INSTR_MOV_32_128] = instr_mov_export,
 	[INSTR_MOV_I] = instr_mov_export,
 
 	[INSTR_MOVH] = instr_movh_export,
@@ -13152,7 +13205,10 @@ instr_type_to_func(struct instruction *instr)
 	case INSTR_MOV_HH: return "__instr_mov_hh_exec";
 	case INSTR_MOV_DMA: return "__instr_mov_dma_exec";
 	case INSTR_MOV_128: return "__instr_mov_128_exec";
+	case INSTR_MOV_128_64: return "__instr_mov_128_64_exec";
+	case INSTR_MOV_64_128: return "__instr_mov_64_128_exec";
 	case INSTR_MOV_128_32: return "__instr_mov_128_32_exec";
+	case INSTR_MOV_32_128: return "__instr_mov_32_128_exec";
 	case INSTR_MOV_I: return "__instr_mov_i_exec";
 
 	case INSTR_MOVH: return "__instr_movh_exec";
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 7ae7622329..f5a64e0fe1 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -344,7 +344,10 @@ enum instruction_type {
 	INSTR_MOV_HH,  /* dst = H, src = H; size(dst) <= 64 bits, size(src) <= 64 bits. */
 	INSTR_MOV_DMA, /* dst and src in NBO format. */
 	INSTR_MOV_128, /* dst and src in NBO format, size(dst) = size(src) = 128 bits. */
+	INSTR_MOV_128_64, /* dst and src in NBO format, size(dst) = 128 bits, size(src) = 64 b. */
+	INSTR_MOV_64_128, /* dst and src in NBO format, size(dst) = 64 bits, size(src) = 128 b. */
 	INSTR_MOV_128_32, /* dst and src in NBO format, size(dst) = 128 bits, size(src) = 32 b. */
+	INSTR_MOV_32_128, /* dst and src in NBO format, size(dst) = 32 bits, size(src) = 128 b. */
 	INSTR_MOV_I,   /* dst = HMEF, src = I; size(dst) <= 64 bits. */
 
 	/* movh dst src
@@ -2682,6 +2685,39 @@ __instr_mov_128_exec(struct rte_swx_pipeline *p __rte_unused,
 	dst64_ptr[1] = src64_ptr[1];
 }
 
+static inline void
+__instr_mov_128_64_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
+	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
+
+	uint64_t *dst64 = (uint64_t *)dst;
+	uint64_t *src64 = (uint64_t *)src;
+
+	TRACE("[Thread %2u] mov (128 <- 64)\n", p->thread_id);
+
+	dst64[0] = 0; /* Bytes 0..7 = bits 127..64 of dst (NBO): zeroed. */
+	dst64[1] = src64[0]; /* Bytes 8..15 = bits 63..0 of dst (NBO). */
+}
+
+static inline void
+__instr_mov_64_128_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
+	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
+
+	uint64_t *dst64 = (uint64_t *)dst;
+	uint64_t *src64 = (uint64_t *)src;
+
+	TRACE("[Thread %2u] mov (64 <- 128)\n", p->thread_id);
+
+	dst64[0] = src64[1]; /* Bytes 8..15 of src = its bits 63..0 (NBO). */
+}
+
 static inline void
 __instr_mov_128_32_exec(struct rte_swx_pipeline *p __rte_unused,
 			struct thread *t,
@@ -2701,6 +2737,22 @@ __instr_mov_128_32_exec(struct rte_swx_pipeline *p __rte_unused,
 	dst32[3] = src32[0];
 }
 
+static inline void
+__instr_mov_32_128_exec(struct rte_swx_pipeline *p __rte_unused,
+			struct thread *t,
+			const struct instruction *ip)
+{
+	uint8_t *dst = t->structs[ip->mov.dst.struct_id] + ip->mov.dst.offset;
+	uint8_t *src = t->structs[ip->mov.src.struct_id] + ip->mov.src.offset;
+
+	uint32_t *dst32 = (uint32_t *)dst;
+	uint32_t *src32 = (uint32_t *)src;
+
+	TRACE("[Thread %2u] mov (32 <- 128)\n", p->thread_id);
+
+	dst32[0] = src32[3]; /* Bytes 12..15 of src = its bits 31..0 (NBO). */
+}
+
 static inline void
 __instr_mov_i_exec(struct rte_swx_pipeline *p __rte_unused,
 		   struct thread *t,
-- 
2.34.1
^ permalink raw reply	[flat|nested] 5+ messages in thread