* [PATCH] pipeline: rework optimization pattern for header generation
From: Cristian Dumitrescu @ 2022-06-16 14:16 UTC
To: dev
The P4 language requires marking a header as valid before any of the
header fields are written, as opposed to after the writes are done.
Hence, the optimization that replaces the sequence of instructions
generating a header from the table action data with a single internal
DMA instruction is reworked from "mov all + validate -> dma" to
"validate + mov all -> dma".
Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
examples/pipeline/examples/fib.spec | 2 +-
examples/pipeline/examples/vxlan.spec | 8 +--
lib/pipeline/rte_swx_pipeline.c | 89 +++++++++++-------------
lib/pipeline/rte_swx_pipeline_internal.h | 14 +++-
4 files changed, 57 insertions(+), 56 deletions(-)
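
The reworked pattern emits the header validation first, then one mov per
header field sourced from consecutive action arguments, and lets the
optimizer collapse the whole group into one internal DMA transfer. As a
minimal, self-contained C sketch of the detection step (simplified types
and names, not the actual rte_swx_pipeline structures):

#include <stdint.h>

enum instr_type { HDR_VALIDATE, MOV_HM, DMA_HT };

struct instr {
	enum instr_type type;
	uint32_t header_id;  /* destination header of HDR_VALIDATE / MOV_HM / DMA_HT */
	uint32_t dst_offset; /* MOV_HM: byte offset of the field within the header */
};

/* Return the number of matched instructions (1 + n_fields), or 0 if the
 * "validate + mov all" pattern is not present at instr[0]. The real search
 * additionally checks that the sources are consecutive action arguments and
 * that none of them is reused by another mov instruction.
 */
static uint32_t
match_validate_mov_all(const struct instr *instr, uint32_t n_instr,
		       uint32_t n_fields, const uint32_t *field_offset)
{
	uint32_t i;

	if (n_instr < 1 + n_fields || instr[0].type != HDR_VALIDATE)
		return 0;

	for (i = 0; i < n_fields; i++)
		if (instr[1 + i].type != MOV_HM ||
		    instr[1 + i].header_id != instr[0].header_id ||
		    instr[1 + i].dst_offset != field_offset[i])
			return 0;

	return 1 + n_fields;
}

On a match, instr[0] is rewritten into the single DMA instruction and the
remaining matched slots are dropped by the optimizer, mirroring
instr_pattern_validate_mov_all_replace() in the patch below.
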
diff --git a/examples/pipeline/examples/fib.spec b/examples/pipeline/examples/fib.spec
index 3f8c76610c..0c2d19c106 100644
--- a/examples/pipeline/examples/fib.spec
+++ b/examples/pipeline/examples/fib.spec
@@ -83,10 +83,10 @@ struct nexthop_action_args_t {
action nexthop_action args instanceof nexthop_action_args_t {
//Set Ethernet header.
+ validate h.ethernet
mov h.ethernet.dst_addr t.ethernet_dst_addr
mov h.ethernet.src_addr t.ethernet_src_addr
mov h.ethernet.ethertype t.ethernet_ethertype
- validate h.ethernet
//Decrement the TTL and update the checksum within the IPv4 header.
cksub h.ipv4.hdr_checksum h.ipv4.ttl
diff --git a/examples/pipeline/examples/vxlan.spec b/examples/pipeline/examples/vxlan.spec
index ac62ab2bec..4ab4309f16 100644
--- a/examples/pipeline/examples/vxlan.spec
+++ b/examples/pipeline/examples/vxlan.spec
@@ -115,12 +115,13 @@ struct vxlan_encap_args_t {
action vxlan_encap args instanceof vxlan_encap_args_t {
//Set the outer Ethernet header.
+ validate h.outer_ethernet
mov h.outer_ethernet.dst_addr t.ethernet_dst_addr
mov h.outer_ethernet.src_addr t.ethernet_src_addr
mov h.outer_ethernet.ethertype t.ethernet_ethertype
- validate h.outer_ethernet
//Set the outer IPv4 header.
+ validate h.outer_ipv4
mov h.outer_ipv4.ver_ihl t.ipv4_ver_ihl
mov h.outer_ipv4.diffserv t.ipv4_diffserv
mov h.outer_ipv4.total_len t.ipv4_total_len
@@ -131,21 +132,20 @@ action vxlan_encap args instanceof vxlan_encap_args_t {
mov h.outer_ipv4.hdr_checksum t.ipv4_hdr_checksum
mov h.outer_ipv4.src_addr t.ipv4_src_addr
mov h.outer_ipv4.dst_addr t.ipv4_dst_addr
- validate h.outer_ipv4
//Set the outer UDP header.
+ validate h.outer_udp
mov h.outer_udp.src_port t.udp_src_port
mov h.outer_udp.dst_port t.udp_dst_port
mov h.outer_udp.length t.udp_length
mov h.outer_udp.checksum t.udp_checksum
- validate h.outer_udp
//Set the outer VXLAN header.
+ validate h.outer_vxlan
mov h.outer_vxlan.flags t.vxlan_flags
mov h.outer_vxlan.reserved t.vxlan_reserved
mov h.outer_vxlan.vni t.vxlan_vni
mov h.outer_vxlan.reserved2 t.vxlan_reserved2
- validate h.outer_vxlan
//Set the output port.
mov m.port_out t.port_out
diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 066356684e..50805ba821 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2284,6 +2284,7 @@ instr_hdr_validate_translate(struct rte_swx_pipeline *p,
instr->type = INSTR_HDR_VALIDATE;
instr->valid.header_id = h->id;
+ instr->valid.struct_id = h->struct_id;
return 0;
}
@@ -6754,7 +6755,7 @@ action_arg_src_mov_count(struct action *a,
uint32_t n_instructions);
static int
-instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
+instr_pattern_validate_mov_all_search(struct rte_swx_pipeline *p,
struct action *a,
struct instruction *instr,
struct instruction_data *data,
@@ -6771,51 +6772,42 @@ instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
if (!a || !a->st)
return 0;
- /* First instruction: MOV_HM. */
- if (data[0].invalid || (instr[0].type != INSTR_MOV_HM))
+ /* First instruction: HDR_VALIDATE. Second instruction: MOV_HM. */
+ if (data[0].invalid ||
+ (instr[0].type != INSTR_HDR_VALIDATE) ||
+ (n_instr < 2) ||
+ data[1].invalid ||
+ (instr[1].type != INSTR_MOV_HM) ||
+ instr[1].mov.src.struct_id)
return 0;
- h = header_find_by_struct_id(p, instr[0].mov.dst.struct_id);
- if (!h || h->st->var_size)
+ h = header_find_by_struct_id(p, instr[0].valid.struct_id);
+ if (!h ||
+ h->st->var_size ||
+ (n_instr < 1 + h->st->n_fields))
return 0;
for (src_field_id = 0; src_field_id < a->st->n_fields; src_field_id++)
- if (instr[0].mov.src.offset == a->st->fields[src_field_id].offset / 8)
+ if (instr[1].mov.src.offset == a->st->fields[src_field_id].offset / 8)
break;
- if (src_field_id == a->st->n_fields)
+ if (src_field_id + h->st->n_fields > a->st->n_fields)
return 0;
- if (instr[0].mov.dst.offset ||
- (instr[0].mov.dst.n_bits != h->st->fields[0].n_bits) ||
- instr[0].mov.src.struct_id ||
- (instr[0].mov.src.n_bits != a->st->fields[src_field_id].n_bits) ||
- (instr[0].mov.dst.n_bits != instr[0].mov.src.n_bits))
- return 0;
-
- if ((n_instr < h->st->n_fields + 1) ||
- (a->st->n_fields < src_field_id + h->st->n_fields + 1))
- return 0;
-
- /* Subsequent instructions: MOV_HM. */
- for (i = 1; i < h->st->n_fields; i++)
- if (data[i].invalid ||
- data[i].n_users ||
- (instr[i].type != INSTR_MOV_HM) ||
- (instr[i].mov.dst.struct_id != h->struct_id) ||
- (instr[i].mov.dst.offset != h->st->fields[i].offset / 8) ||
- (instr[i].mov.dst.n_bits != h->st->fields[i].n_bits) ||
- instr[i].mov.src.struct_id ||
- (instr[i].mov.src.offset != a->st->fields[src_field_id + i].offset / 8) ||
- (instr[i].mov.src.n_bits != a->st->fields[src_field_id + i].n_bits) ||
- (instr[i].mov.dst.n_bits != instr[i].mov.src.n_bits))
+ /* Second and subsequent instructions: MOV_HM. */
+ for (i = 0; i < h->st->n_fields; i++)
+ if (data[1 + i].invalid ||
+ data[1 + i].n_users ||
+ (instr[1 + i].type != INSTR_MOV_HM) ||
+ (instr[1 + i].mov.dst.struct_id != h->struct_id) ||
+ (instr[1 + i].mov.dst.offset != h->st->fields[i].offset / 8) ||
+ (instr[1 + i].mov.dst.n_bits != h->st->fields[i].n_bits) ||
+ instr[1 + i].mov.src.struct_id ||
+ (instr[1 + i].mov.src.offset != a->st->fields[src_field_id + i].offset / 8) ||
+ (instr[1 + i].mov.src.n_bits != a->st->fields[src_field_id + i].n_bits) ||
+ (instr[1 + i].mov.dst.n_bits != instr[1 + i].mov.src.n_bits))
return 0;
- /* Last instruction: HDR_VALIDATE. */
- if ((instr[i].type != INSTR_HDR_VALIDATE) ||
- (instr[i].valid.header_id != h->id))
- return 0;
-
/* Check that none of the action args that are used as source for this
* DMA transfer are not used as source in any other mov instruction.
*/
@@ -6831,12 +6823,12 @@ instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
return 0;
}
- *n_pattern_instr = 1 + i;
+ *n_pattern_instr = 1 + h->st->n_fields;
return 1;
}
static void
-instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
+instr_pattern_validate_mov_all_replace(struct rte_swx_pipeline *p,
struct action *a,
struct instruction *instr,
struct instruction_data *data,
@@ -6846,19 +6838,16 @@ instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
uint32_t src_field_id, src_offset, i;
/* Read from the instructions before they are modified. */
- h = header_find_by_struct_id(p, instr[0].mov.dst.struct_id);
+ h = header_find_by_struct_id(p, instr[1].mov.dst.struct_id);
if (!h)
return;
+ src_offset = instr[1].mov.src.offset;
+
for (src_field_id = 0; src_field_id < a->st->n_fields; src_field_id++)
- if (instr[0].mov.src.offset == a->st->fields[src_field_id].offset / 8)
+ if (src_offset == a->st->fields[src_field_id].offset / 8)
break;
- if (src_field_id == a->st->n_fields)
- return;
-
- src_offset = instr[0].mov.src.offset;
-
/* Modify the instructions. */
instr[0].type = INSTR_DMA_HT;
instr[0].dma.dst.header_id[0] = h->id;
@@ -6875,7 +6864,7 @@ instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
}
static uint32_t
-instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
+instr_pattern_validate_mov_all_optimize(struct rte_swx_pipeline *p,
struct action *a,
struct instruction *instructions,
struct instruction_data *instruction_data,
@@ -6892,8 +6881,8 @@ instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
uint32_t n_instr = 0;
int detected;
- /* Mov all + validate. */
- detected = instr_pattern_mov_all_validate_search(p,
+ /* Validate + mov all. */
+ detected = instr_pattern_validate_mov_all_search(p,
a,
instr,
data,
@@ -6903,7 +6892,7 @@ instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
n_instructions,
&n_instr);
if (detected) {
- instr_pattern_mov_all_validate_replace(p, a, instr, data, n_instr);
+ instr_pattern_validate_mov_all_replace(p, a, instr, data, n_instr);
i += n_instr;
continue;
}
@@ -7020,8 +7009,8 @@ instr_optimize(struct rte_swx_pipeline *p,
instruction_data,
n_instructions);
- /* Mov all + validate. */
- n_instructions = instr_pattern_mov_all_validate_optimize(p,
+ /* Validate + mov all. */
+ n_instructions = instr_pattern_validate_mov_all_optimize(p,
a,
instructions,
instruction_data,
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 5feee8eff6..a35635efb7 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -632,6 +632,7 @@ struct instr_io {
struct instr_hdr_validity {
uint8_t header_id;
+ uint8_t struct_id;
};
struct instr_table {
@@ -2228,11 +2229,22 @@ __instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
const struct instruction *ip)
{
uint32_t header_id = ip->valid.header_id;
+ uint32_t struct_id = ip->valid.struct_id;
+ uint64_t valid_headers = t->valid_headers;
+ struct header_runtime *h = &t->headers[header_id];
TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
+ /* If this header is already valid, then its associated t->structs[] element is also valid
+ * and therefore it should not be modified. It could point to the packet buffer (in case of
+ * extracted header) and setting it to the default location (h->ptr0) would be incorrect.
+ */
+ if (MASK64_BIT_GET(valid_headers, header_id))
+ return;
+
/* Headers. */
- t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
+ t->structs[struct_id] = h->ptr0;
+ t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
}
/*
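
For reference, a simplified sketch of the new __instr_hdr_validate_exec()
behavior shown in the hunk above (thread state reduced to the essentials;
the type and field names here are illustrative, not the real rte_swx ones):
an already valid header keeps its current data pointer, since it may
reference the packet buffer of an extracted header, while a previously
invalid header is first pointed at its default buffer and then marked valid.

#include <stdint.h>

struct header_rt {
	uint8_t *ptr0;                 /* default storage for a generated header */
};

struct thread_sketch {
	uint64_t valid_headers;        /* one validity bit per header */
	uint8_t *structs[64];          /* per-struct data pointers */
	struct header_rt headers[64];
};

static void
hdr_validate_sketch(struct thread_sketch *t, uint32_t header_id, uint32_t struct_id)
{
	/* Already valid: the data pointer may target the packet buffer, keep it. */
	if (t->valid_headers & (1ULL << header_id))
		return;

	/* Not valid yet: point the header at its default buffer, then mark it valid. */
	t->structs[struct_id] = t->headers[header_id].ptr0;
	t->valid_headers |= 1ULL << header_id;
}
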
--
2.17.1
* Re: [PATCH] pipeline: rework optimization pattern for header generation
From: Thomas Monjalon @ 2022-06-20 14:10 UTC
To: Cristian Dumitrescu; +Cc: dev
16/06/2022 16:16, Cristian Dumitrescu:
> The P4 language requires marking a header as valid before any of the
> header fields are written, as opposed to after the writes are done.
> Hence, the optimization that replaces the sequence of instructions
> generating a header from the table action data with a single internal
> DMA instruction is reworked from "mov all + validate -> dma" to
> "validate + mov all -> dma".
>
> Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Applied, thanks.