* [PATCH 1/3] bfp: fix MOV instruction evaluation [not found] <20240627115531.1440-1-konstantin.v.ananyev@yandex.ru> @ 2024-06-27 11:55 ` Konstantin Ananyev 2024-06-27 11:55 ` [PATCH 2/3] bfp: fix load hangs with six IPv6 addresses Konstantin Ananyev [not found] ` <20240627180442.1602-1-konstantin.v.ananyev@yandex.ru> 2 siblings, 0 replies; 5+ messages in thread From: Konstantin Ananyev @ 2024-06-27 11:55 UTC (permalink / raw) To: dev; +Cc: stephen, Konstantin Ananyev, stable From: Konstantin Ananyev <konstantin.ananyev@huawei.com> Verifier might left some register-state values uninitialized while evaluating MOV instructions. Add explicit initialization. Fixes: 8021917293d0 ("bpf: add extra validation for input BPF program") Cc: stable@dpdk.org Signed-off-by: Konstantin Ananyev <konstantin.ananyev@huawei.com> --- lib/bpf/bpf_validate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/bpf/bpf_validate.c b/lib/bpf/bpf_validate.c index 79be5e917d..11344fff4d 100644 --- a/lib/bpf/bpf_validate.c +++ b/lib/bpf/bpf_validate.c @@ -636,14 +636,14 @@ eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins) { uint64_t msk; uint32_t op; - size_t opsz; + size_t opsz, sz; const char *err; struct bpf_eval_state *st; struct bpf_reg_val *rd, rs; - opsz = (BPF_CLASS(ins->code) == BPF_ALU) ? + sz = (BPF_CLASS(ins->code) == BPF_ALU) ? sizeof(uint32_t) : sizeof(uint64_t); - opsz = opsz * CHAR_BIT; + opsz = sz * CHAR_BIT; msk = RTE_LEN2MASK(opsz, uint64_t); st = bvf->evst; @@ -652,8 +652,10 @@ eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins) if (BPF_SRC(ins->code) == BPF_X) { rs = st->rv[ins->src_reg]; eval_apply_mask(&rs, msk); - } else + } else { + rs = (struct bpf_reg_val){.v = {.size = sz,},}; eval_fill_imm(&rs, msk, ins->imm); + } eval_apply_mask(rd, msk); -- 2.35.3 ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/3] bfp: fix load hangs with six IPv6 addresses [not found] <20240627115531.1440-1-konstantin.v.ananyev@yandex.ru> 2024-06-27 11:55 ` [PATCH 1/3] bfp: fix MOV instruction evaluation Konstantin Ananyev @ 2024-06-27 11:55 ` Konstantin Ananyev 2024-06-27 15:13 ` Stephen Hemminger [not found] ` <20240627180442.1602-1-konstantin.v.ananyev@yandex.ru> 2 siblings, 1 reply; 5+ messages in thread From: Konstantin Ananyev @ 2024-06-27 11:55 UTC (permalink / raw) To: dev; +Cc: stephen, Konstantin Ananyev, stable, Isaac Boukris From: Konstantin Ananyev <konstantin.ananyev@huawei.com> As described in: https://bugs.dpdk.org/show_bug.cgi?id=1465 converting from following cBPF filter: "host 1::1 or host 1::1 or host 1::1 or host 1::1 or host 1::1 or host 1::1" taking too long for BPF verifier ito complete (up to 25 seconds). Looking at it I didn't find any actual functional bug. In fact, it does what is expected: goes through each possible path of BPF program and evaluates register/stack state for each instruction. The problem is that for program with a lot of conditional branches number of possible paths starts to grow exponentially and such walk becomes very excessive. So to minimize number of evaluations, this patch implements heuristic similar to what Linux kernel does - state pruning: If from given instruction for given program state we explore all possible paths and for each of them reach bpf_exit() without any complaints and a valid R0 value, then for that instruction this program state can be marked as 'safe'. When we later arrive at the same instruction with a state equivalent to an earlier instruction 'safe' state, we can prune the search. For now, only states for JCC targets are saved/examined. Plus added few extra logging for DEBUG level. 
Bugzilla ID: 1465 Fixes: 8021917293d0 ("bpf: add extra validation for input BPF program") Cc: stable@dpdk.org Reported-by: Isaac Boukris <iboukris@gmail.com> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@huawei.com> --- lib/bpf/bpf_validate.c | 304 ++++++++++++++++++++++++++++++++++------- 1 file changed, 254 insertions(+), 50 deletions(-) diff --git a/lib/bpf/bpf_validate.c b/lib/bpf/bpf_validate.c index 11344fff4d..2dae55b216 100644 --- a/lib/bpf/bpf_validate.c +++ b/lib/bpf/bpf_validate.c @@ -29,10 +29,13 @@ struct bpf_reg_val { }; struct bpf_eval_state { + SLIST_ENTRY(bpf_eval_state) next; /* for @safe list traversal */ struct bpf_reg_val rv[EBPF_REG_NUM]; struct bpf_reg_val sv[MAX_BPF_STACK_SIZE / sizeof(uint64_t)]; }; +SLIST_HEAD(bpf_evst_head, bpf_eval_state); + /* possible instruction node colour */ enum { WHITE, @@ -52,6 +55,9 @@ enum { #define MAX_EDGES 2 +/* max number of 'safe' evaluated states to track per node */ +#define NODE_EVST_MAX 32 + struct inst_node { uint8_t colour; uint8_t nb_edge:4; @@ -59,7 +65,18 @@ struct inst_node { uint8_t edge_type[MAX_EDGES]; uint32_t edge_dest[MAX_EDGES]; uint32_t prev_node; - struct bpf_eval_state *evst; + struct { + struct bpf_eval_state *cur; /*save/restore for jcc targets */ + struct bpf_eval_state *start; + struct bpf_evst_head safe; /* safe states for track/prune */ + uint32_t nb_safe; + } evst; +}; + +struct evst_pool { + uint32_t num; + uint32_t cur; + struct bpf_eval_state *ent; }; struct bpf_verifier { @@ -73,11 +90,8 @@ struct bpf_verifier { uint32_t edge_type[MAX_EDGE_TYPE]; struct bpf_eval_state *evst; struct inst_node *evin; - struct { - uint32_t num; - uint32_t cur; - struct bpf_eval_state *ent; - } evst_pool; + struct evst_pool evst_sr_pool; /* for evst save/restore */ + struct evst_pool evst_tp_pool; /* for evst track/prune */ }; struct bpf_ins_check { @@ -1085,7 +1099,7 @@ eval_jcc(struct bpf_verifier *bvf, const struct ebpf_insn *ins) struct bpf_reg_val rvf, rvt; tst = bvf->evst; - fst = 
bvf->evin->evst; + fst = bvf->evin->evst.cur; frd = fst->rv + ins->dst_reg; trd = tst->rv + ins->dst_reg; @@ -1814,8 +1828,8 @@ add_edge(struct bpf_verifier *bvf, struct inst_node *node, uint32_t nidx) uint32_t ne; if (nidx > bvf->prm->nb_ins) { - RTE_BPF_LOG_LINE(ERR, "%s: program boundary violation at pc: %u, " - "next pc: %u", + RTE_BPF_LOG_LINE(ERR, + "%s: program boundary violation at pc: %u, next pc: %u", __func__, get_node_idx(bvf, node), nidx); return -EINVAL; } @@ -2091,60 +2105,114 @@ validate(struct bpf_verifier *bvf) * helper functions get/free eval states. */ static struct bpf_eval_state * -pull_eval_state(struct bpf_verifier *bvf) +pull_eval_state(struct evst_pool *pool) { uint32_t n; - n = bvf->evst_pool.cur; - if (n == bvf->evst_pool.num) + n = pool->cur; + if (n == pool->num) return NULL; - bvf->evst_pool.cur = n + 1; - return bvf->evst_pool.ent + n; + pool->cur = n + 1; + return pool->ent + n; } static void -push_eval_state(struct bpf_verifier *bvf) +push_eval_state(struct evst_pool *pool) { - bvf->evst_pool.cur--; + RTE_ASSERT(pool->cur != 0); + pool->cur--; } static void evst_pool_fini(struct bpf_verifier *bvf) { bvf->evst = NULL; - free(bvf->evst_pool.ent); - memset(&bvf->evst_pool, 0, sizeof(bvf->evst_pool)); + free(bvf->evst_sr_pool.ent); + memset(&bvf->evst_sr_pool, 0, sizeof(bvf->evst_sr_pool)); + memset(&bvf->evst_tp_pool, 0, sizeof(bvf->evst_tp_pool)); } static int evst_pool_init(struct bpf_verifier *bvf) { - uint32_t n; + uint32_t k, n; - n = bvf->nb_jcc_nodes + 1; + /* + * We need nb_jcc_nodes + 1 for save_cur/restore_cur + * remaining ones will be used for state tracking/pruning. 
+ */ + k = bvf->nb_jcc_nodes + 1; + n = k * 3; - bvf->evst_pool.ent = calloc(n, sizeof(bvf->evst_pool.ent[0])); - if (bvf->evst_pool.ent == NULL) + bvf->evst_sr_pool.ent = calloc(n, sizeof(bvf->evst_sr_pool.ent[0])); + if (bvf->evst_sr_pool.ent == NULL) return -ENOMEM; - bvf->evst_pool.num = n; - bvf->evst_pool.cur = 0; + bvf->evst_sr_pool.num = k; + bvf->evst_sr_pool.cur = 0; - bvf->evst = pull_eval_state(bvf); + bvf->evst_tp_pool.ent = bvf->evst_sr_pool.ent + k; + bvf->evst_tp_pool.num = n - k; + bvf->evst_tp_pool.cur = 0; + + bvf->evst = pull_eval_state(&bvf->evst_sr_pool); return 0; } +/* + * try to allocate and initialise new eval state for given node. + * later if no errors will be encountered, this state will be accepted as + * one of the possible 'safe' states for that node. + */ +static void +save_start_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +{ + RTE_ASSERT(node->evst.start == NULL); + + /* limit number of states for one node with some reasonable value */ + if (node->evst.nb_safe >= NODE_EVST_MAX) + return; + + /* try to get new eval_state */ + node->evst.start = pull_eval_state(&bvf->evst_tp_pool); + + /* make a copy of current state */ + if (node->evst.start != NULL) { + memcpy(node->evst.start, bvf->evst, sizeof(*node->evst.start)); + SLIST_NEXT(node->evst.start, next) = NULL; + } +} + +/* + * add @start state to the list of @safe states. + */ +static void +save_safe_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +{ + if (node->evst.start == NULL) + return; + + SLIST_INSERT_HEAD(&node->evst.safe, node->evst.start, next); + node->evst.nb_safe++; + + RTE_BPF_LOG_LINE(DEBUG, "%s(bvf=%p,node=%u,state=%p): nb_safe=%u;", + __func__, bvf, get_node_idx(bvf, node), node->evst.start, + node->evst.nb_safe); + + node->evst.start = NULL; +} + /* * Save current eval state. 
*/ static int -save_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +save_cur_eval_state(struct bpf_verifier *bvf, struct inst_node *node) { struct bpf_eval_state *st; /* get new eval_state for this node */ - st = pull_eval_state(bvf); + st = pull_eval_state(&bvf->evst_sr_pool); if (st == NULL) { RTE_BPF_LOG_LINE(ERR, "%s: internal error (out of space) at pc: %u", @@ -2156,11 +2224,13 @@ save_eval_state(struct bpf_verifier *bvf, struct inst_node *node) memcpy(st, bvf->evst, sizeof(*st)); /* swap current state with new one */ - node->evst = bvf->evst; + RTE_ASSERT(node->evst.cur == NULL); + node->evst.cur = bvf->evst; bvf->evst = st; RTE_BPF_LOG_LINE(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;", - __func__, bvf, get_node_idx(bvf, node), node->evst, bvf->evst); + __func__, bvf, get_node_idx(bvf, node), node->evst.cur, + bvf->evst); return 0; } @@ -2169,14 +2239,15 @@ save_eval_state(struct bpf_verifier *bvf, struct inst_node *node) * Restore previous eval state and mark current eval state as free. 
*/ static void -restore_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +restore_cur_eval_state(struct bpf_verifier *bvf, struct inst_node *node) { RTE_BPF_LOG_LINE(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;", - __func__, bvf, get_node_idx(bvf, node), bvf->evst, node->evst); + __func__, bvf, get_node_idx(bvf, node), bvf->evst, + node->evst.cur); - bvf->evst = node->evst; - node->evst = NULL; - push_eval_state(bvf); + bvf->evst = node->evst.cur; + node->evst.cur = NULL; + push_eval_state(&bvf->evst_sr_pool); } static void @@ -2193,28 +2264,125 @@ log_dbg_eval_state(const struct bpf_verifier *bvf, const struct ebpf_insn *ins, RTE_LOG(DEBUG, BPF, "r%u={\n" - "\tv={type=%u, size=%zu},\n" + "\tv={type=%u, size=%zu, buf_size=%zu},\n" "\tmask=0x%" PRIx64 ",\n" "\tu={min=0x%" PRIx64 ", max=0x%" PRIx64 "},\n" "\ts={min=%" PRId64 ", max=%" PRId64 "},\n" "};\n", ins->dst_reg, - rv->v.type, rv->v.size, + rv->v.type, rv->v.size, rv->v.buf_size, rv->mask, rv->u.min, rv->u.max, rv->s.min, rv->s.max); } /* - * Do second pass through CFG and try to evaluate instructions - * via each possible path. - * Right now evaluation functionality is quite limited. - * Still need to add extra checks for: - * - use/return uninitialized registers. - * - use uninitialized data from the stack. - * - memory boundaries violation. + * compare two evaluation states. + * returns zero if @lv is more conservative (safer) then @rv. + * returns non-zero value otherwise. 
*/ static int +cmp_reg_val_within(const struct bpf_reg_val *lv, const struct bpf_reg_val *rv) +{ + /* expect @v and @mask to be identical */ + if (memcmp(&lv->v, &rv->v, sizeof(lv->v)) != 0 || lv->mask != rv->mask) + return -1; + + /* exact match only for mbuf and stack pointers */ + if (lv->v.type == RTE_BPF_ARG_PTR_MBUF || + lv->v.type == BPF_ARG_PTR_STACK) + return -1; + + if (lv->u.min <= rv->u.min && lv->u.max >= rv->u.max && + lv->s.min <= rv->s.min && lv->s.max >= rv->s.max) + return 0; + + return -1; +} + +/* + * compare two evaluation states. + * returns zero if they are identical. + * returns positive value if @lv is more conservative (safer) then @rv. + * returns negative value otherwise. + */ +static int +cmp_eval_state(const struct bpf_eval_state *lv, const struct bpf_eval_state *rv) +{ + int32_t rc; + uint32_t i, k; + + /* for stack expect identical values */ + rc = memcmp(lv->sv, rv->sv, sizeof(lv->sv)); + if (rc != 0) + return -(2 * EBPF_REG_NUM); + + k = 0; + /* check register values */ + for (i = 0; i != RTE_DIM(lv->rv); i++) { + rc = memcmp(&lv->rv[i], &rv->rv[i], sizeof(lv->rv[i])); + if (rc != 0 && cmp_reg_val_within(&lv->rv[i], &rv->rv[i]) != 0) + return -(i + 1); + k += (rc != 0); + } + + return k; +} + +/* + * check did we already evaluated that path and can it be pruned that time. + */ +static int +prune_eval_state(struct bpf_verifier *bvf, const struct inst_node *node, + struct inst_node *next) +{ + int32_t rc; + struct bpf_eval_state *safe; + + rc = INT32_MIN; + SLIST_FOREACH(safe, &next->evst.safe, next) { + rc = cmp_eval_state(safe, bvf->evst); + if (rc >= 0) + break; + } + + rc = (rc >= 0) ? 0 : -1; + + /* + * current state doesn't match any safe states, + * so no prunning is possible right now, + * track current state for future references. 
+ */ + if (rc != 0) + save_start_eval_state(bvf, next); + + RTE_BPF_LOG_LINE(DEBUG, "%s(bvf=%p,node=%u,next=%u) returns %d, " + "next->evst.start=%p, next->evst.nb_safe=%u", + __func__, bvf, get_node_idx(bvf, node), + get_node_idx(bvf, next), rc, + next->evst.start, next->evst.nb_safe); + return rc; +} + +/* Do second pass through CFG and try to evaluate instructions + * via each possible path. The verifier will try all paths, tracking types of + * registers used as input to instructions, and updating resulting type via + * register state values. Plus for each register and possible stack value it + * tries to estimate possible max/min value. + * For conditional jumps, a stack is used to save evaluation state, so one + * path is explored while the state for the other path is pushed onto the stack. + * Then later, we backtrack to the first pushed instruction and repeat the cycle + * until the stack is empty and we're done. + * For program with many conditional branches walking through all possible path + * could be very excessive. So to minimize number of evaluations we use + * heuristic similar to what Linux kernel does - state pruning: + * If from given instruction for given program state we explore all possible + * paths and for each of them reach _exit() without any complaints and a valid + * R0 value, then for that instruction, that program state can be marked as + * 'safe'. When we later arrive at the same instruction with a state + * equivalent to an earlier instruction's 'safe' state, we can prune the search. + * For now, only states for JCC targets are saved/examined. 
+ */ evaluate(struct bpf_verifier *bvf) { int32_t rc; @@ -2223,6 +2391,13 @@ evaluate(struct bpf_verifier *bvf) const struct ebpf_insn *ins; struct inst_node *next, *node; + struct { + uint32_t nb_eval; + uint32_t nb_prune; + uint32_t nb_save; + uint32_t nb_restore; + } stats; + /* initial state of frame pointer */ static const struct bpf_reg_val rvfp = { .v = { @@ -2246,6 +2421,8 @@ evaluate(struct bpf_verifier *bvf) next = node; rc = 0; + memset(&stats, 0, sizeof(stats)); + while (node != NULL && rc == 0) { /* @@ -2259,11 +2436,14 @@ evaluate(struct bpf_verifier *bvf) op = ins[idx].code; /* for jcc node make a copy of evaluation state */ - if (node->nb_edge > 1) - rc |= save_eval_state(bvf, node); + if (node->nb_edge > 1) { + rc |= save_cur_eval_state(bvf, node); + stats.nb_save++; + } if (ins_chk[op].eval != NULL && rc == 0) { err = ins_chk[op].eval(bvf, ins + idx); + stats.nb_eval++; if (err != NULL) { RTE_BPF_LOG_LINE(ERR, "%s: %s at pc: %u", __func__, err, idx); @@ -2277,21 +2457,37 @@ evaluate(struct bpf_verifier *bvf) /* proceed through CFG */ next = get_next_node(bvf, node); + if (next != NULL) { /* proceed with next child */ if (node->cur_edge == node->nb_edge && - node->evst != NULL) - restore_eval_state(bvf, node); + node->evst.cur != NULL) { + restore_cur_eval_state(bvf, node); + stats.nb_restore++; + } - next->prev_node = get_node_idx(bvf, node); - node = next; + /* + * for jcc targets: check did we already evaluated + * that path and can it's evaluation be skipped that + * time. + */ + if (node->nb_edge > 1 && prune_eval_state(bvf, node, + next) == 0) { + next = NULL; + stats.nb_prune++; + } else { + next->prev_node = get_node_idx(bvf, node); + node = next; + } } else { /* * finished with current node and all it's kids, - * proceed with parent + * mark it's @start state as safe for future references, + * and proceed with parent. 
*/ node->cur_edge = 0; + save_safe_eval_state(bvf, node); node = get_prev_node(bvf, node); /* finished */ @@ -2300,6 +2496,14 @@ evaluate(struct bpf_verifier *bvf) } } + RTE_LOG(DEBUG, BPF, "%s(%p) returns %d, stats:\n" + "node evaluations=%u;\n" + "state pruned=%u;\n" + "state saves=%u;\n" + "state restores=%u;\n", + __func__, bvf, rc, + stats.nb_eval, stats.nb_prune, stats.nb_save, stats.nb_restore); + return rc; } -- 2.35.3 ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/3] bfp: fix load hangs with six IPv6 addresses 2024-06-27 11:55 ` [PATCH 2/3] bfp: fix load hangs with six IPv6 addresses Konstantin Ananyev @ 2024-06-27 15:13 ` Stephen Hemminger 0 siblings, 0 replies; 5+ messages in thread From: Stephen Hemminger @ 2024-06-27 15:13 UTC (permalink / raw) To: Konstantin Ananyev; +Cc: dev, Konstantin Ananyev, stable, Isaac Boukris On Thu, 27 Jun 2024 12:55:30 +0100 Konstantin Ananyev <konstantin.v.ananyev@yandex.ru> wrote: > From: Konstantin Ananyev <konstantin.ananyev@huawei.com> > > As described in: https://bugs.dpdk.org/show_bug.cgi?id=1465 > converting from following cBPF filter: > "host 1::1 or host 1::1 or host 1::1 or host 1::1 or host 1::1 or host 1::1" > taking too long for BPF verifier ito complete (up to 25 seconds). > Looking at it I didn't find any actual functional bug. > In fact, it does what is expected: goes through each possible path of > BPF program and evaluates register/stack state for each instruction. > The problem is that for program with a lot of conditional branches number > of possible paths starts to grow exponentially and such walk becomes very > excessive. > So to minimize number of evaluations, this patch implements heuristic > similar to what Linux kernel does - state pruning: > If from given instruction for given program state we explore all possible > paths and for each of them reach bpf_exit() without any complaints and a > valid R0 value, then for that instruction this program state can be > marked as 'safe'. When we later arrive at the same instruction with a state > equivalent to an earlier instruction 'safe' state, we can prune the search. > For now, only states for JCC targets are saved/examined. > Plus added few extra logging for DEBUG level. 
> > Bugzilla ID: 1465 > Fixes: 8021917293d0 ("bpf: add extra validation for input BPF program") > Cc: stable@dpdk.org Acked-by: Stephen Hemminger <stephen@networkplumber.org> > + struct bpf_eval_state *cur; /*save/restore for jcc targets */ Nit: should have space after /* in this comment ^ permalink raw reply [flat|nested] 5+ messages in thread
[parent not found: <20240627180442.1602-1-konstantin.v.ananyev@yandex.ru>]
* [PATCH v2 1/3] bfp: fix MOV instruction evaluation [not found] ` <20240627180442.1602-1-konstantin.v.ananyev@yandex.ru> @ 2024-06-27 18:04 ` Konstantin Ananyev 2024-06-27 18:04 ` [PATCH v2 2/3] bfp: fix load hangs with six IPv6 addresses Konstantin Ananyev 1 sibling, 0 replies; 5+ messages in thread From: Konstantin Ananyev @ 2024-06-27 18:04 UTC (permalink / raw) To: dev; +Cc: Konstantin Ananyev, stable, Morten Brørup From: Konstantin Ananyev <konstantin.ananyev@huawei.com> Verifier might left some register-state values uninitialized while evaluating MOV instructions. Add explicit initialization. Fixes: 8021917293d0 ("bpf: add extra validation for input BPF program") Cc: stable@dpdk.org Signed-off-by: Konstantin Ananyev <konstantin.ananyev@huawei.com> Acked-by: Morten Brørup <mb@smartsharesystems.com> --- lib/bpf/bpf_validate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/bpf/bpf_validate.c b/lib/bpf/bpf_validate.c index 79be5e917d..11344fff4d 100644 --- a/lib/bpf/bpf_validate.c +++ b/lib/bpf/bpf_validate.c @@ -636,14 +636,14 @@ eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins) { uint64_t msk; uint32_t op; - size_t opsz; + size_t opsz, sz; const char *err; struct bpf_eval_state *st; struct bpf_reg_val *rd, rs; - opsz = (BPF_CLASS(ins->code) == BPF_ALU) ? + sz = (BPF_CLASS(ins->code) == BPF_ALU) ? sizeof(uint32_t) : sizeof(uint64_t); - opsz = opsz * CHAR_BIT; + opsz = sz * CHAR_BIT; msk = RTE_LEN2MASK(opsz, uint64_t); st = bvf->evst; @@ -652,8 +652,10 @@ eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins) if (BPF_SRC(ins->code) == BPF_X) { rs = st->rv[ins->src_reg]; eval_apply_mask(&rs, msk); - } else + } else { + rs = (struct bpf_reg_val){.v = {.size = sz,},}; eval_fill_imm(&rs, msk, ins->imm); + } eval_apply_mask(rd, msk); -- 2.35.3 ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v2 2/3] bfp: fix load hangs with six IPv6 addresses [not found] ` <20240627180442.1602-1-konstantin.v.ananyev@yandex.ru> 2024-06-27 18:04 ` [PATCH v2 1/3] bfp: fix MOV instruction evaluation Konstantin Ananyev @ 2024-06-27 18:04 ` Konstantin Ananyev 1 sibling, 0 replies; 5+ messages in thread From: Konstantin Ananyev @ 2024-06-27 18:04 UTC (permalink / raw) To: dev Cc: Konstantin Ananyev, stable, Isaac Boukris, Morten Brørup, Stephen Hemminger From: Konstantin Ananyev <konstantin.ananyev@huawei.com> As described in: https://bugs.dpdk.org/show_bug.cgi?id=1465 converting from following cBPF filter: "host 1::1 or host 1::1 or host 1::1 or host 1::1 or host 1::1 or host 1::1" taking too long for BPF verifier ito complete (up to 25 seconds). Looking at it I didn't find any actual functional bug. In fact, it does what is expected: goes through each possible path of BPF program and evaluates register/stack state for each instruction. The problem is that for program with a lot of conditional branches number of possible paths starts to grow exponentially and such walk becomes very excessive. So to minimize number of evaluations, this patch implements heuristic similar to what Linux kernel does - state pruning: If from given instruction for given program state we explore all possible paths and for each of them reach bpf_exit() without any complaints and a valid R0 value, then for that instruction this program state can be marked as 'safe'. When we later arrive at the same instruction with a state equivalent to an earlier instruction 'safe' state, we can prune the search. For now, only states for JCC targets are saved/examined. Plus added few extra logging for DEBUG level. 
Bugzilla ID: 1465 Fixes: 8021917293d0 ("bpf: add extra validation for input BPF program") Cc: stable@dpdk.org Reported-by: Isaac Boukris <iboukris@gmail.com> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@huawei.com> Acked-by: Morten Brørup <mb@smartsharesystems.com> Acked-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/bpf/bpf_validate.c | 305 ++++++++++++++++++++++++++++++++++------- 1 file changed, 255 insertions(+), 50 deletions(-) diff --git a/lib/bpf/bpf_validate.c b/lib/bpf/bpf_validate.c index 11344fff4d..3cfdc9ddf4 100644 --- a/lib/bpf/bpf_validate.c +++ b/lib/bpf/bpf_validate.c @@ -29,10 +29,13 @@ struct bpf_reg_val { }; struct bpf_eval_state { + SLIST_ENTRY(bpf_eval_state) next; /* for @safe list traversal */ struct bpf_reg_val rv[EBPF_REG_NUM]; struct bpf_reg_val sv[MAX_BPF_STACK_SIZE / sizeof(uint64_t)]; }; +SLIST_HEAD(bpf_evst_head, bpf_eval_state); + /* possible instruction node colour */ enum { WHITE, @@ -52,6 +55,9 @@ enum { #define MAX_EDGES 2 +/* max number of 'safe' evaluated states to track per node */ +#define NODE_EVST_MAX 32 + struct inst_node { uint8_t colour; uint8_t nb_edge:4; @@ -59,7 +65,18 @@ struct inst_node { uint8_t edge_type[MAX_EDGES]; uint32_t edge_dest[MAX_EDGES]; uint32_t prev_node; - struct bpf_eval_state *evst; + struct { + struct bpf_eval_state *cur; /* save/restore for jcc targets */ + struct bpf_eval_state *start; + struct bpf_evst_head safe; /* safe states for track/prune */ + uint32_t nb_safe; + } evst; +}; + +struct evst_pool { + uint32_t num; + uint32_t cur; + struct bpf_eval_state *ent; }; struct bpf_verifier { @@ -73,11 +90,8 @@ struct bpf_verifier { uint32_t edge_type[MAX_EDGE_TYPE]; struct bpf_eval_state *evst; struct inst_node *evin; - struct { - uint32_t num; - uint32_t cur; - struct bpf_eval_state *ent; - } evst_pool; + struct evst_pool evst_sr_pool; /* for evst save/restore */ + struct evst_pool evst_tp_pool; /* for evst track/prune */ }; struct bpf_ins_check { @@ -1085,7 +1099,7 @@ 
eval_jcc(struct bpf_verifier *bvf, const struct ebpf_insn *ins) struct bpf_reg_val rvf, rvt; tst = bvf->evst; - fst = bvf->evin->evst; + fst = bvf->evin->evst.cur; frd = fst->rv + ins->dst_reg; trd = tst->rv + ins->dst_reg; @@ -1814,8 +1828,8 @@ add_edge(struct bpf_verifier *bvf, struct inst_node *node, uint32_t nidx) uint32_t ne; if (nidx > bvf->prm->nb_ins) { - RTE_BPF_LOG_LINE(ERR, "%s: program boundary violation at pc: %u, " - "next pc: %u", + RTE_BPF_LOG_LINE(ERR, + "%s: program boundary violation at pc: %u, next pc: %u", __func__, get_node_idx(bvf, node), nidx); return -EINVAL; } @@ -2091,60 +2105,114 @@ validate(struct bpf_verifier *bvf) * helper functions get/free eval states. */ static struct bpf_eval_state * -pull_eval_state(struct bpf_verifier *bvf) +pull_eval_state(struct evst_pool *pool) { uint32_t n; - n = bvf->evst_pool.cur; - if (n == bvf->evst_pool.num) + n = pool->cur; + if (n == pool->num) return NULL; - bvf->evst_pool.cur = n + 1; - return bvf->evst_pool.ent + n; + pool->cur = n + 1; + return pool->ent + n; } static void -push_eval_state(struct bpf_verifier *bvf) +push_eval_state(struct evst_pool *pool) { - bvf->evst_pool.cur--; + RTE_ASSERT(pool->cur != 0); + pool->cur--; } static void evst_pool_fini(struct bpf_verifier *bvf) { bvf->evst = NULL; - free(bvf->evst_pool.ent); - memset(&bvf->evst_pool, 0, sizeof(bvf->evst_pool)); + free(bvf->evst_sr_pool.ent); + memset(&bvf->evst_sr_pool, 0, sizeof(bvf->evst_sr_pool)); + memset(&bvf->evst_tp_pool, 0, sizeof(bvf->evst_tp_pool)); } static int evst_pool_init(struct bpf_verifier *bvf) { - uint32_t n; + uint32_t k, n; - n = bvf->nb_jcc_nodes + 1; + /* + * We need nb_jcc_nodes + 1 for save_cur/restore_cur + * remaining ones will be used for state tracking/pruning. 
+ */ + k = bvf->nb_jcc_nodes + 1; + n = k * 3; - bvf->evst_pool.ent = calloc(n, sizeof(bvf->evst_pool.ent[0])); - if (bvf->evst_pool.ent == NULL) + bvf->evst_sr_pool.ent = calloc(n, sizeof(bvf->evst_sr_pool.ent[0])); + if (bvf->evst_sr_pool.ent == NULL) return -ENOMEM; - bvf->evst_pool.num = n; - bvf->evst_pool.cur = 0; + bvf->evst_sr_pool.num = k; + bvf->evst_sr_pool.cur = 0; - bvf->evst = pull_eval_state(bvf); + bvf->evst_tp_pool.ent = bvf->evst_sr_pool.ent + k; + bvf->evst_tp_pool.num = n - k; + bvf->evst_tp_pool.cur = 0; + + bvf->evst = pull_eval_state(&bvf->evst_sr_pool); return 0; } +/* + * try to allocate and initialise new eval state for given node. + * later if no errors will be encountered, this state will be accepted as + * one of the possible 'safe' states for that node. + */ +static void +save_start_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +{ + RTE_ASSERT(node->evst.start == NULL); + + /* limit number of states for one node with some reasonable value */ + if (node->evst.nb_safe >= NODE_EVST_MAX) + return; + + /* try to get new eval_state */ + node->evst.start = pull_eval_state(&bvf->evst_tp_pool); + + /* make a copy of current state */ + if (node->evst.start != NULL) { + memcpy(node->evst.start, bvf->evst, sizeof(*node->evst.start)); + SLIST_NEXT(node->evst.start, next) = NULL; + } +} + +/* + * add @start state to the list of @safe states. + */ +static void +save_safe_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +{ + if (node->evst.start == NULL) + return; + + SLIST_INSERT_HEAD(&node->evst.safe, node->evst.start, next); + node->evst.nb_safe++; + + RTE_BPF_LOG_LINE(DEBUG, "%s(bvf=%p,node=%u,state=%p): nb_safe=%u;", + __func__, bvf, get_node_idx(bvf, node), node->evst.start, + node->evst.nb_safe); + + node->evst.start = NULL; +} + /* * Save current eval state. 
*/ static int -save_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +save_cur_eval_state(struct bpf_verifier *bvf, struct inst_node *node) { struct bpf_eval_state *st; /* get new eval_state for this node */ - st = pull_eval_state(bvf); + st = pull_eval_state(&bvf->evst_sr_pool); if (st == NULL) { RTE_BPF_LOG_LINE(ERR, "%s: internal error (out of space) at pc: %u", @@ -2156,11 +2224,13 @@ save_eval_state(struct bpf_verifier *bvf, struct inst_node *node) memcpy(st, bvf->evst, sizeof(*st)); /* swap current state with new one */ - node->evst = bvf->evst; + RTE_ASSERT(node->evst.cur == NULL); + node->evst.cur = bvf->evst; bvf->evst = st; RTE_BPF_LOG_LINE(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;", - __func__, bvf, get_node_idx(bvf, node), node->evst, bvf->evst); + __func__, bvf, get_node_idx(bvf, node), node->evst.cur, + bvf->evst); return 0; } @@ -2169,14 +2239,15 @@ save_eval_state(struct bpf_verifier *bvf, struct inst_node *node) * Restore previous eval state and mark current eval state as free. 
*/ static void -restore_eval_state(struct bpf_verifier *bvf, struct inst_node *node) +restore_cur_eval_state(struct bpf_verifier *bvf, struct inst_node *node) { RTE_BPF_LOG_LINE(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;", - __func__, bvf, get_node_idx(bvf, node), bvf->evst, node->evst); + __func__, bvf, get_node_idx(bvf, node), bvf->evst, + node->evst.cur); - bvf->evst = node->evst; - node->evst = NULL; - push_eval_state(bvf); + bvf->evst = node->evst.cur; + node->evst.cur = NULL; + push_eval_state(&bvf->evst_sr_pool); } static void @@ -2193,26 +2264,124 @@ log_dbg_eval_state(const struct bpf_verifier *bvf, const struct ebpf_insn *ins, RTE_LOG(DEBUG, BPF, "r%u={\n" - "\tv={type=%u, size=%zu},\n" + "\tv={type=%u, size=%zu, buf_size=%zu},\n" "\tmask=0x%" PRIx64 ",\n" "\tu={min=0x%" PRIx64 ", max=0x%" PRIx64 "},\n" "\ts={min=%" PRId64 ", max=%" PRId64 "},\n" "};\n", ins->dst_reg, - rv->v.type, rv->v.size, + rv->v.type, rv->v.size, rv->v.buf_size, rv->mask, rv->u.min, rv->u.max, rv->s.min, rv->s.max); } /* - * Do second pass through CFG and try to evaluate instructions - * via each possible path. - * Right now evaluation functionality is quite limited. - * Still need to add extra checks for: - * - use/return uninitialized registers. - * - use uninitialized data from the stack. - * - memory boundaries violation. + * compare two evaluation states. + * returns zero if @lv is more conservative (safer) then @rv. + * returns non-zero value otherwise. 
+ */ +static int +cmp_reg_val_within(const struct bpf_reg_val *lv, const struct bpf_reg_val *rv) +{ + /* expect @v and @mask to be identical */ + if (memcmp(&lv->v, &rv->v, sizeof(lv->v)) != 0 || lv->mask != rv->mask) + return -1; + + /* exact match only for mbuf and stack pointers */ + if (lv->v.type == RTE_BPF_ARG_PTR_MBUF || + lv->v.type == BPF_ARG_PTR_STACK) + return -1; + + if (lv->u.min <= rv->u.min && lv->u.max >= rv->u.max && + lv->s.min <= rv->s.min && lv->s.max >= rv->s.max) + return 0; + + return -1; +} + +/* + * compare two evaluation states. + * returns zero if they are identical. + * returns positive value if @lv is more conservative (safer) then @rv. + * returns negative value otherwise. + */ +static int +cmp_eval_state(const struct bpf_eval_state *lv, const struct bpf_eval_state *rv) +{ + int32_t rc; + uint32_t i, k; + + /* for stack expect identical values */ + rc = memcmp(lv->sv, rv->sv, sizeof(lv->sv)); + if (rc != 0) + return -(2 * EBPF_REG_NUM); + + k = 0; + /* check register values */ + for (i = 0; i != RTE_DIM(lv->rv); i++) { + rc = memcmp(&lv->rv[i], &rv->rv[i], sizeof(lv->rv[i])); + if (rc != 0 && cmp_reg_val_within(&lv->rv[i], &rv->rv[i]) != 0) + return -(i + 1); + k += (rc != 0); + } + + return k; +} + +/* + * check did we already evaluated that path and can it be pruned that time. + */ +static int +prune_eval_state(struct bpf_verifier *bvf, const struct inst_node *node, + struct inst_node *next) +{ + int32_t rc; + struct bpf_eval_state *safe; + + rc = INT32_MIN; + SLIST_FOREACH(safe, &next->evst.safe, next) { + rc = cmp_eval_state(safe, bvf->evst); + if (rc >= 0) + break; + } + + rc = (rc >= 0) ? 0 : -1; + + /* + * current state doesn't match any safe states, + * so no prunning is possible right now, + * track current state for future references. 
+ */ + if (rc != 0) + save_start_eval_state(bvf, next); + + RTE_BPF_LOG_LINE(DEBUG, "%s(bvf=%p,node=%u,next=%u) returns %d, " + "next->evst.start=%p, next->evst.nb_safe=%u", + __func__, bvf, get_node_idx(bvf, node), + get_node_idx(bvf, next), rc, + next->evst.start, next->evst.nb_safe); + return rc; +} + +/* Do second pass through CFG and try to evaluate instructions + * via each possible path. The verifier will try all paths, tracking types of + * registers used as input to instructions, and updating resulting type via + * register state values. Plus for each register and possible stack value it + * tries to estimate possible max/min value. + * For conditional jumps, a stack is used to save evaluation state, so one + * path is explored while the state for the other path is pushed onto the stack. + * Then later, we backtrack to the first pushed instruction and repeat the cycle + * until the stack is empty and we're done. + * For a program with many conditional branches, walking through all possible + * paths could be very excessive. So to minimize number of evaluations we use + * heuristic similar to what Linux kernel does - state pruning: + * If from given instruction for given program state we explore all possible + * paths and for each of them reach _exit() without any complaints and a valid + * R0 value, then for that instruction, that program state can be marked as + * 'safe'. When we later arrive at the same instruction with a state + * equivalent to an earlier instruction's 'safe' state, we can prune the search. + * For now, only states for JCC targets are saved/examined. 
*/ static int evaluate(struct bpf_verifier *bvf) @@ -2223,6 +2392,13 @@ evaluate(struct bpf_verifier *bvf) const struct ebpf_insn *ins; struct inst_node *next, *node; + struct { + uint32_t nb_eval; + uint32_t nb_prune; + uint32_t nb_save; + uint32_t nb_restore; + } stats; + /* initial state of frame pointer */ static const struct bpf_reg_val rvfp = { .v = { @@ -2246,6 +2422,8 @@ evaluate(struct bpf_verifier *bvf) next = node; rc = 0; + memset(&stats, 0, sizeof(stats)); + while (node != NULL && rc == 0) { /* @@ -2259,11 +2437,14 @@ evaluate(struct bpf_verifier *bvf) op = ins[idx].code; /* for jcc node make a copy of evaluation state */ - if (node->nb_edge > 1) - rc |= save_eval_state(bvf, node); + if (node->nb_edge > 1) { + rc |= save_cur_eval_state(bvf, node); + stats.nb_save++; + } if (ins_chk[op].eval != NULL && rc == 0) { err = ins_chk[op].eval(bvf, ins + idx); + stats.nb_eval++; if (err != NULL) { RTE_BPF_LOG_LINE(ERR, "%s: %s at pc: %u", __func__, err, idx); @@ -2277,21 +2458,37 @@ evaluate(struct bpf_verifier *bvf) /* proceed through CFG */ next = get_next_node(bvf, node); + if (next != NULL) { /* proceed with next child */ if (node->cur_edge == node->nb_edge && - node->evst != NULL) - restore_eval_state(bvf, node); + node->evst.cur != NULL) { + restore_cur_eval_state(bvf, node); + stats.nb_restore++; + } - next->prev_node = get_node_idx(bvf, node); - node = next; + /* + * for jcc targets: check whether we have already + * evaluated that path and whether its evaluation can + * be skipped this time. + */ + if (node->nb_edge > 1 && prune_eval_state(bvf, node, + next) == 0) { + next = NULL; + stats.nb_prune++; + } else { + next->prev_node = get_node_idx(bvf, node); + node = next; + } } else { /* * finished with current node and all it's kids, - * proceed with parent + * mark its @start state as safe for future references, + * and proceed with parent. 
*/ node->cur_edge = 0; + save_safe_eval_state(bvf, node); node = get_prev_node(bvf, node); /* finished */ @@ -2300,6 +2497,14 @@ evaluate(struct bpf_verifier *bvf) } } + RTE_LOG(DEBUG, BPF, "%s(%p) returns %d, stats:\n" + "node evaluations=%u;\n" + "state pruned=%u;\n" + "state saves=%u;\n" + "state restores=%u;\n", + __func__, bvf, rc, + stats.nb_eval, stats.nb_prune, stats.nb_save, stats.nb_restore); + return rc; } -- 2.35.3 ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-06-27 18:06 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <20240627115531.1440-1-konstantin.v.ananyev@yandex.ru> 2024-06-27 11:55 ` [PATCH 1/3] bfp: fix MOV instruction evaluation Konstantin Ananyev 2024-06-27 11:55 ` [PATCH 2/3] bfp: fix load hangs with six IPv6 addresses Konstantin Ananyev 2024-06-27 15:13 ` Stephen Hemminger [not found] ` <20240627180442.1602-1-konstantin.v.ananyev@yandex.ru> 2024-06-27 18:04 ` [PATCH v2 1/3] bfp: fix MOV instruction evaluation Konstantin Ananyev 2024-06-27 18:04 ` [PATCH v2 2/3] bfp: fix load hangs with six IPv6 addresses Konstantin Ananyev
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).