diff options
| author | Joanne Koong <joannekoong@fb.com> | 2021-11-29 19:06:21 -0800 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2021-11-30 10:56:28 -0800 |
| commit | f6e659b7f97c76d0471d12bf274ea2a097cf3c5c (patch) | |
| tree | b8130dd0f6a15b2b28121d425169ef8f707382c2 /tools/testing/selftests/bpf/progs/strobemeta.h | |
| parent | selftests/bpf: Add bpf_loop test (diff) | |
| download | linux-f6e659b7f97c76d0471d12bf274ea2a097cf3c5c.tar.gz linux-f6e659b7f97c76d0471d12bf274ea2a097cf3c5c.zip | |
selftests/bpf: Measure bpf_loop verifier performance
This patch tests bpf_loop in pyperf and strobemeta, and measures the
verifier performance of replacing the traditional for loop
with bpf_loop.
The results are as follows:
~strobemeta~
Baseline
verification time 6808200 usec
stack depth 496
processed 554252 insns (limit 1000000) max_states_per_insn 16
total_states 15878 peak_states 13489 mark_read 3110
#192 verif_scale_strobemeta:OK (unrolled loop)
Using bpf_loop
verification time 31589 usec
stack depth 96+400
processed 1513 insns (limit 1000000) max_states_per_insn 2
total_states 106 peak_states 106 mark_read 60
#193 verif_scale_strobemeta_bpf_loop:OK
~pyperf600~
Baseline
verification time 29702486 usec
stack depth 368
processed 626838 insns (limit 1000000) max_states_per_insn 7
total_states 30368 peak_states 30279 mark_read 748
#182 verif_scale_pyperf600:OK (unrolled loop)
Using bpf_loop
verification time 148488 usec
stack depth 320+40
processed 10518 insns (limit 1000000) max_states_per_insn 10
total_states 705 peak_states 517 mark_read 38
#183 verif_scale_pyperf600_bpf_loop:OK
Using the bpf_loop helper led to approximately a 99% decrease
in the verification time and in the number of instructions.
Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211130030622.4131246-4-joannekoong@fb.com
Diffstat (limited to 'tools/testing/selftests/bpf/progs/strobemeta.h')
| -rw-r--r-- | tools/testing/selftests/bpf/progs/strobemeta.h | 75 |
1 files changed, 72 insertions, 3 deletions
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 60c93aee2f4a..753718595c26 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -445,6 +445,48 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, return payload; } +#ifdef USE_BPF_LOOP +enum read_type { + READ_INT_VAR, + READ_MAP_VAR, + READ_STR_VAR, +}; + +struct read_var_ctx { + struct strobemeta_payload *data; + void *tls_base; + struct strobemeta_cfg *cfg; + void *payload; + /* value gets mutated */ + struct strobe_value_generic *value; + enum read_type type; +}; + +static int read_var_callback(__u32 index, struct read_var_ctx *ctx) +{ + switch (ctx->type) { + case READ_INT_VAR: + if (index >= STROBE_MAX_INTS) + return 1; + read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data); + break; + case READ_MAP_VAR: + if (index >= STROBE_MAX_MAPS) + return 1; + ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload); + break; + case READ_STR_VAR: + if (index >= STROBE_MAX_STRS) + return 1; + ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload); + break; + } + return 0; +} +#endif /* USE_BPF_LOOP */ + /* * read_strobe_meta returns NULL, if no metadata was read; otherwise returns * pointer to *right after* payload ends @@ -475,11 +517,36 @@ static void *read_strobe_meta(struct task_struct *task, */ tls_base = (void *)task; +#ifdef USE_BPF_LOOP + struct read_var_ctx ctx = { + .cfg = cfg, + .tls_base = tls_base, + .value = &value, + .data = data, + .payload = payload, + }; + int err; + + ctx.type = READ_INT_VAR; + err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0); + if (err != STROBE_MAX_INTS) + return NULL; + + ctx.type = READ_STR_VAR; + err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0); + if (err != STROBE_MAX_STRS) + return NULL; + + ctx.type = READ_MAP_VAR; + err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); + if (err != STROBE_MAX_MAPS) + return NULL; +#else #ifdef NO_UNROLL #pragma clang loop unroll(disable) #else #pragma unroll -#endif +#endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_INTS; ++i) { read_int_var(cfg, i, tls_base, &value, data); } @@ -487,7 +554,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma clang loop unroll(disable) #else #pragma unroll -#endif +#endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { payload += read_str_var(cfg, i, tls_base, &value, data, payload); } @@ -495,10 +562,12 @@ static void *read_strobe_meta(struct task_struct *task, #pragma clang loop unroll(disable) #else #pragma unroll -#endif +#endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { payload = read_map_var(cfg, i, tls_base, &value, data, payload); } +#endif /* USE_BPF_LOOP */ + /* * return pointer right after end of payload, so it's possible to * calculate exact amount of useful data that needs to be sent |
