Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/arena.c            4
-rw-r--r--  kernel/bpf/bpf_iter.c        13
-rw-r--r--  kernel/bpf/bpf_lsm.c          2
-rw-r--r--  kernel/bpf/bpf_struct_ops.c 135
-rw-r--r--  kernel/bpf/btf.c            126
-rw-r--r--  kernel/bpf/core.c            19
-rw-r--r--  kernel/bpf/helpers.c         45
-rw-r--r--  kernel/bpf/syscall.c          1
-rw-r--r--  kernel/bpf/verifier.c       350
9 files changed, 503 insertions(+), 192 deletions(-)
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 095a9554e1de..647b709d7d77 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -577,8 +577,8 @@ __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(arena_kfuncs)
-BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2)
+BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
BTF_KFUNCS_END(arena_kfuncs)
static const struct btf_kfunc_id_set common_kfunc_set = {
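
[Editorial note: the two new flags annotate how the arena kfuncs handle arena
pointers: KF_ARENA_RET marks the return value as an arena pointer and
KF_ARENA_ARG2 marks the second argument as one. As a rough BPF-side sketch
only — the arena map definition and section names follow the usual libbpf
conventions and are not part of this patch:

struct {
	__uint(type, BPF_MAP_TYPE_ARENA);
	__uint(map_flags, BPF_F_MMAPABLE);
	__uint(max_entries, 10);	/* number of arena pages */
} arena SEC(".maps");

SEC("syscall")
int arena_demo(void *ctx)
{
	/* KF_ARENA_RET: the returned pointer lives in the arena */
	void *page = bpf_arena_alloc_pages(&arena, NULL, 1,
					   -1 /* NUMA_NO_NODE */, 0);

	if (page)
		/* KF_ARENA_ARG2: the second argument is an arena pointer */
		bpf_arena_free_pages(&arena, page, 1);
	return 0;
}
]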
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 106735145948..380e9a7cac75 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -335,7 +335,7 @@ static void cache_btf_id(struct bpf_iter_target_info *tinfo,
tinfo->btf_id = prog->aux->attach_btf_id;
}
-bool bpf_iter_prog_supported(struct bpf_prog *prog)
+int bpf_iter_prog_supported(struct bpf_prog *prog)
{
const char *attach_fname = prog->aux->attach_func_name;
struct bpf_iter_target_info *tinfo = NULL, *iter;
@@ -344,7 +344,7 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
int prefix_len = strlen(prefix);
if (strncmp(attach_fname, prefix, prefix_len))
- return false;
+ return -EINVAL;
mutex_lock(&targets_mutex);
list_for_each_entry(iter, &targets, list) {
@@ -360,12 +360,11 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
}
mutex_unlock(&targets_mutex);
- if (tinfo) {
- prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
- prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
- }
+ if (!tinfo)
+ return -EINVAL;
- return tinfo != NULL;
+ return bpf_prog_ctx_arg_info_init(prog, tinfo->reg_info->ctx_arg_info,
+ tinfo->reg_info->ctx_arg_info_size);
}
const struct bpf_func_proto *
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 967492b65185..0a59df1c550a 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -316,7 +316,9 @@ BTF_ID(func, bpf_lsm_inode_getxattr)
BTF_ID(func, bpf_lsm_inode_mknod)
BTF_ID(func, bpf_lsm_inode_need_killpriv)
BTF_ID(func, bpf_lsm_inode_post_setxattr)
+BTF_ID(func, bpf_lsm_inode_post_removexattr)
BTF_ID(func, bpf_lsm_inode_readlink)
+BTF_ID(func, bpf_lsm_inode_removexattr)
BTF_ID(func, bpf_lsm_inode_rename)
BTF_ID(func, bpf_lsm_inode_rmdir)
BTF_ID(func, bpf_lsm_inode_setattr)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 040fb1cd840b..db13ee70d94d 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -146,39 +146,7 @@ void bpf_struct_ops_image_free(void *image)
}
#define MAYBE_NULL_SUFFIX "__nullable"
-#define MAX_STUB_NAME 128
-
-/* Return the type info of a stub function, if it exists.
- *
- * The name of a stub function is made up of the name of the struct_ops and
- * the name of the function pointer member, separated by "__". For example,
- * if the struct_ops type is named "foo_ops" and the function pointer
- * member is named "bar", the stub function name would be "foo_ops__bar".
- */
-static const struct btf_type *
-find_stub_func_proto(const struct btf *btf, const char *st_op_name,
- const char *member_name)
-{
- char stub_func_name[MAX_STUB_NAME];
- const struct btf_type *func_type;
- s32 btf_id;
- int cp;
-
- cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s",
- st_op_name, member_name);
- if (cp >= MAX_STUB_NAME) {
- pr_warn("Stub function name too long\n");
- return NULL;
- }
- btf_id = btf_find_by_name_kind(btf, stub_func_name, BTF_KIND_FUNC);
- if (btf_id < 0)
- return NULL;
- func_type = btf_type_by_id(btf, btf_id);
- if (!func_type)
- return NULL;
-
- return btf_type_by_id(btf, func_type->type); /* FUNC_PROTO */
-}
+#define REFCOUNTED_SUFFIX "__ref"
/* Prepare argument info for every nullable argument of a member of a
* struct_ops type.
@@ -203,27 +171,44 @@ find_stub_func_proto(const struct btf *btf, const char *st_op_name,
static int prepare_arg_info(struct btf *btf,
const char *st_ops_name,
const char *member_name,
- const struct btf_type *func_proto,
+ const struct btf_type *func_proto, void *stub_func_addr,
struct bpf_struct_ops_arg_info *arg_info)
{
const struct btf_type *stub_func_proto, *pointed_type;
+ bool is_nullable = false, is_refcounted = false;
const struct btf_param *stub_args, *args;
struct bpf_ctx_arg_aux *info, *info_buf;
u32 nargs, arg_no, info_cnt = 0;
+ char ksym[KSYM_SYMBOL_LEN];
+ const char *stub_fname;
+ const char *suffix;
+ s32 stub_func_id;
u32 arg_btf_id;
int offset;
- stub_func_proto = find_stub_func_proto(btf, st_ops_name, member_name);
- if (!stub_func_proto)
- return 0;
+ stub_fname = kallsyms_lookup((unsigned long)stub_func_addr, NULL, NULL, NULL, ksym);
+ if (!stub_fname) {
+ pr_warn("Cannot find the stub function name for the %s in struct %s\n",
+ member_name, st_ops_name);
+ return -ENOENT;
+ }
+
+ stub_func_id = btf_find_by_name_kind(btf, stub_fname, BTF_KIND_FUNC);
+ if (stub_func_id < 0) {
+ pr_warn("Cannot find the stub function %s in btf\n", stub_fname);
+ return -ENOENT;
+ }
+
+ stub_func_proto = btf_type_by_id(btf, stub_func_id);
+ stub_func_proto = btf_type_by_id(btf, stub_func_proto->type);
/* Check if the number of arguments of the stub function is the same
* as the number of arguments of the function pointer.
*/
nargs = btf_type_vlen(func_proto);
if (nargs != btf_type_vlen(stub_func_proto)) {
- pr_warn("the number of arguments of the stub function %s__%s does not match the number of arguments of the member %s of struct %s\n",
- st_ops_name, member_name, member_name, st_ops_name);
+ pr_warn("the number of arguments of the stub function %s does not match the number of arguments of the member %s of struct %s\n",
+ stub_fname, member_name, st_ops_name);
return -EINVAL;
}
@@ -241,10 +226,18 @@ static int prepare_arg_info(struct btf *btf,
info = info_buf;
for (arg_no = 0; arg_no < nargs; arg_no++) {
/* Skip arguments that are not suffixed with
- * "__nullable".
+ * "__nullable" or "__ref".
*/
- if (!btf_param_match_suffix(btf, &stub_args[arg_no],
- MAYBE_NULL_SUFFIX))
+ is_nullable = btf_param_match_suffix(btf, &stub_args[arg_no],
+ MAYBE_NULL_SUFFIX);
+ is_refcounted = btf_param_match_suffix(btf, &stub_args[arg_no],
+ REFCOUNTED_SUFFIX);
+
+ if (is_nullable)
+ suffix = MAYBE_NULL_SUFFIX;
+ else if (is_refcounted)
+ suffix = REFCOUNTED_SUFFIX;
+ else
continue;
/* Should be a pointer to struct */
@@ -253,30 +246,34 @@ static int prepare_arg_info(struct btf *btf,
&arg_btf_id);
if (!pointed_type ||
!btf_type_is_struct(pointed_type)) {
- pr_warn("stub function %s__%s has %s tagging to an unsupported type\n",
- st_ops_name, member_name, MAYBE_NULL_SUFFIX);
+ pr_warn("stub function %s has %s tagging to an unsupported type\n",
+ stub_fname, suffix);
goto err_out;
}
offset = btf_ctx_arg_offset(btf, func_proto, arg_no);
if (offset < 0) {
- pr_warn("stub function %s__%s has an invalid trampoline ctx offset for arg#%u\n",
- st_ops_name, member_name, arg_no);
+ pr_warn("stub function %s has an invalid trampoline ctx offset for arg#%u\n",
+ stub_fname, arg_no);
goto err_out;
}
if (args[arg_no].type != stub_args[arg_no].type) {
- pr_warn("arg#%u type in stub function %s__%s does not match with its original func_proto\n",
- arg_no, st_ops_name, member_name);
+ pr_warn("arg#%u type in stub function %s does not match with its original func_proto\n",
+ arg_no, stub_fname);
goto err_out;
}
/* Fill the information of the new argument */
- info->reg_type =
- PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL;
info->btf_id = arg_btf_id;
info->btf = btf;
info->offset = offset;
+ if (is_nullable) {
+ info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL;
+ } else if (is_refcounted) {
+ info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID;
+ info->refcounted = true;
+ }
info++;
info_cnt++;
@@ -324,6 +321,13 @@ static bool is_module_member(const struct btf *btf, u32 id)
return !strcmp(btf_name_by_offset(btf, t->name_off), "module");
}
+int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
+{
+ void *func_ptr = *(void **)(st_ops->cfi_stubs + moff);
+
+ return func_ptr ? 0 : -ENOTSUPP;
+}
+
int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
struct btf *btf,
struct bpf_verifier_log *log)
@@ -386,8 +390,11 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
st_ops_desc->value_type = btf_type_by_id(btf, value_id);
for_each_member(i, t, member) {
- const struct btf_type *func_proto;
+ const struct btf_type *func_proto, *ret_type;
+ void **stub_func_addr;
+ u32 moff;
+ moff = __btf_member_bit_offset(t, member) / 8;
mname = btf_name_by_offset(btf, member->name_off);
if (!*mname) {
pr_warn("anon member in struct %s is not supported\n",
@@ -413,9 +420,23 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
func_proto = btf_type_resolve_func_ptr(btf,
member->type,
NULL);
- if (!func_proto)
+
+ /* The member is not a function pointer or
+ * the function pointer is not supported.
+ */
+ if (!func_proto || bpf_struct_ops_supported(st_ops, moff))
continue;
+ if (func_proto->type) {
+ ret_type = btf_type_resolve_ptr(btf, func_proto->type, NULL);
+ if (ret_type && !__btf_type_is_struct(ret_type)) {
+ pr_warn("func ptr %s in struct %s returns non-struct pointer, which is not supported\n",
+ mname, st_ops->name);
+ err = -EOPNOTSUPP;
+ goto errout;
+ }
+ }
+
if (btf_distill_func_proto(log, btf,
func_proto, mname,
&st_ops->func_models[i])) {
@@ -425,8 +446,9 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
goto errout;
}
+ stub_func_addr = *(void **)(st_ops->cfi_stubs + moff);
err = prepare_arg_info(btf, st_ops->name, mname,
- func_proto,
+ func_proto, stub_func_addr,
arg_info + i);
if (err)
goto errout;
@@ -1152,13 +1174,6 @@ void bpf_struct_ops_put(const void *kdata)
bpf_map_put(&st_map->map);
}
-int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
-{
- void *func_ptr = *(void **)(st_ops->cfi_stubs + moff);
-
- return func_ptr ? 0 : -ENOTSUPP;
-}
-
static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
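
[Editorial note: a minimal kernel-side sketch of the "__ref" suffix this file
now recognizes; the ops type and member below are invented for illustration:

struct demo_ops {
	struct task_struct *(*pick)(struct task_struct *prev);
};

/* The "__ref" suffix on the CFI stub's parameter makes prepare_arg_info()
 * mark the corresponding struct_ops argument as refcounted
 * (PTR_TRUSTED | PTR_TO_BTF_ID with info->refcounted set), so an attached
 * program receives it with a reference it may return to the kernel.
 */
static struct task_struct *demo_ops__pick(struct task_struct *prev__ref)
{
	return NULL;
}
]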
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c3223e0db2f5..12426e10baf3 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2575,7 +2575,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_kflag(t)) {
+ if (btf_type_kflag(t) && !btf_type_is_type_tag(t)) {
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
return -EINVAL;
}
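
[Editorial note: for reference, a type tag as written in kernel C; newer
compilers can emit such tags as BTF_KIND_TYPE_TAG with kind_flag set, which
this check now tolerates. Illustrative sketch, not part of the patch:

#define __user __attribute__((btf_type_tag("user")))

struct example {
	void __user *uptr;	/* "user" tag maps to MEM_USER in the verifier */
};
]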
@@ -3332,6 +3332,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
u32 off, int sz, struct btf_field_info *info, u32 field_mask)
{
enum btf_field_type type;
+ const char *tag_value;
+ bool is_type_tag;
u32 res_id;
/* Permit modifiers on the pointer itself */
@@ -3341,19 +3343,20 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
if (!btf_type_is_ptr(t))
return BTF_FIELD_IGNORE;
t = btf_type_by_id(btf, t->type);
-
- if (!btf_type_is_type_tag(t))
+ is_type_tag = btf_type_is_type_tag(t) && !btf_type_kflag(t);
+ if (!is_type_tag)
return BTF_FIELD_IGNORE;
/* Reject extra tags */
if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
return -EINVAL;
- if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off)))
+ tag_value = __btf_name_by_offset(btf, t->name_off);
+ if (!strcmp("kptr_untrusted", tag_value))
type = BPF_KPTR_UNREF;
- else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("kptr", tag_value))
type = BPF_KPTR_REF;
- else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("percpu_kptr", tag_value))
type = BPF_KPTR_PERCPU;
- else if (!strcmp("uptr", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("uptr", tag_value))
type = BPF_UPTR;
else
return -EINVAL;
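
[Editorial note: the four tag values matched here correspond to the BPF-side
kptr annotations. A minimal sketch of a map value using them — the tag macros
come from libbpf's bpf_helpers.h, and struct user_data is hypothetical:

struct user_data { int x; };

struct map_value {
	struct task_struct __kptr *t;			/* "kptr" -> BPF_KPTR_REF */
	struct task_struct __kptr_untrusted *tu;	/* "kptr_untrusted" -> BPF_KPTR_UNREF */
	struct bpf_cpumask __percpu_kptr *pc;		/* "percpu_kptr" -> BPF_KPTR_PERCPU */
	struct user_data __uptr *ud;			/* "uptr" -> BPF_UPTR */
};
]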
@@ -4944,11 +4947,6 @@ static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_kflag(t)) {
- btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
- return -EINVAL;
- }
-
component_idx = btf_type_decl_tag(t)->component_idx;
if (component_idx < -1) {
btf_verifier_log_type(env, t, "Invalid component_idx");
@@ -6507,6 +6505,8 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
/* rxrpc */
{ "rxrpc_recvdata", 0x1 },
{ "rxrpc_resend", 0x10 },
+ { "rxrpc_tq", 0x10 },
+ { "rxrpc_client", 0x1 },
/* skb */
{"kfree_skb", 0x1000},
/* sunrpc */
@@ -6529,6 +6529,103 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
{ "mr_integ_alloc", 0x2000 },
/* bpf_testmod */
{ "bpf_testmod_test_read", 0x0 },
+ /* amdgpu */
+ { "amdgpu_vm_bo_map", 0x1 },
+ { "amdgpu_vm_bo_unmap", 0x1 },
+ /* netfs */
+ { "netfs_folioq", 0x1 },
+ /* xfs from xfs_defer_pending_class */
+ { "xfs_defer_create_intent", 0x1 },
+ { "xfs_defer_cancel_list", 0x1 },
+ { "xfs_defer_pending_finish", 0x1 },
+ { "xfs_defer_pending_abort", 0x1 },
+ { "xfs_defer_relog_intent", 0x1 },
+ { "xfs_defer_isolate_paused", 0x1 },
+ { "xfs_defer_item_pause", 0x1 },
+ { "xfs_defer_item_unpause", 0x1 },
+ /* xfs from xfs_defer_pending_item_class */
+ { "xfs_defer_add_item", 0x1 },
+ { "xfs_defer_cancel_item", 0x1 },
+ { "xfs_defer_finish_item", 0x1 },
+ /* xfs from xfs_icwalk_class */
+ { "xfs_ioc_free_eofblocks", 0x10 },
+ { "xfs_blockgc_free_space", 0x10 },
+ /* xfs from xfs_btree_cur_class */
+ { "xfs_btree_updkeys", 0x100 },
+ { "xfs_btree_overlapped_query_range", 0x100 },
+ /* xfs from xfs_imap_class */
+ { "xfs_map_blocks_found", 0x10000 },
+ { "xfs_map_blocks_alloc", 0x10000 },
+ { "xfs_iomap_alloc", 0x1000 },
+ { "xfs_iomap_found", 0x1000 },
+ /* xfs from xfs_fs_class */
+ { "xfs_inodegc_flush", 0x1 },
+ { "xfs_inodegc_push", 0x1 },
+ { "xfs_inodegc_start", 0x1 },
+ { "xfs_inodegc_stop", 0x1 },
+ { "xfs_inodegc_queue", 0x1 },
+ { "xfs_inodegc_throttle", 0x1 },
+ { "xfs_fs_sync_fs", 0x1 },
+ { "xfs_blockgc_start", 0x1 },
+ { "xfs_blockgc_stop", 0x1 },
+ { "xfs_blockgc_worker", 0x1 },
+ { "xfs_blockgc_flush_all", 0x1 },
+ /* xfs_scrub */
+ { "xchk_nlinks_live_update", 0x10 },
+ /* xfs_scrub from xchk_metapath_class */
+ { "xchk_metapath_lookup", 0x100 },
+ /* nfsd */
+ { "nfsd_dirent", 0x1 },
+ { "nfsd_file_acquire", 0x1001 },
+ { "nfsd_file_insert_err", 0x1 },
+ { "nfsd_file_cons_err", 0x1 },
+ /* nfs4 */
+ { "nfs4_setup_sequence", 0x1 },
+ { "pnfs_update_layout", 0x10000 },
+ { "nfs4_inode_callback_event", 0x200 },
+ { "nfs4_inode_stateid_callback_event", 0x200 },
+ /* nfs from pnfs_layout_event */
+ { "pnfs_mds_fallback_pg_init_read", 0x10000 },
+ { "pnfs_mds_fallback_pg_init_write", 0x10000 },
+ { "pnfs_mds_fallback_pg_get_mirror_count", 0x10000 },
+ { "pnfs_mds_fallback_read_done", 0x10000 },
+ { "pnfs_mds_fallback_write_done", 0x10000 },
+ { "pnfs_mds_fallback_read_pagelist", 0x10000 },
+ { "pnfs_mds_fallback_write_pagelist", 0x10000 },
+ /* coda */
+ { "coda_dec_pic_run", 0x10 },
+ { "coda_dec_pic_done", 0x10 },
+ /* cfg80211 */
+ { "cfg80211_scan_done", 0x11 },
+ { "rdev_set_coalesce", 0x10 },
+ { "cfg80211_report_wowlan_wakeup", 0x100 },
+ { "cfg80211_inform_bss_frame", 0x100 },
+ { "cfg80211_michael_mic_failure", 0x10000 },
+ /* cfg80211 from wiphy_work_event */
+ { "wiphy_work_queue", 0x10 },
+ { "wiphy_work_run", 0x10 },
+ { "wiphy_work_cancel", 0x10 },
+ { "wiphy_work_flush", 0x10 },
+ /* hugetlbfs */
+ { "hugetlbfs_alloc_inode", 0x10 },
+ /* spufs */
+ { "spufs_context", 0x10 },
+ /* kvm_hv */
+ { "kvm_page_fault_enter", 0x100 },
+ /* dpu */
+ { "dpu_crtc_setup_mixer", 0x100 },
+ /* binder */
+ { "binder_transaction", 0x100 },
+ /* bcachefs */
+ { "btree_path_free", 0x100 },
+ /* hfi1_tx */
+ { "hfi1_sdma_progress", 0x1000 },
+ /* iptfs */
+ { "iptfs_ingress_postq_event", 0x1000 },
+ /* neigh */
+ { "neigh_update", 0x10 },
+ /* snd_firewire_lib */
+ { "amdtp_packet", 0x100 },
};
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
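
[Editorial note: reading the masks in this table: each tracepoint argument
owns one hex nibble, so 0x1 marks argument 0 as possibly NULL, 0x10000 marks
argument 4, and 0x1001 marks arguments 0 and 3. A sketch of the test the
verifier applies:

/* nullable if the low bit of the nibble for argument arg is set */
static bool raw_tp_arg_may_be_null(u64 mask, int arg)
{
	return mask & (0x1ULL << (arg * 4));
}
]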
@@ -6679,6 +6776,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->reg_type = ctx_arg_info->reg_type;
info->btf = ctx_arg_info->btf ? : btf_vmlinux;
info->btf_id = ctx_arg_info->btf_id;
+ info->ref_obj_id = ctx_arg_info->ref_obj_id;
return true;
}
}
@@ -6745,7 +6843,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_type_tag(t)) {
+ if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
tag_value = __btf_name_by_offset(btf, t->name_off);
if (strcmp(tag_value, "user") == 0)
info->reg_type |= MEM_USER;
@@ -7004,7 +7102,7 @@ error:
/* check type tag */
t = btf_type_by_id(btf, mtype->type);
- if (btf_type_is_type_tag(t)) {
+ if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
tag_value = __btf_name_by_offset(btf, t->name_off);
/* check __user tag */
if (strcmp(tag_value, "user") == 0)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index da729cbbaeb9..a0200fbbace9 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2290,17 +2290,18 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
insn->code = BPF_JMP | BPF_CALL_ARGS;
}
#endif
-#else
+#endif
+
static unsigned int __bpf_prog_ret0_warn(const void *ctx,
const struct bpf_insn *insn)
{
/* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
- * is not working properly, so warn about it!
+ * is not working properly, or the interpreter is being used when
+ * prog->jit_requested is not 0, so warn about it!
*/
WARN_ON_ONCE(1);
return 0;
}
-#endif
bool bpf_prog_map_compatible(struct bpf_map *map,
const struct bpf_prog *fp)
@@ -2380,8 +2381,18 @@ static void bpf_prog_select_func(struct bpf_prog *fp)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+ u32 idx = (round_up(stack_depth, 32) / 32) - 1;
- fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+ /* may_goto may cause stack size > 512, leading to idx out-of-bounds.
+ * Such programs must be JITed (the verifier rejects them otherwise),
+ * and JITed programs don't use the interpreter's bpf_func, so no
+ * bounds check is needed for them.
+ */
+ if (!fp->jit_requested &&
+ !WARN_ON_ONCE(idx >= ARRAY_SIZE(interpreters))) {
+ fp->bpf_func = interpreters[idx];
+ } else {
+ fp->bpf_func = __bpf_prog_ret0_warn;
+ }
#else
fp->bpf_func = __bpf_prog_ret0_warn;
#endif
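
[Editorial note: the arithmetic being guarded, as a worked example —
interpreters[] holds one entry per 32 bytes of stack up to MAX_BPF_STACK
(512 bytes):

/* stack_depth = 512: round_up(512, 32) / 32 - 1 = 15 (last valid slot)
 * stack_depth = 544: round_up(544, 32) / 32 - 1 = 16 (out of bounds)
 * may_goto instrumentation can push stack_depth past 512; such programs
 * must be JITed (see the MAX_BPF_STACK check added to the verifier below).
 */
]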
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index f27ce162427a..183298fc11ba 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3067,6 +3067,50 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user
return ret + 1;
}
+/**
+ * bpf_copy_from_user_task_str() - Copy a string from a task's address space
+ * @dst: Destination address, in kernel space. This buffer must be
+ * at least @dst__sz bytes long.
+ * @dst__sz: Maximum number of bytes to copy, including the trailing NUL.
+ * @unsafe_ptr__ign: Source address in the task's address space.
+ * @tsk: The task whose address space will be used
+ * @flags: The only supported flag is BPF_F_PAD_ZEROS
+ *
+ * Copies a NUL terminated string from a task's address space to the @dst
+ * buffer. If the user string is too long, this still ensures NUL termination
+ * of @dst unless the buffer size is 0.
+ *
+ * If the BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success
+ * and memset all of @dst on failure.
+ *
+ * Return: The number of copied bytes on success including the NUL terminator.
+ * A negative error code on failure.
+ */
+__bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz,
+ const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk, u64 flags)
+{
+ int ret;
+
+ if (unlikely(flags & ~BPF_F_PAD_ZEROS))
+ return -EINVAL;
+
+ if (unlikely(dst__sz == 0))
+ return 0;
+
+ ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_ptr__ign, dst, dst__sz, 0);
+ if (ret < 0) {
+ if (flags & BPF_F_PAD_ZEROS)
+ memset(dst, 0, dst__sz);
+ return ret;
+ }
+
+ if (flags & BPF_F_PAD_ZEROS)
+ memset(dst + ret, 0, dst__sz - ret);
+
+ return ret + 1;
+}
+
/* Keep unsigned long in prototype so that kfunc is usable when emitted to
* vmlinux.h in BPF programs directly, but note that while in BPF prog, the
* unsigned long always points to 8-byte region on stack, the kernel may only
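
[Editorial note: a rough BPF-side usage sketch of the new kfunc; the sleepable
iterator attach point and the argv read are illustrative, not part of this
patch:

SEC("iter.s/task")
int dump_argv0(struct bpf_iter__task *ctx)
{
	struct task_struct *task = ctx->task;
	char buf[64];
	int len;

	if (!task || !task->mm)
		return 0;

	/* the kfunc is sleepable (KF_SLEEPABLE), so a sleepable program is
	 * required; BPF_F_PAD_ZEROS zero-fills the unused tail of buf
	 */
	len = bpf_copy_from_user_task_str(buf, sizeof(buf),
					  (const void *)task->mm->arg_start,
					  task, BPF_F_PAD_ZEROS);
	if (len > 0)
		BPF_SEQ_PRINTF(ctx->meta->seq, "%s\n", buf);
	return 0;
}
]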
@@ -3174,6 +3218,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_get_kmem_cache)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e1e42e918ba7..dbd89c13dd32 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2314,6 +2314,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
kvfree(prog->aux->jited_linfo);
kvfree(prog->aux->linfo);
kfree(prog->aux->kfunc_tab);
+ kfree(prog->aux->ctx_arg_info);
if (prog->aux->attach_btf)
btf_put(prog->aux->attach_btf);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 60611df77957..fcdeb00e21c3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1545,6 +1545,17 @@ static void release_reference_state(struct bpf_verifier_state *state, int idx)
return;
}
+static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id)
+{
+ int i;
+
+ for (i = 0; i < state->acquired_refs; i++)
+ if (state->refs[i].id == ptr_id)
+ return true;
+
+ return false;
+}
+
static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
{
int i;
@@ -1600,6 +1611,14 @@ static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *st
return NULL;
}
+static void update_peak_states(struct bpf_verifier_env *env)
+{
+ u32 cur_states;
+
+ cur_states = env->explored_states_size + env->free_list_size;
+ env->peak_states = max(env->peak_states, cur_states);
+}
+
static void free_func_state(struct bpf_func_state *state)
{
if (!state)
@@ -1622,6 +1641,50 @@ static void free_verifier_state(struct bpf_verifier_state *state,
kfree(state);
}
+/* struct bpf_verifier_state->{parent,loop_entry} refer to states
+ * that are in either of env->{explored_states,free_list}.
+ * In both cases the state is contained in struct bpf_verifier_state_list.
+ */
+static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
+{
+ if (st->parent)
+ return container_of(st->parent, struct bpf_verifier_state_list, state);
+ return NULL;
+}
+
+static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st)
+{
+ if (st->loop_entry)
+ return container_of(st->loop_entry, struct bpf_verifier_state_list, state);
+ return NULL;
+}
+
+/* A state can be freed if it is no longer referenced:
+ * - is in the env->free_list;
+ * - has no child states;
+ * - is not used as loop_entry.
+ *
+ * Freeing a state can make its loop_entry freeable.
+ */
+static void maybe_free_verifier_state(struct bpf_verifier_env *env,
+ struct bpf_verifier_state_list *sl)
+{
+ struct bpf_verifier_state_list *loop_entry_sl;
+
+ while (sl && sl->in_free_list &&
+ sl->state.branches == 0 &&
+ sl->state.used_as_loop_entry == 0) {
+ loop_entry_sl = state_loop_entry_as_list(&sl->state);
+ if (loop_entry_sl)
+ loop_entry_sl->state.used_as_loop_entry--;
+ list_del(&sl->node);
+ free_verifier_state(&sl->state, false);
+ kfree(sl);
+ env->free_list_size--;
+ sl = loop_entry_sl;
+ }
+}
+
/* copy verifier state from src to dst growing dst stack space
* when necessary to accommodate larger src stack
*/
@@ -1661,6 +1724,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->callback_unroll_depth = src->callback_unroll_depth;
dst_state->used_as_loop_entry = src->used_as_loop_entry;
dst_state->may_goto_depth = src->may_goto_depth;
+ dst_state->loop_entry = src->loop_entry;
for (i = 0; i <= src->curframe; i++) {
dst = dst_state->frame[i];
if (!dst) {
@@ -1681,7 +1745,7 @@ static u32 state_htab_size(struct bpf_verifier_env *env)
return env->prog->len;
}
-static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
+static struct list_head *explored_state(struct bpf_verifier_env *env, int idx)
{
struct bpf_verifier_state *cur = env->cur_state;
struct bpf_func_state *state = cur->frame[cur->curframe];
@@ -1789,16 +1853,13 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
* # Find outermost loop entry known for n
* def get_loop_entry(n):
* h = entries.get(n, None)
- * while h in entries and entries[h] != h:
+ * while h in entries:
* h = entries[h]
* return h
*
- * # Update n's loop entry if h's outermost entry comes
- * # before n's outermost entry in current DFS path.
+ * # Update n's loop entry if h comes before n in current DFS path.
* def update_loop_entry(n, h):
- * n1 = get_loop_entry(n) or n
- * h1 = get_loop_entry(h) or h
- * if h1 in path and depths[h1] <= depths[n1]:
+ * if h in path and depths[h] < depths[entries.get(n, n)]:
- * entries[n] = h1
+ * entries[n] = h
*
* def dfs(n, depth):
@@ -1810,7 +1871,7 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
* # Case A: explore succ and update cur's loop entry
* # only if succ's entry is in current DFS path.
* dfs(succ, depth + 1)
- * h = get_loop_entry(succ)
+ * h = entries.get(succ, None)
* update_loop_entry(n, h)
* else:
* # Case B or C depending on `h1 in path` check in update_loop_entry().
@@ -1822,46 +1883,49 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
* and cur's loop entry has to be updated (case A), handle this in
* update_branch_counts();
* - use st->branch > 0 as a signal that st is in the current DFS path;
- * - handle cases B and C in is_state_visited();
- * - update topmost loop entry for intermediate states in get_loop_entry().
+ * - handle cases B and C in is_state_visited().
*/
-static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
+static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
{
- struct bpf_verifier_state *topmost = st->loop_entry, *old;
+ struct bpf_verifier_state *topmost = st->loop_entry;
+ u32 steps = 0;
- while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
+ while (topmost && topmost->loop_entry) {
+ if (steps++ > st->dfs_depth) {
+ WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n");
+ verbose(env, "verifier bug: infinite loop in get_loop_entry()\n");
+ return ERR_PTR(-EFAULT);
+ }
topmost = topmost->loop_entry;
- /* Update loop entries for intermediate states to avoid this
- * traversal in future get_loop_entry() calls.
- */
- while (st && st->loop_entry != topmost) {
- old = st->loop_entry;
- st->loop_entry = topmost;
- st = old;
}
return topmost;
}
-static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
+static void update_loop_entry(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
{
- struct bpf_verifier_state *cur1, *hdr1;
-
- cur1 = get_loop_entry(cur) ?: cur;
- hdr1 = get_loop_entry(hdr) ?: hdr;
- /* The head1->branches check decides between cases B and C in
- * comment for get_loop_entry(). If hdr1->branches == 0 then
+ /* The hdr->branches check decides between cases B and C in
+ * comment for get_loop_entry(). If hdr->branches == 0 then
* hdr's topmost loop entry is not in current DFS path,
* hence 'cur' and 'hdr' are not in the same loop and there is
* no need to update cur->loop_entry.
*/
- if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
+ if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) {
+ if (cur->loop_entry) {
+ cur->loop_entry->used_as_loop_entry--;
+ maybe_free_verifier_state(env, state_loop_entry_as_list(cur));
+ }
cur->loop_entry = hdr;
- hdr->used_as_loop_entry = true;
+ hdr->used_as_loop_entry++;
}
}
static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
+ struct bpf_verifier_state_list *sl = NULL, *parent_sl;
+ struct bpf_verifier_state *parent;
+
while (st) {
u32 br = --st->branches;
@@ -1871,7 +1935,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
* This is a part of 'case A' in get_loop_entry() comment.
*/
if (br == 0 && st->parent && st->loop_entry)
- update_loop_entry(st->parent, st->loop_entry);
+ update_loop_entry(env, st->parent, st->loop_entry);
/* WARN_ON(br > 1) technically makes sense here,
* but see comment in push_stack(), hence:
@@ -1881,7 +1945,12 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
br);
if (br)
break;
- st = st->parent;
+ parent = st->parent;
+ parent_sl = state_parent_as_list(st);
+ if (sl)
+ maybe_free_verifier_state(env, sl);
+ st = parent;
+ sl = parent_sl;
}
}
@@ -5983,7 +6052,8 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type,
- struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx)
+ struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx,
+ u32 *ref_obj_id)
{
struct bpf_insn_access_aux info = {
.reg_type = *reg_type,
@@ -6005,8 +6075,16 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
*is_retval = info.is_retval;
if (base_type(*reg_type) == PTR_TO_BTF_ID) {
+ if (info.ref_obj_id &&
+ !find_reference_state(env->cur_state, info.ref_obj_id)) {
+ verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
+ off);
+ return -EACCES;
+ }
+
*btf = info.btf;
*btf_id = info.btf_id;
+ *ref_obj_id = info.ref_obj_id;
} else {
env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
}
@@ -7369,7 +7447,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
struct bpf_retval_range range;
enum bpf_reg_type reg_type = SCALAR_VALUE;
struct btf *btf = NULL;
- u32 btf_id = 0;
+ u32 btf_id = 0, ref_obj_id = 0;
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
@@ -7382,7 +7460,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return err;
err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
- &btf_id, &is_retval, is_ldsx);
+ &btf_id, &is_retval, is_ldsx, &ref_obj_id);
if (err)
verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
@@ -7413,6 +7491,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (base_type(reg_type) == PTR_TO_BTF_ID) {
regs[value_regno].btf = btf;
regs[value_regno].btf_id = btf_id;
+ regs[value_regno].ref_obj_id = ref_obj_id;
}
}
regs[value_regno].type = reg_type;
@@ -8431,10 +8510,12 @@ static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
{
struct bpf_verifier_state_list *sl;
struct bpf_verifier_state *st;
+ struct list_head *pos, *head;
/* Explored states are pushed in stack order, most recent states come first */
- sl = *explored_state(env, insn_idx);
- for (; sl; sl = sl->next) {
+ head = explored_state(env, insn_idx);
+ list_for_each(pos, head) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
/* If st->branches != 0 state is a part of current DFS verification path,
* hence cur & st for a loop.
*/
@@ -10752,6 +10833,8 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
{
struct bpf_verifier_state *state = env->cur_state;
+ enum bpf_prog_type type = resolve_prog_type(env->prog);
+ struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
bool refs_lingering = false;
int i;
@@ -10761,6 +10844,12 @@ static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exi
for (i = 0; i < state->acquired_refs; i++) {
if (state->refs[i].type != REF_TYPE_PTR)
continue;
+ /* Allow struct_ops programs to return a referenced kptr back to
+ * the kernel. Type checks are performed later in check_return_code.
+ */
+ if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
+ reg->ref_obj_id == state->refs[i].id)
+ continue;
verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
state->refs[i].id, state->refs[i].insn_idx);
refs_lingering = true;
@@ -11781,6 +11870,8 @@ enum special_kfunc_type {
KF_bpf_iter_num_new,
KF_bpf_iter_num_next,
KF_bpf_iter_num_destroy,
+ KF_bpf_set_dentry_xattr,
+ KF_bpf_remove_dentry_xattr,
};
BTF_SET_START(special_kfunc_set)
@@ -11810,6 +11901,10 @@ BTF_ID(func, bpf_wq_set_callback_impl)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#endif
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_set_dentry_xattr)
+BTF_ID(func, bpf_remove_dentry_xattr)
+#endif
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -11859,6 +11954,13 @@ BTF_ID(func, bpf_local_irq_restore)
BTF_ID(func, bpf_iter_num_new)
BTF_ID(func, bpf_iter_num_next)
BTF_ID(func, bpf_iter_num_destroy)
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_set_dentry_xattr)
+BTF_ID(func, bpf_remove_dentry_xattr)
+#else
+BTF_ID_UNUSED
+BTF_ID_UNUSED
+#endif
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -16399,13 +16501,14 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
const char *exit_ctx = "At program exit";
struct tnum enforce_attach_type_range = tnum_unknown;
const struct bpf_prog *prog = env->prog;
- struct bpf_reg_state *reg;
+ struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_retval_range range = retval_range(0, 1);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
struct bpf_func_state *frame = env->cur_state->frame[0];
const bool is_subprog = frame->subprogno;
bool return_32bit = false;
+ const struct btf_type *reg_type, *ret_type = NULL;
/* LSM and struct_ops func-ptr's return type could be "void" */
if (!is_subprog || frame->in_exception_callback_fn) {
@@ -16414,10 +16517,26 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
if (prog->expected_attach_type == BPF_LSM_CGROUP)
/* See below, can be 0 or 0-1 depending on hook. */
break;
- fallthrough;
+ if (!prog->aux->attach_func_proto->type)
+ return 0;
+ break;
case BPF_PROG_TYPE_STRUCT_OPS:
if (!prog->aux->attach_func_proto->type)
return 0;
+
+ if (frame->in_exception_callback_fn)
+ break;
+
+ /* Allow a struct_ops program to return a referenced kptr if it
+ * matches the operator's return type and is in its unmodified
+ * form. A scalar zero (i.e., a null pointer) is also allowed.
+ */
+ reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
+ ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
+ prog->aux->attach_func_proto->type,
+ NULL);
+ if (ret_type && ret_type == reg_type && reg->ref_obj_id)
+ return __check_ptr_off_reg(env, reg, regno, false);
break;
default:
break;
@@ -16439,8 +16558,6 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
return -EACCES;
}
- reg = cur_regs(env) + regno;
-
if (frame->in_async_callback_fn) {
/* enforce return zero from async callbacks like timer */
exit_ctx = "At async callback return";
@@ -16539,6 +16656,11 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
case BPF_PROG_TYPE_NETFILTER:
range = retval_range(NF_DROP, NF_ACCEPT);
break;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ if (!ret_type)
+ return 0;
+ range = retval_range(0, 0);
+ break;
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
* depends on the to-be-replaced kernel func or bpf program.
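
[Editorial note: together with the check_reference_leak() change above, the
BPF-side shape this enables looks roughly like the following; ops name and
types are illustrative:

SEC("struct_ops/pick")
struct task_struct *BPF_PROG(pick, struct task_struct *prev)
{
	/* prev arrives with a reference because its stub parameter is
	 * tagged "__ref"; returning it unmodified (or returning NULL,
	 * i.e. scalar zero) satisfies check_return_code().
	 */
	return prev;
}
]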
@@ -17815,18 +17937,22 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
static void clean_live_states(struct bpf_verifier_env *env, int insn,
struct bpf_verifier_state *cur)
{
+ struct bpf_verifier_state *loop_entry;
struct bpf_verifier_state_list *sl;
+ struct list_head *pos, *head;
- sl = *explored_state(env, insn);
- while (sl) {
+ head = explored_state(env, insn);
+ list_for_each(pos, head) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
if (sl->state.branches)
- goto next;
+ continue;
+ loop_entry = get_loop_entry(env, &sl->state);
+ if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches)
+ continue;
if (sl->state.insn_idx != insn ||
!same_callsites(&sl->state, cur))
- goto next;
+ continue;
clean_verifier_state(env, &sl->state);
-next:
- sl = sl->next;
}
}
@@ -18517,10 +18643,11 @@ static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct bpf_verifier_state_list *new_sl;
- struct bpf_verifier_state_list *sl, **pprev;
+ struct bpf_verifier_state_list *sl;
struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
int i, j, n, err, states_cnt = 0;
bool force_new_state, add_new_state, force_exact;
+ struct list_head *pos, *tmp, *head;
force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
/* Avoid accumulating infinitely long jmp history */
@@ -18539,15 +18666,14 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
env->insn_processed - env->prev_insn_processed >= 8)
add_new_state = true;
- pprev = explored_state(env, insn_idx);
- sl = *pprev;
-
clean_live_states(env, insn_idx, cur);
- while (sl) {
+ head = explored_state(env, insn_idx);
+ list_for_each_safe(pos, tmp, head) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
states_cnt++;
if (sl->state.insn_idx != insn_idx)
- goto next;
+ continue;
if (sl->state.branches) {
struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
@@ -18621,7 +18747,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
- update_loop_entry(cur, &sl->state);
+ update_loop_entry(env, cur, &sl->state);
goto hit;
}
}
@@ -18630,7 +18756,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
if (is_may_goto_insn_at(env, insn_idx)) {
if (sl->state.may_goto_depth != cur->may_goto_depth &&
states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
- update_loop_entry(cur, &sl->state);
+ update_loop_entry(env, cur, &sl->state);
goto hit;
}
}
@@ -18697,11 +18823,13 @@ skip_inf_loop_check:
*
* Additional details are in the comment before get_loop_entry().
*/
- loop_entry = get_loop_entry(&sl->state);
+ loop_entry = get_loop_entry(env, &sl->state);
+ if (IS_ERR(loop_entry))
+ return PTR_ERR(loop_entry);
force_exact = loop_entry && loop_entry->branches > 0;
if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) {
if (force_exact)
- update_loop_entry(cur, loop_entry);
+ update_loop_entry(env, cur, loop_entry);
hit:
sl->hit_cnt++;
/* reached equivalent register/stack state,
@@ -18750,31 +18878,13 @@ miss:
/* the state is unlikely to be useful. Remove it to
* speed up verification
*/
- *pprev = sl->next;
- if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
- !sl->state.used_as_loop_entry) {
- u32 br = sl->state.branches;
-
- WARN_ONCE(br,
- "BUG live_done but branches_to_explore %d\n",
- br);
- free_verifier_state(&sl->state, false);
- kfree(sl);
- env->peak_states--;
- } else {
- /* cannot free this state, since parentage chain may
- * walk it later. Add it for free_list instead to
- * be freed at the end of verification
- */
- sl->next = env->free_list;
- env->free_list = sl;
- }
- sl = *pprev;
- continue;
+ sl->in_free_list = true;
+ list_del(&sl->node);
+ list_add(&sl->node, &env->free_list);
+ env->free_list_size++;
+ env->explored_states_size--;
+ maybe_free_verifier_state(env, sl);
}
-next:
- pprev = &sl->next;
- sl = *pprev;
}
if (env->max_states_per_insn < states_cnt)
@@ -18799,7 +18909,8 @@ next:
if (!new_sl)
return -ENOMEM;
env->total_states++;
- env->peak_states++;
+ env->explored_states_size++;
+ update_peak_states(env);
env->prev_jmps_processed = env->jmps_processed;
env->prev_insn_processed = env->insn_processed;
@@ -18823,8 +18934,8 @@ next:
cur->first_insn_idx = insn_idx;
cur->insn_hist_start = cur->insn_hist_end;
cur->dfs_depth = new->dfs_depth + 1;
- new_sl->next = *explored_state(env, insn_idx);
- *explored_state(env, insn_idx) = new_sl;
+ list_add(&new_sl->node, head);
+
/* connect new state to parentage chain. Current frame needs all
* registers connected. Only r6 - r9 of the callers are alive (pushed
* to the stack implicitly by JITs) so in callers' frames connect just
@@ -19245,6 +19356,8 @@ process_bpf_exit:
return err;
break;
} else {
+ if (WARN_ON_ONCE(env->cur_state->loop_entry))
+ env->cur_state->loop_entry = NULL;
do_print_state = true;
continue;
}
@@ -20939,6 +21052,14 @@ static void specialize_kfunc(struct bpf_verifier_env *env,
*/
env->seen_direct_write = seen_direct_write;
}
+
+ if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr] &&
+ bpf_lsm_has_d_inode_locked(prog))
+ *addr = (unsigned long)bpf_set_dentry_xattr_locked;
+
+ if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr] &&
+ bpf_lsm_has_d_inode_locked(prog))
+ *addr = (unsigned long)bpf_remove_dentry_xattr_locked;
}
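
[Editorial note: a sketch of the effect on the BPF side — the program calls
the generic kfunc, and on LSM hooks where the inode lock is already held (per
bpf_lsm_has_d_inode_locked()) the call site is patched to the _locked variant.
Hook choice and xattr name below are illustrative:

SEC("lsm.s/inode_removexattr")
int BPF_PROG(strip_demo_xattr, struct mnt_idmap *idmap, struct dentry *dentry,
	     const char *name)
{
	/* rewritten by specialize_kfunc() to bpf_remove_dentry_xattr_locked()
	 * on hooks that already hold d_inode's lock
	 */
	return bpf_remove_dentry_xattr(dentry, "user.demo");
}
]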
static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
@@ -21897,6 +22018,13 @@ next_insn:
if (subprogs[cur_subprog + 1].start == i + delta + 1) {
subprogs[cur_subprog].stack_depth += stack_depth_extra;
subprogs[cur_subprog].stack_extra = stack_depth_extra;
+
+ stack_depth = subprogs[cur_subprog].stack_depth;
+ if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
+ verbose(env, "stack size %d(extra %d) is too large\n",
+ stack_depth, stack_depth_extra);
+ return -EINVAL;
+ }
cur_subprog++;
stack_depth = subprogs[cur_subprog].stack_depth;
stack_depth_extra = 0;
@@ -22131,31 +22259,29 @@ static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
static void free_states(struct bpf_verifier_env *env)
{
- struct bpf_verifier_state_list *sl, *sln;
+ struct bpf_verifier_state_list *sl;
+ struct list_head *head, *pos, *tmp;
int i;
- sl = env->free_list;
- while (sl) {
- sln = sl->next;
+ list_for_each_safe(pos, tmp, &env->free_list) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
free_verifier_state(&sl->state, false);
kfree(sl);
- sl = sln;
}
- env->free_list = NULL;
+ INIT_LIST_HEAD(&env->free_list);
if (!env->explored_states)
return;
for (i = 0; i < state_htab_size(env); i++) {
- sl = env->explored_states[i];
+ head = &env->explored_states[i];
- while (sl) {
- sln = sl->next;
+ list_for_each_safe(pos, tmp, head) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
free_verifier_state(&sl->state, false);
kfree(sl);
- sl = sln;
}
- env->explored_states[i] = NULL;
+ INIT_LIST_HEAD(&env->explored_states[i]);
}
}
@@ -22163,6 +22289,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
struct bpf_subprog_info *sub = subprog_info(env, subprog);
+ struct bpf_prog_aux *aux = env->prog->aux;
struct bpf_verifier_state *state;
struct bpf_reg_state *regs;
int ret, i;
@@ -22270,6 +22397,13 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
mark_reg_known_zero(env, regs, BPF_REG_1);
}
+ /* Acquire references for struct_ops program arguments tagged with "__ref" */
+ if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+ for (i = 0; i < aux->ctx_arg_info_size; i++)
+ aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ?
+ acquire_reference(env, 0) : 0;
+ }
+
ret = do_check(env);
out:
/* check for NULL is necessary, since cur_state can be freed inside
@@ -22392,6 +22526,15 @@ static void print_verification_stats(struct bpf_verifier_env *env)
env->peak_states, env->longest_mark_read_walk);
}
+int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
+ const struct bpf_ctx_arg_aux *info, u32 cnt)
+{
+ prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL);
+ prog->aux->ctx_arg_info_size = cnt;
+
+ return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
+}
+
static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
{
const struct btf_type *t, *func_proto;
@@ -22472,17 +22615,12 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
return -EACCES;
}
- /* btf_ctx_access() used this to provide argument type info */
- prog->aux->ctx_arg_info =
- st_ops_desc->arg_info[member_idx].info;
- prog->aux->ctx_arg_info_size =
- st_ops_desc->arg_info[member_idx].cnt;
-
prog->aux->attach_func_proto = func_proto;
prog->aux->attach_func_name = mname;
env->ops = st_ops->verifier_ops;
- return 0;
+ return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
+ st_ops_desc->arg_info[member_idx].cnt);
}
#define SECURITY_PREFIX "security_"
@@ -22932,9 +23070,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
prog->aux->attach_btf_trace = true;
return 0;
} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
- if (!bpf_iter_prog_supported(prog))
- return -EINVAL;
- return 0;
+ return bpf_iter_prog_supported(prog);
}
if (prog->type == BPF_PROG_TYPE_LSM) {
@@ -23113,12 +23249,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
env->explored_states = kvcalloc(state_htab_size(env),
- sizeof(struct bpf_verifier_state_list *),
+ sizeof(struct list_head),
GFP_USER);
ret = -ENOMEM;
if (!env->explored_states)
goto skip_full_check;
+ for (i = 0; i < state_htab_size(env); i++)
+ INIT_LIST_HEAD(&env->explored_states[i]);
+ INIT_LIST_HEAD(&env->free_list);
+
ret = check_btf_info_early(env, attr, uattr);
if (ret < 0)
goto skip_full_check;