From 114039b342014680911c35bd6b72624180fd669a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 21 Nov 2022 10:03:39 -0800 Subject: bpf: Move skb->len == 0 checks into __bpf_redirect To avoid potentially breaking existing users. Both mac/no-mac cases have to be amended; mac_header >= network_header is not enough (verified with a new test, see next patch). Fixes: fd1894224407 ("bpf: Don't redirect packets with invalid pkt_len") Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20221121180340.1983627-1-sdf@google.com Signed-off-by: Martin KaFai Lau --- net/bpf/test_run.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/bpf/test_run.c') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 13d578ce2a09..6fba440efc40 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -979,9 +979,6 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) { struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; - if (!skb->len) - return -EINVAL; - if (!__skb) return 0; -- cgit v1.2.3 From 5b481acab4ce017fda8166fa9428511da41109e5 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 6 Dec 2022 15:59:32 +0100 Subject: bpf: do not rely on ALLOW_ERROR_INJECTION for fmod_ret The current way of expressing that a non-bpf kernel component is willing to accept that bpf programs can be attached to it and that they can change the return value is to abuse ALLOW_ERROR_INJECTION. This is debated in the link below, and the result is that it is not a reasonable thing to do. Reuse the kfunc declaration structure to also tag the kernel functions we want to be fmodret. This way we can control from any subsystem which functions are being modified by bpf without touching the verifier. Link: https://lore.kernel.org/all/20221121104403.1545f9b5@gandalf.local.home/ Suggested-by: Alexei Starovoitov Signed-off-by: Benjamin Tissoires Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20221206145936.922196-2-benjamin.tissoires@redhat.com --- include/linux/btf.h | 2 ++ kernel/bpf/btf.c | 30 +++++++++++++++++++++++++----- kernel/bpf/verifier.c | 17 +++++++++++++++-- net/bpf/test_run.c | 14 +++++++++++--- 4 files changed, 53 insertions(+), 10 deletions(-) (limited to 'net/bpf/test_run.c') diff --git a/include/linux/btf.h b/include/linux/btf.h index f9aababc5d78..6a0808e7845c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -412,8 +412,10 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, u32 kfunc_btf_id); +u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); +int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset); s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 35c07afac924..c1df506293e4 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -204,6 +204,7 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_STRUCT_OPS, BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, + BTF_KFUNC_HOOK_FMODRET, BTF_KFUNC_HOOK_MAX, }; @@ -7448,11 +7449,14 @@ u32 *btf_kfunc_id_set_contains(const struct btf *btf, return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id); } -/* This function must be invoked only from initcalls/module init functions */ -int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, - const struct btf_kfunc_id_set *kset) +u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id) +{ + return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id); +} + +static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, + const struct btf_kfunc_id_set *kset) { - enum btf_kfunc_hook hook; struct btf *btf; int ret; @@ -7471,13 +7475,29 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, if (IS_ERR(btf)) return PTR_ERR(btf); - hook = bpf_prog_type_to_kfunc_hook(prog_type); ret = btf_populate_kfunc_set(btf, hook, kset->set); btf_put(btf); return ret; } + +/* This function must be invoked only from initcalls/module init functions */ +int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *kset) +{ + enum btf_kfunc_hook hook; + + hook = bpf_prog_type_to_kfunc_hook(prog_type); + return __register_btf_kfunc_id_set(hook, kset); +} EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); +/* This function must be invoked only from initcalls/module init functions */ +int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset) +{ + return __register_btf_kfunc_id_set(BTF_KFUNC_HOOK_FMODRET, kset); +} +EXPORT_SYMBOL_GPL(register_btf_fmodret_id_set); + s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id) { struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 225666307bba..ec2e6ec7309e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15021,12 +15021,22 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, ret = -EINVAL; switch (prog->type) { case BPF_PROG_TYPE_TRACING: - /* fentry/fexit/fmod_ret progs can be sleepable only if they are + + /* fentry/fexit/fmod_ret progs can be sleepable if they are * attached to ALLOW_ERROR_INJECTION and are not in denylist. */ if (!check_non_sleepable_error_inject(btf_id) && within_error_injection_list(addr)) ret = 0; + /* fentry/fexit/fmod_ret progs can also be sleepable if they are + * in the fmodret id set with the KF_SLEEPABLE flag. + */ + else { + u32 *flags = btf_kfunc_is_modify_return(btf, btf_id); + + if (flags && (*flags & KF_SLEEPABLE)) + ret = 0; + } break; case BPF_PROG_TYPE_LSM: /* LSM progs check that they are attached to bpf_lsm_*() funcs. @@ -15047,7 +15057,10 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, bpf_log(log, "can't modify return codes of BPF programs\n"); return -EINVAL; } - ret = check_attach_modify_return(addr, tname); + ret = -EINVAL; + if (btf_kfunc_is_modify_return(btf, btf_id) || + !check_attach_modify_return(addr, tname)) + ret = 0; if (ret) { bpf_log(log, "%s() is not modifiable\n", tname); return ret; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 13d578ce2a09..5079fb089b5f 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -489,7 +489,6 @@ int noinline bpf_fentry_test1(int a) return a + 1; } EXPORT_SYMBOL_GPL(bpf_fentry_test1); -ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO); int noinline bpf_fentry_test2(int a, u64 b) { @@ -733,7 +732,15 @@ noinline void bpf_kfunc_call_test_destructive(void) __diag_pop(); -ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); +BTF_SET8_START(bpf_test_modify_return_ids) +BTF_ID_FLAGS(func, bpf_modify_return_test) +BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE) +BTF_SET8_END(bpf_test_modify_return_ids) + +static const struct btf_kfunc_id_set bpf_test_modify_return_set = { + .owner = THIS_MODULE, + .set = &bpf_test_modify_return_ids, +}; BTF_SET8_START(test_sk_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) @@ -1668,7 +1675,8 @@ static int __init bpf_prog_test_run_init(void) }; int ret; - ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); + ret = register_btf_fmodret_id_set(&bpf_test_modify_return_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_prog_test_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_prog_test_kfunc_set); return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc, -- cgit v1.2.3 From ce098da1497c6dee9589fce2c61d1910f4fcf0e7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Dec 2022 22:02:59 -0800 Subject: skbuff: Introduce slab_build_skb() syzkaller reported: BUG: KASAN: slab-out-of-bounds in __build_skb_around+0x235/0x340 net/core/skbuff.c:294 Write of size 32 at addr ffff88802aa172c0 by task syz-executor413/5295 For bpf_prog_test_run_skb(), which uses a kmalloc()ed buffer passed to build_skb(). When build_skb() is passed a frag_size of 0, it means the buffer came from kmalloc. In these cases, ksize() is used to find its actual size, but since the allocation may not have been made to that size, actually perform the krealloc() call so that all the associated buffer size checking will be correctly notified (and use the "new" pointer so that compiler hinting works correctly). Split this logic out into a new interface, slab_build_skb(), but leave the original 0 checking for now to catch any stragglers. Reported-by: syzbot+fda18eaa8c12534ccb3b@syzkaller.appspotmail.com Link: https://groups.google.com/g/syzkaller-bugs/c/UnIKxTtU5-0/m/-wbXinkgAQAJ Fixes: 38931d8989b5 ("mm: Make ksize() a reporting-only function") Cc: Pavel Begunkov Cc: pepsipu Cc: syzbot+fda18eaa8c12534ccb3b@syzkaller.appspotmail.com Cc: Vlastimil Babka Cc: kasan-dev Cc: Andrii Nakryiko Cc: ast@kernel.org Cc: Daniel Borkmann Cc: Hao Luo Cc: Jesper Dangaard Brouer Cc: John Fastabend Cc: jolsa@kernel.org Cc: KP Singh Cc: martin.lau@linux.dev Cc: Stanislav Fomichev Cc: song@kernel.org Cc: Yonghong Song Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221208060256.give.994-kees@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 +- include/linux/skbuff.h | 1 + net/bpf/test_run.c | 2 +- net/core/skbuff.c | 70 +++++++++++++++++++++++++++---- 5 files changed, 66 insertions(+), 11 deletions(-) (limited to 'net/bpf/test_run.c') diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index dbe310144780..9f473854b0f4 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -3045,7 +3045,7 @@ error: dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size, DMA_FROM_DEVICE); - skb = build_skb(data, 0); + skb = slab_build_skb(data); if (!skb) { kfree(data); goto error; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index ed274f033626..e5116a86cfbc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -200,7 +200,7 @@ static void qed_ll2b_complete_rx_packet(void *cxt, dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, cdev->ll2->rx_size, DMA_FROM_DEVICE); - skb = build_skb(buffer->data, 0); + skb = slab_build_skb(buffer->data); if (!skb) { DP_INFO(cdev, "Failed to build SKB\n"); kfree(buffer->data); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4e464a27adaf..4c8492401a10 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1255,6 +1255,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, void skb_attempt_defer_free(struct sk_buff *skb); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); +struct sk_buff *slab_build_skb(void *data); /** * alloc_skb - allocate a network buffer diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6094ef7cffcd..c9bfd263dcef 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1128,7 +1128,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, } sock_init_data(NULL, sk); - skb = build_skb(data, 0); + skb = slab_build_skb(data); if (!skb) { kfree(data); kfree(ctx); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4bf95e36ed16..3cbba7099c0f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -270,12 +270,10 @@ static struct sk_buff *napi_skb_cache_get(void) return skb; } -/* Caller must provide SKB that is memset cleared */ -static void __build_skb_around(struct sk_buff *skb, void *data, - unsigned int frag_size) +static inline void __finalize_skb_around(struct sk_buff *skb, void *data, + unsigned int size) { struct skb_shared_info *shinfo; - unsigned int size = frag_size ? : ksize(data); size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -297,15 +295,71 @@ static void __build_skb_around(struct sk_buff *skb, void *data, skb_set_kcov_handle(skb, kcov_common_handle()); } +static inline void *__slab_build_skb(struct sk_buff *skb, void *data, + unsigned int *size) +{ + void *resized; + + /* Must find the allocation size (and grow it to match). */ + *size = ksize(data); + /* krealloc() will immediately return "data" when + * "ksize(data)" is requested: it is the existing upper + * bounds. As a result, GFP_ATOMIC will be ignored. Note + * that this "new" pointer needs to be passed back to the + * caller for use so the __alloc_size hinting will be + * tracked correctly. + */ + resized = krealloc(data, *size, GFP_ATOMIC); + WARN_ON_ONCE(resized != data); + return resized; +} + +/* build_skb() variant which can operate on slab buffers. + * Note that this should be used sparingly as slab buffers + * cannot be combined efficiently by GRO! + */ +struct sk_buff *slab_build_skb(void *data) +{ + struct sk_buff *skb; + unsigned int size; + + skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + memset(skb, 0, offsetof(struct sk_buff, tail)); + data = __slab_build_skb(skb, data, &size); + __finalize_skb_around(skb, data, size); + + return skb; +} +EXPORT_SYMBOL(slab_build_skb); + +/* Caller must provide SKB that is memset cleared */ +static void __build_skb_around(struct sk_buff *skb, void *data, + unsigned int frag_size) +{ + unsigned int size = frag_size; + + /* frag_size == 0 is considered deprecated now. Callers + * using slab buffer should use slab_build_skb() instead. + */ + if (WARN_ONCE(size == 0, "Use slab_build_skb() instead")) + data = __slab_build_skb(skb, data, &size); + + __finalize_skb_around(skb, data, size); +} + /** * __build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of data, or 0 if head was kmalloced + * @frag_size: size of data (must not be 0) * * Allocate a new &sk_buff. Caller provides space holding head and - * skb_shared_info. @data must have been allocated by kmalloc() only if - * @frag_size is 0, otherwise data should come from the page allocator - * or vmalloc() + * skb_shared_info. @data must have been allocated from the page + * allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc() + * allocation is deprecated, and callers should use slab_build_skb() + * instead.) * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : -- cgit v1.2.3