Merge branch 'bpf-implement-mprog-api-on-top-of-existing-cgroup-progs'

Yonghong Song says: ==================== bpf: Implement mprog API on top of existing cgroup progs Current cgroup prog ordering is appending at attachment time. This is not ideal. In some cases, users want specific ordering at a particular cgroup level. For example, in Meta, we have a case where three different applications all have cgroup/setsockopt progs and they require specific ordering. Current approach is to use a bpfchainer where one bpf prog contains multiple global functions and each global function can be freplaced by a prog for a specific application. The ordering of global functions decides the ordering of those application specific bpf progs. Using bpfchainer is a centralized approach and is not desirable as one of applications acts as a daemon. The decentralized attachment approach is more favorable for those applications. To address this, the existing mprog API ([2]) seems an ideal solution with supporting BPF_F_BEFORE and BPF_F_AFTER flags on top of existing cgroup bpf implementation. More specifically, the support is added for prog/link attachment with BPF_F_BEFORE and BPF_F_AFTER. The kernel mprog interface ([2]) is not used and the implementation is directly done in cgroup bpf code base. The mprog 'revision' is also implemented in attach/detach/replace, so users can query revision number to check the change of cgroup prog list. The patch set contains 5 patches. Patch 1 adds revision support for cgroup bpf progs. Patch 2 implements mprog API implementation for prog/link attach and revision update. Patch 3 adds a new libbpf API to do cgroup link attach with flags like BPF_F_BEFORE/BPF_F_AFTER. Patches 4 and 5 add two tests to validate the implementation. [1] https://lore.kernel.org/r/20250224230116.283071-1-yonghong.song@linux.dev [2] https://lore.kernel.org/r/20230719140858.13224-2-daniel@iogearbox.net Changelogs: v4 -> v5: - v4: https://lore.kernel.org/bpf/20250530173812.1823479-1-yonghong.song@linux.dev/ - Remove early prog/link checking based flags and id_or_fd as later code will do checking as well. - Do proper cgroup flag checking for bpf_prog_attach(). v3 -> v4: - v3: https://lore.kernel.org/bpf/20250517162720.4077882-1-yonghong.song@linux.dev/ - Refactor some to make BPF_F_BEFORE/BPF_F_AFTER handling easier to understand. - Perviously, I degraded 'link' to 'prog' for later mprog handling. This is not correct. Similar to mprog.c, we should be check 'link' instead link->prog since it is possible two different links may have the same underlying prog and we do not want to miss supporting such use case. v2 -> v3: - v2: https://lore.kernel.org/bpf/20250508223524.487875-1-yonghong.song@linux.dev/ - Big change to replace get_anchor_prog() to get_prog_list() so the 'struct bpf_prog_list *' is returned directly. - Support 'BPF_F_BEFORE | BPF_F_AFTER' attachment if the prog list is empty and flags do not have 'BPF_F_LINK | BPF_F_ID' and id_or_fd is 0. - Add BPF_F_LINK support. - Patch 4 is added to reuse id_from_prog_fd() and id_from_link_fd(). v1 -> v2: - v1: https://lore.kernel.org/bpf/20250411011523.1838771-1-yonghong.song@linux.dev/ - Change cgroup_bpf.revisions from atomic64_t to u64. - Added missing bpf_prog_put in various places. - Rename get_cmp_prog() to get_anchor_prog(). The implementation tries to find the anchor prog regardless of whether id_or_fd is non-NULL or not. - Rename bpf_cgroup_prog_attached() to is_cgroup_prog_type() and handle BPF_PROG_TYPE_LSM properly (with BPF_LSM_CGROUP attach type). - I kept 'id || id_or_fd' condition as the condition 'id' is also used in mprog.c so I assume it is okay in cgroup.c as well. ==================== Link: https://patch.msgid.link/20250606163131.2428225-1-yonghong.song@linux.dev Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
author: Andrii Nakryiko <andrii@kernel.org> 2025-06-09 16:17:12 -0700
committer: Andrii Nakryiko <andrii@kernel.org> 2025-06-09 16:28:31 -0700
commit: 4d2815a1cc3dffde425acc448247d74ccdc94bc9 (patch)
tree: ee4f3bbbef0f5e95caa73394b6e6217b261234df /kernel/bpf/syscall.c
parent: Documentation: Fix spelling mistake. (diff)
parent: selftests/bpf: Add two selftests for mprog API based cgroup progs (diff)
download: linux-4d2815a1cc3dffde425acc448247d74ccdc94bc9.tar.gz
linux-4d2815a1cc3dffde425acc448247d74ccdc94bc9.zip
1 files changed, 31 insertions, 15 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 89d027cd7ca0..97ad57ffc404 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4186,6 +4186,25 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 	}
 }
 
+static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype,
+				bool check_atype)
+{
+	switch (ptype) {
+	case BPF_PROG_TYPE_CGROUP_DEVICE:
+	case BPF_PROG_TYPE_CGROUP_SKB:
+	case BPF_PROG_TYPE_CGROUP_SOCK:
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_SOCK_OPS:
+		return true;
+	case BPF_PROG_TYPE_LSM:
+		return check_atype ? atype == BPF_LSM_CGROUP : true;
+	default:
+		return false;
+	}
+}
+
 #define BPF_PROG_ATTACH_LAST_FIELD expected_revision
 
 #define BPF_F_ATTACH_MASK_BASE	\
@@ -4216,6 +4235,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	if (bpf_mprog_supported(ptype)) {
 		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
 			return -EINVAL;
+	} else if (is_cgroup_prog_type(ptype, 0, false)) {
+		if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG))
+			return -EINVAL;
 	} else {
 		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE)
 			return -EINVAL;
@@ -4233,6 +4255,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return -EINVAL;
 	}
 
+	if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) {
+		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
+		goto out;
+	}
+
 	switch (ptype) {
 	case BPF_PROG_TYPE_SK_SKB:
 	case BPF_PROG_TYPE_SK_MSG:
@@ -4244,20 +4271,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		ret = netns_bpf_prog_attach(attr, prog);
 		break;
-	case BPF_PROG_TYPE_CGROUP_DEVICE:
-	case BPF_PROG_TYPE_CGROUP_SKB:
-	case BPF_PROG_TYPE_CGROUP_SOCK:
-	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
-	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
-	case BPF_PROG_TYPE_CGROUP_SYSCTL:
-	case BPF_PROG_TYPE_SOCK_OPS:
-	case BPF_PROG_TYPE_LSM:
-		if (ptype == BPF_PROG_TYPE_LSM &&
-		    prog->expected_attach_type != BPF_LSM_CGROUP)
-			ret = -EINVAL;
-		else
-			ret = cgroup_bpf_prog_attach(attr, ptype, prog);
-		break;
 	case BPF_PROG_TYPE_SCHED_CLS:
 		if (attr->attach_type == BPF_TCX_INGRESS ||
 		    attr->attach_type == BPF_TCX_EGRESS)
@@ -4268,7 +4281,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	default:
 		ret = -EINVAL;
 	}
-
+out:
 	if (ret)
 		bpf_prog_put(prog);
 	return ret;
@@ -4296,6 +4309,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 			if (IS_ERR(prog))
 				return PTR_ERR(prog);
 		}
+	} else if (is_cgroup_prog_type(ptype, 0, false)) {
+		if (attr->attach_flags || attr->relative_fd)
+			return -EINVAL;
 	} else if (attr->attach_flags ||
 		   attr->relative_fd ||
 		   attr->expected_revision) {
author	Andrii Nakryiko <andrii@kernel.org>	2025-06-09 16:17:12 -0700
committer	Andrii Nakryiko <andrii@kernel.org>	2025-06-09 16:28:31 -0700
commit	4d2815a1cc3dffde425acc448247d74ccdc94bc9 (patch)
tree	ee4f3bbbef0f5e95caa73394b6e6217b261234df /kernel/bpf/syscall.c
parent	Documentation: Fix spelling mistake. (diff)
parent	selftests/bpf: Add two selftests for mprog API based cgroup progs (diff)
download	linux-4d2815a1cc3dffde425acc448247d74ccdc94bc9.tar.gz linux-4d2815a1cc3dffde425acc448247d74ccdc94bc9.zip