diff options
| author | Christian Brauner <brauner@kernel.org> | 2025-06-24 13:00:16 +0200 |
|---|---|---|
| committer | Christian Brauner <brauner@kernel.org> | 2025-06-24 17:10:59 +0200 |
| commit | 867673063e1da91cf960133f25d87e38fc790d7b (patch) | |
| tree | 07648255a0a28aa0880d45a19213ee2dc90e300c | |
| parent | pidfs: fix pidfs_free_pid() (diff) | |
| parent | selftests/pidfd: decode pidfd file handles withou having to specify an fd (diff) | |
| download | linux-867673063e1da91cf960133f25d87e38fc790d7b.tar.gz linux-867673063e1da91cf960133f25d87e38fc790d7b.zip | |
Merge patch series "fhandle, pidfs: allow open_by_handle_at() purely based on file handle"
Christian Brauner <brauner@kernel.org> says:
Various filesystems such as pidfs and drm support opening file handles
without having to require a file descriptor to identify the filesystem.
The filesystem are global single instances and can be trivially
identified solely on the information encoded in the file handle.
This makes it possible to not have to keep or acquire a sentinal file
descriptor just to pass it to open_by_handle_at() to identify the
filesystem. That's especially useful when such sentinel file descriptor
cannot or should not be acquired.
For pidfs this means a file handle can function as full replacement for
storing a pid in a file. Instead a file handle can be stored and
reopened purely based on the file handle.
Such autonomous file handles can be opened with or without specifying a
a file descriptor. If no proper file descriptor is used the
FD_PIDFS_ROOT sentinel must be passed. This allows us to define further
special negative fd sentinels in the future.
Userspace can trivially test for support by trying to open the file
handle with an invalid file descriptor.
* patches from https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-0-d02a04858fe3@kernel.org:
selftests/pidfd: decode pidfd file handles withou having to specify an fd
fhandle, pidfs: support open_by_handle_at() purely based on file handle
uapi/fcntl: add FD_PIDFS_ROOT
uapi/fcntl: add FD_INVALID
uapi/fcntl: mark range as reserved
fhandle: reflow get_path_anchor()
pidfs: add pidfs_root_path() helper
fhandle: rename to get_path_anchor()
fhandle: hoist copy_from_user() above get_path_from_fd()
fhandle: raise FILEID_IS_DIR in handle_type
Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-0-d02a04858fe3@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
| -rw-r--r-- | fs/fhandle.c | 64 | ||||
| -rw-r--r-- | fs/internal.h | 1 | ||||
| -rw-r--r-- | fs/pidfs.c | 11 | ||||
| -rw-r--r-- | include/uapi/linux/fcntl.h | 18 | ||||
| -rw-r--r-- | include/uapi/linux/pidfd.h | 15 | ||||
| -rw-r--r-- | tools/testing/selftests/pidfd/Makefile | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/pidfd/pidfd.h | 6 | ||||
| -rw-r--r-- | tools/testing/selftests/pidfd/pidfd_file_handle_test.c | 60 |
8 files changed, 129 insertions, 48 deletions
diff --git a/fs/fhandle.c b/fs/fhandle.c index 3e092ae6d142..b1363ead6c5e 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -88,7 +88,7 @@ static long do_sys_name_to_handle(const struct path *path, if (fh_flags & EXPORT_FH_CONNECTABLE) { handle->handle_type |= FILEID_IS_CONNECTABLE; if (d_is_dir(path->dentry)) - fh_flags |= FILEID_IS_DIR; + handle->handle_type |= FILEID_IS_DIR; } retval = 0; } @@ -168,23 +168,32 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, return err; } -static int get_path_from_fd(int fd, struct path *root) +static int get_path_anchor(int fd, struct path *root) { + if (fd >= 0) { + CLASS(fd, f)(fd); + if (fd_empty(f)) + return -EBADF; + *root = fd_file(f)->f_path; + path_get(root); + return 0; + } + if (fd == AT_FDCWD) { struct fs_struct *fs = current->fs; spin_lock(&fs->lock); *root = fs->pwd; path_get(root); spin_unlock(&fs->lock); - } else { - CLASS(fd, f)(fd); - if (fd_empty(f)) - return -EBADF; - *root = fd_file(f)->f_path; - path_get(root); + return 0; } - return 0; + if (fd == FD_PIDFS_ROOT) { + pidfs_get_root(root); + return 0; + } + + return -EBADF; } static int vfs_dentry_acceptable(void *context, struct dentry *dentry) @@ -323,13 +332,24 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, { int retval = 0; struct file_handle f_handle; - struct file_handle *handle = NULL; + struct file_handle *handle __free(kfree) = NULL; struct handle_to_path_ctx ctx = {}; const struct export_operations *eops; - retval = get_path_from_fd(mountdirfd, &ctx.root); + if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) + return -EFAULT; + + if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || + (f_handle.handle_bytes == 0)) + return -EINVAL; + + if (f_handle.handle_type < 0 || + FILEID_USER_FLAGS(f_handle.handle_type) & ~FILEID_VALID_USER_FLAGS) + return -EINVAL; + + retval = get_path_anchor(mountdirfd, &ctx.root); if (retval) - goto out_err; + return retval; eops = ctx.root.mnt->mnt_sb->s_export_op; if (eops && eops->permission) @@ -339,21 +359,6 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, if (retval) goto out_path; - if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { - retval = -EFAULT; - goto out_path; - } - if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || - (f_handle.handle_bytes == 0)) { - retval = -EINVAL; - goto out_path; - } - if (f_handle.handle_type < 0 || - FILEID_USER_FLAGS(f_handle.handle_type) & ~FILEID_VALID_USER_FLAGS) { - retval = -EINVAL; - goto out_path; - } - handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) { @@ -366,7 +371,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, &ufh->f_handle, f_handle.handle_bytes)) { retval = -EFAULT; - goto out_handle; + goto out_path; } /* @@ -384,11 +389,8 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, handle->handle_type &= ~FILEID_USER_FLAGS_MASK; retval = do_handle_to_path(handle, path, &ctx); -out_handle: - kfree(handle); out_path: path_put(&ctx.root); -out_err: return retval; } diff --git a/fs/internal.h b/fs/internal.h index 22ba066d1dba..ad256bccdc85 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -353,3 +353,4 @@ int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, unsigned int query_flags); int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); +void pidfs_get_root(struct path *path); diff --git a/fs/pidfs.c b/fs/pidfs.c index 47f5f9e0bdff..4fc7a7f4a3fe 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -31,6 +31,14 @@ static struct kmem_cache *pidfs_attr_cachep __ro_after_init; static struct kmem_cache *pidfs_xattr_cachep __ro_after_init; +static struct path pidfs_root_path = {}; + +void pidfs_get_root(struct path *path) +{ + *path = pidfs_root_path; + path_get(path); +} + /* * Stashes information that userspace needs to access even after the * process has been reaped. @@ -1068,4 +1076,7 @@ void __init pidfs_init(void) pidfs_mnt = kern_mount(&pidfs_type); if (IS_ERR(pidfs_mnt)) panic("Failed to mount pidfs pseudo filesystem"); + + pidfs_root_path.mnt = pidfs_mnt; + pidfs_root_path.dentry = pidfs_mnt->mnt_root; } diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index a15ac2fa4b20..f291ab4f94eb 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -90,10 +90,28 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +/* Reserved kernel ranges [-100], [-10000, -40000]. */ #define AT_FDCWD -100 /* Special value for dirfd used to indicate openat should use the current working directory. */ +/* + * The concept of process and threads in userland and the kernel is a confusing + * one - within the kernel every thread is a 'task' with its own individual PID, + * however from userland's point of view threads are grouped by a single PID, + * which is that of the 'thread group leader', typically the first thread + * spawned. + * + * To cut the Gideon knot, for internal kernel usage, we refer to + * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel + * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread + * group leader... + */ +#define PIDFD_SELF_THREAD -10000 /* Current thread. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ + +#define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ +#define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index c27a4e238e4b..957db425d459 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -43,21 +43,6 @@ #define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */ /* - * The concept of process and threads in userland and the kernel is a confusing - * one - within the kernel every thread is a 'task' with its own individual PID, - * however from userland's point of view threads are grouped by a single PID, - * which is that of the 'thread group leader', typically the first thread - * spawned. - * - * To cut the Gideon knot, for internal kernel usage, we refer to - * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel - * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread - * group leader... - */ -#define PIDFD_SELF_THREAD -10000 /* Current thread. */ -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ - -/* * ...and for userland we make life simpler - PIDFD_SELF refers to the current * thread, PIDFD_SELF_PROCESS refers to the process thread group leader. * diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 03a6eede9c9e..764a8f9ecefa 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall +CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index efd74063126e..cd244d0860ff 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -19,6 +19,10 @@ #include "../kselftest.h" #include "../clone3/clone3_selftests.h" +#ifndef FD_PIDFS_ROOT +#define FD_PIDFS_ROOT -10002 +#endif + #ifndef P_PIDFD #define P_PIDFD 3 #endif @@ -56,7 +60,7 @@ #endif #ifndef PIDFD_SELF_THREAD_GROUP -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ #endif #ifndef PIDFD_SELF diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c index 439b9c6c0457..6bd2e9c9565b 100644 --- a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -500,4 +500,64 @@ TEST_F(file_handle, valid_name_to_handle_at_flags) ASSERT_EQ(close(pidfd), 0); } +/* + * That we decode a file handle without having to pass a pidfd. + */ +TEST_F(file_handle, decode_purely_based_on_file_handle) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(-EBADF, fh, 0); + ASSERT_LT(pidfd, 0); + + pidfd = open_by_handle_at(AT_FDCWD, fh, 0); + ASSERT_LT(pidfd, 0); + + free(fh); +} + TEST_HARNESS_MAIN |
