From 1934b212615dc617ac84fc306333ab2b9fc3b04f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 9 Aug 2024 18:00:01 +0200 Subject: file: reclaim 24 bytes from f_owner We do embedd struct fown_struct into struct file letting it take up 32 bytes in total. We could tweak struct fown_struct to be more compact but really it shouldn't even be embedded in struct file in the first place. Instead, actual users of struct fown_struct should allocate the struct on demand. This frees up 24 bytes in struct file. That will have some potentially user-visible changes for the ownership fcntl()s. Some of them can now fail due to allocation failures. Practically, that probably will almost never happen as the allocations are small and they only happen once per file. The fown_struct is used during kill_fasync() which is used by e.g., pipes to generate a SIGIO signal. Sending of such signals is conditional on userspace having set an owner for the file using one of the F_OWNER fcntl()s. Such users will be unaffected if struct fown_struct is allocated during the fcntl() call. There are a few subsystems that call __f_setown() expecting file->f_owner to be allocated: (1) tun devices file->f_op->fasync::tun_chr_fasync() -> __f_setown() There are no callers of tun_chr_fasync(). (2) tty devices file->f_op->fasync::tty_fasync() -> __tty_fasync() -> __f_setown() tty_fasync() has no additional callers but __tty_fasync() has. Note that __tty_fasync() only calls __f_setown() if the @on argument is true. It's called from: file->f_op->release::tty_release() -> tty_release() -> __tty_fasync() -> __f_setown() tty_release() calls __tty_fasync() with @on false => __f_setown() is never called from tty_release(). => All callers of tty_release() are safe as well. file->f_op->release::tty_open() -> tty_release() -> __tty_fasync() -> __f_setown() __tty_hangup() calls __tty_fasync() with @on false => __f_setown() is never called from tty_release(). => All callers of __tty_hangup() are safe as well. From the callchains it's obvious that (1) and (2) end up getting called via file->f_op->fasync(). That can happen either through the F_SETFL fcntl() with the FASYNC flag raised or via the FIOASYNC ioctl(). If FASYNC is requested and the file isn't already FASYNC then file->f_op->fasync() is called with @on true which ends up causing both (1) and (2) to call __f_setown(). (1) and (2) are the only subsystems that call __f_setown() from the file->f_op->fasync() handler. So both (1) and (2) have been updated to allocate a struct fown_struct prior to calling fasync_helper() to register with the fasync infrastructure. That's safe as they both call fasync_helper() which also does allocations if @on is true. The other interesting case are file leases: (3) file leases lease_manager_ops->lm_setup::lease_setup() -> __f_setown() Which in turn is called from: generic_add_lease() -> lease_manager_ops->lm_setup::lease_setup() -> __f_setown() So here again we can simply make generic_add_lease() allocate struct fown_struct prior to the lease_manager_ops->lm_setup::lease_setup() which happens under a spinlock. With that the two remaining subsystems that call __f_setown() are: (4) dnotify (5) sockets Both have their own custom ioctls to set struct fown_struct and both have been converted to allocate a struct fown_struct on demand from their respective ioctls. Interactions with O_PATH are fine as well e.g., when opening a /dev/tty as O_PATH then no file->f_op->open() happens thus no file->f_owner is allocated. That's fine as no file operation will be set for those and the device has never been opened. fcntl()s called on such things will just allocate a ->f_owner on demand. Although I have zero idea why'd you care about f_owner on an O_PATH fd. Link: https://lore.kernel.org/r/20240813-work-f_owner-v2-1-4e9343a79f9f@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/file_table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index ca7843dde56d..fdf98709dde2 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -155,7 +155,6 @@ static int init_file(struct file *f, int flags, const struct cred *cred) return error; } - rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); mutex_init(&f->f_pos_lock); f->f_flags = flags; @@ -425,7 +424,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); } fops_put(file->f_op); - put_pid(file->f_owner.pid); + file_f_owner_release(file); put_file_access(file); dput(dentry); if (unlikely(mode & FMODE_NEED_UNMOUNT)) -- cgit v1.2.3 From ea566e18b4deea6e998088de4f1a76d1f39c8d3f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 28 Aug 2024 12:56:25 +0200 Subject: fs: use kmem_cache_create_rcu() Switch to the new kmem_cache_create_rcu() helper which allows us to use a custom free pointer offset avoiding the need to have an external free pointer which would grow struct file behind our backs. Link: https://lore.kernel.org/r/20240828-work-kmem_cache-rcu-v3-3-5460bc1f09f6@kernel.org Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Vlastimil Babka Signed-off-by: Christian Brauner --- fs/file_table.c | 6 +++--- include/linux/fs.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index fdf98709dde2..3ef558f27a1c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -511,9 +511,9 @@ EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { - filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN | - SLAB_PANIC | SLAB_ACCOUNT, NULL); + filp_cachep = kmem_cache_create_rcu("filp", sizeof(struct file), + offsetof(struct file, f_freeptr), + SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/include/linux/fs.h b/include/linux/fs.h index af8bbd4eeb3a..58c91a52cad1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1011,6 +1011,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_task_work: task work entry point * @f_llist: work queue entrypoint * @f_ra: file's readahead state + * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) */ struct file { atomic_long_t f_count; @@ -1042,6 +1043,7 @@ struct file { struct callback_head f_task_work; struct llist_node f_llist; struct file_ra_state f_ra; + freeptr_t f_freeptr; }; /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout -- cgit v1.2.3 From 5e9b50dea970ae6d3e1309d4254157099734a2af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:59 +0200 Subject: fs: add f_pipe Only regular files with FMODE_ATOMIC_POS and directories need f_pos_lock. Place a new f_pipe member in a union with f_pos_lock that they can use and make them stop abusing f_version in follow-up patches. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-18-6d3e4816aa7b@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/file_table.c | 7 +++++++ include/linux/fs.h | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index 3ef558f27a1c..7ce4d5dac080 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -156,6 +156,13 @@ static int init_file(struct file *f, int flags, const struct cred *cred) } spin_lock_init(&f->f_lock); + /* + * Note that f_pos_lock is only used for files raising + * FMODE_ATOMIC_POS and directories. Other files such as pipes + * don't need it and since f_pos_lock is in a union may reuse + * the space for other purposes. They are expected to initialize + * the respective member when opening the file. + */ mutex_init(&f->f_pos_lock); f->f_flags = flags; f->f_mode = OPEN_FMODE(flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e6b3c1afb31..ca4925008244 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1001,6 +1001,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_cred: stashed credentials of creator/opener * @f_path: path of the file * @f_pos_lock: lock protecting file position + * @f_pipe: specific to pipes * @f_pos: file position * @f_version: file version * @f_security: LSM security context of this file @@ -1026,7 +1027,12 @@ struct file { const struct cred *f_cred; /* --- cacheline 1 boundary (64 bytes) --- */ struct path f_path; - struct mutex f_pos_lock; + union { + /* regular files (with FMODE_ATOMIC_POS) and directories */ + struct mutex f_pos_lock; + /* pipes */ + u64 f_pipe; + }; loff_t f_pos; u64 f_version; /* --- cacheline 2 boundary (128 bytes) --- */ -- cgit v1.2.3