From 8b3d838139bcd1e552f1899191f734264ce2a1a5 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 16 Jan 2024 15:53:35 +0800 Subject: fs: improve dump_mapping() robustness We met a kernel crash issue when running stress-ng testing, and the system crashes when printing the dentry name in dump_mapping(). Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 pc : dentry_name+0xd8/0x224 lr : pointer+0x22c/0x370 sp : ffff800025f134c0 ...... Call trace: dentry_name+0xd8/0x224 pointer+0x22c/0x370 vsnprintf+0x1ec/0x730 vscnprintf+0x2c/0x60 vprintk_store+0x70/0x234 vprintk_emit+0xe0/0x24c vprintk_default+0x3c/0x44 vprintk_func+0x84/0x2d0 printk+0x64/0x88 __dump_page+0x52c/0x530 dump_page+0x14/0x20 set_migratetype_isolate+0x110/0x224 start_isolate_page_range+0xc4/0x20c offline_pages+0x124/0x474 memory_block_offline+0x44/0xf4 memory_subsys_offline+0x3c/0x70 device_offline+0xf0/0x120 ...... The root cause is that, one thread is doing page migration, and we will use the target page's ->mapping field to save 'anon_vma' pointer between page unmap and page move, and now the target page is locked and refcount is 1. Currently, there is another stress-ng thread performing memory hotplug, attempting to offline the target page that is being migrated. It discovers that the refcount of this target page is 1, preventing the offline operation, thus proceeding to dump the page. However, page_mapping() of the target page may return an incorrect file mapping to crash the system in dump_mapping(), since the target page->mapping only saves 'anon_vma' pointer without setting PAGE_MAPPING_ANON flag. The page migration issue has been fixed by commit d1adb25df711 ("mm: migrate: fix getting incorrect page mapping during page migration"). In addition, Matthew suggested we should also improve dump_mapping()'s robustness to resilient against the kernel crash [1]. With checking the 'dentry.parent' and 'dentry.d_name.name' used by dentry_name(), I can see dump_mapping() will output the invalid dentry instead of crashing the system when this issue is reproduced again. [12211.189128] page:fffff7de047741c0 refcount:1 mapcount:0 mapping:ffff989117f55ea0 index:0x1 pfn:0x211dd07 [12211.189144] aops:0x0 ino:1 invalid dentry:74786574206e6870 [12211.189148] flags: 0x57ffffc0000001(locked|node=1|zone=2|lastcpupid=0x1fffff) [12211.189150] page_type: 0xffffffff() [12211.189153] raw: 0057ffffc0000001 0000000000000000 dead000000000122 ffff989117f55ea0 [12211.189154] raw: 0000000000000001 0000000000000001 00000001ffffffff 0000000000000000 [12211.189155] page dumped because: unmovable page [1] https://lore.kernel.org/all/ZXxn%2F0oixJxxAnpF@casper.infradead.org/ Suggested-by: Matthew Wilcox Signed-off-by: Baolin Wang Link: https://lore.kernel.org/r/937ab1f87328516821d39be672b6bc18861d9d3e.1705391420.git.baolin.wang@linux.alibaba.com Signed-off-by: Christian Brauner --- fs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 91048c4c9c9e..6d0d54230363 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -588,7 +588,8 @@ void dump_mapping(const struct address_space *mapping) } dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); - if (get_kernel_nofault(dentry, dentry_ptr)) { + if (get_kernel_nofault(dentry, dentry_ptr) || + !dentry.d_parent || !dentry.d_name.name) { pr_warn("aops:%ps ino:%lx invalid dentry:%px\n", a_ops, ino, dentry_ptr); return; -- cgit v1.2.3 From fe3944fb245ab99570552a3bf970b00058a9ca6d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 2 Feb 2024 12:39:23 -0800 Subject: fs: Move enum rw_hint into a new header file Move enum rw_hint into a new header file to prepare for using this data type in the block layer. Add the attribute __packed to reduce the space occupied by instances of this data type from four bytes to one byte. Change the data type of i_write_hint from u8 into enum rw_hint. Reviewed-by: Christoph Hellwig Acked-by: Chao Yu # for the F2FS part Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240202203926.2478590-5-bvanassche@acm.org Signed-off-by: Christian Brauner --- fs/f2fs/f2fs.h | 1 + fs/fcntl.c | 1 + fs/inode.c | 1 + include/linux/fs.h | 16 ++-------------- include/linux/rw_hint.h | 24 ++++++++++++++++++++++++ 5 files changed, 29 insertions(+), 14 deletions(-) create mode 100644 include/linux/rw_hint.h (limited to 'fs/inode.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 65294e3b0bef..01fde6d44bf6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/fs/fcntl.c b/fs/fcntl.c index d2b15351ab8e..00be0a710bba 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/fs/inode.c b/fs/inode.c index 91048c4c9c9e..1aba6c0bf26a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a70495..bdabda5dc364 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -309,19 +310,6 @@ struct address_space; struct writeback_control; struct readahead_control; -/* - * Write life time hint values. - * Stored in struct inode as u8. - */ -enum rw_hint { - WRITE_LIFE_NOT_SET = 0, - WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, - WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, - WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, - WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, - WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, -}; - /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC @@ -677,7 +665,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; - u8 i_write_hint; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h new file mode 100644 index 000000000000..309ca72f2dfb --- /dev/null +++ b/include/linux/rw_hint.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_RW_HINT_H +#define _LINUX_RW_HINT_H + +#include +#include +#include + +/* Block storage write lifetime hint values. */ +enum rw_hint { + WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +} __packed; + +/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */ +#ifndef __CHECKER__ +static_assert(sizeof(enum rw_hint) == 1); +#endif + +#endif /* _LINUX_RW_HINT_H */ -- cgit v1.2.3 From c997d683d952d8c927922ea0ab4d33a01c8efc2c Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sat, 24 Feb 2024 13:53:15 +0000 Subject: vfs: remove SLAB_MEM_SPREAD flag usage The SLAB_MEM_SPREAD flag used to be implemented in SLAB, which was removed as of v6.8-rc1 (see [1]), so it became a dead flag since the commit 16a1d968358a ("mm/slab: remove mm/slab.c and slab_def.h"). And the series[1] went on to mark it obsolete explicitly to avoid confusion for users. Here we can just remove all its users, which has no any functional change. Signed-off-by: Chengming Zhou Link: https://lore.kernel.org/all/20240223-slab-cleanup-flags-v2-1-02f1753e8303@suse.cz [1] Link: https://lore.kernel.org/r/20240224135315.830477-1-chengming.zhou@linux.dev Signed-off-by: Christian Brauner --- fs/buffer.c | 2 +- fs/dcache.c | 2 +- fs/inode.c | 2 +- fs/mbcache.c | 3 +-- 4 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/buffer.c b/fs/buffer.c index b55dea034a5d..9a54077de87d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3122,7 +3122,7 @@ void __init buffer_init(void) int ret; bh_cachep = KMEM_CACHE(buffer_head, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC); /* * Limit the bh occupancy to 10% of ZONE_NORMAL */ diff --git a/fs/dcache.c b/fs/dcache.c index b813528fb147..be9e10155c8d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3136,7 +3136,7 @@ static void __init dcache_init(void) * of the dcache. */ dentry_cache = KMEM_CACHE_USERCOPY(dentry, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT, + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT, d_iname); /* Hash may have been set up in dcache_init_early */ diff --git a/fs/inode.c b/fs/inode.c index 6d0d54230363..d2e8e3884b36 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2286,7 +2286,7 @@ void __init inode_init(void) sizeof(struct inode), 0, (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), + SLAB_ACCOUNT), init_once); /* Hash may have been set up in inode_init_early */ diff --git a/fs/mbcache.c b/fs/mbcache.c index fe2624e17253..e60a840999aa 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -426,8 +426,7 @@ EXPORT_SYMBOL(mb_cache_destroy); static int __init mbcache_init(void) { - mb_entry_cache = KMEM_CACHE(mb_cache_entry, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD); + mb_entry_cache = KMEM_CACHE(mb_cache_entry, SLAB_RECLAIM_ACCOUNT); if (!mb_entry_cache) return -ENOMEM; return 0; -- cgit v1.2.3 From 24a8b7bfb96120b32c222ef108bb7a713d1e385a Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Thu, 29 Feb 2024 01:30:31 +0800 Subject: fs: use inode_set_ctime_to_ts to set inode ctime to current time The function inode_set_ctime_current simply retrieves the current time and assigns it to the field __i_ctime without any alterations. Therefore, it is possible to set ctime to now directly using inode_set_ctime_to_ts Signed-off-by: Nguyen Dinh Phi Link: https://lore.kernel.org/r/20240228173031.3208743-1-phind.uet@gmail.com Signed-off-by: Christian Brauner --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index d2e8e3884b36..5c8a5250d0ac 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2510,7 +2510,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode) { struct timespec64 now = current_time(inode); - inode_set_ctime(inode, now.tv_sec, now.tv_nsec); + inode_set_ctime_to_ts(inode, now); return now; } EXPORT_SYMBOL(inode_set_ctime_current); -- cgit v1.2.3