From de394a86658ffe4e89e5328fd4993abfe41b7435 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 29 Jun 2022 00:00:25 -0400 Subject: ext4: update s_overhead_clusters in the superblock during an on-line resize When doing an online resize, the on-disk superblock on-disk wasn't updated. This means that when the file system is unmounted and remounted, and the on-disk overhead value is non-zero, this would result in the results of statfs(2) to be incorrect. This was partially fixed by Commits 10b01ee92df5 ("ext4: fix overhead calculation to account for the reserved gdt blocks"), 85d825dbf489 ("ext4: force overhead calculation if the s_overhead_cluster makes no sense"), and eb7054212eac ("ext4: update the cached overhead value in the superblock"). However, since it was too expensive to forcibly recalculate the overhead for bigalloc file systems at every mount, this didn't fix the problem for bigalloc file systems. This commit should address the problem when resizing file systems with the bigalloc feature enabled. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20220629040026.112371-1-tytso@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 8b70a4701293..e5c2713aa11a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1484,6 +1484,7 @@ static void ext4_update_super(struct super_block *sb, * Update the fs overhead information */ ext4_calculate_overhead(sb); + es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: added group %u:" -- cgit v1.2.3 From 827891a38accfb4e04dbcdefe710f8746c6ad16d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 29 Jun 2022 00:00:26 -0400 Subject: ext4: update the s_overhead_clusters in the backup sb's when resizing When the EXT4_IOC_RESIZE_FS ioctl is complete, update the backup superblocks. We don't do this for the old-style resize ioctls since they are quite ancient, and only used by very old versions of resize2fs --- and we don't want to update the backup superblocks every time EXT4_IOC_GROUP_ADD is called, since it might get called a lot. Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20220629040026.112371-2-tytso@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 4 ++-- fs/ext4/ioctl.c | 22 +++++++++++++++------- fs/ext4/resize.c | 5 ++++- fs/ext4/super.c | 2 +- 4 files changed, 22 insertions(+), 11 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index adfc30ee4b7b..310e976ef1fd 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3016,7 +3016,7 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa); int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa); extern void ext4_reset_inode_seed(struct inode *inode); -int ext4_update_overhead(struct super_block *sb); +int ext4_update_overhead(struct super_block *sb, bool force); /* migrate.c */ extern int ext4_ext_migrate(struct inode *); @@ -3800,7 +3800,7 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; extern int ext4_resize_begin(struct super_block *sb); -extern void ext4_resize_end(struct super_block *sb); +extern int ext4_resize_end(struct super_block *sb, bool update_backups); static inline void ext4_set_io_unwritten_flag(struct inode *inode, struct ext4_io_end *io_end) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index cb01c1da0f9d..1702c574407a 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -944,7 +944,9 @@ static long ext4_ioctl_group_add(struct file *file, test_opt(sb, INIT_INODE_TABLE)) err = ext4_register_li_request(sb, input->group); group_add_out: - ext4_resize_end(sb); + err2 = ext4_resize_end(sb, false); + if (err == 0) + err = err2; return err; } @@ -1223,7 +1225,9 @@ setversion_out: err = err2; mnt_drop_write_file(filp); group_extend_out: - ext4_resize_end(sb); + err2 = ext4_resize_end(sb, false); + if (err == 0) + err = err2; return err; } @@ -1371,7 +1375,9 @@ mext_out: err = ext4_register_li_request(sb, o_group); resizefs_out: - ext4_resize_end(sb); + err2 = ext4_resize_end(sb, true); + if (err == 0) + err = err2; return err; } @@ -1599,13 +1605,15 @@ static void set_overhead(struct ext4_super_block *es, const void *arg) es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg)); } -int ext4_update_overhead(struct super_block *sb) +int ext4_update_overhead(struct super_block *sb, bool force) { struct ext4_sb_info *sbi = EXT4_SB(sb); - if (sb_rdonly(sb) || sbi->s_overhead == 0 || - sbi->s_overhead == le32_to_cpu(sbi->s_es->s_overhead_clusters)) + if (sb_rdonly(sb)) + return 0; + if (!force && + (sbi->s_overhead == 0 || + sbi->s_overhead == le32_to_cpu(sbi->s_es->s_overhead_clusters))) return 0; - return ext4_update_superblocks_fn(sb, set_overhead, &sbi->s_overhead); } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e5c2713aa11a..e4e89ca82f8c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -97,10 +97,13 @@ int ext4_resize_begin(struct super_block *sb) return ret; } -void ext4_resize_end(struct super_block *sb) +int ext4_resize_end(struct super_block *sb, bool update_backups) { clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags); smp_mb__after_atomic(); + if (update_backups) + return ext4_update_overhead(sb, true); + return 0; } static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 845f2f8aee5f..6a8a752d812b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5523,7 +5523,7 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc) "Quota mode: %s.", descr, ext4_quota_mode(sb)); /* Update the s_overhead_clusters if necessary */ - ext4_update_overhead(sb); + ext4_update_overhead(sb, false); return 0; free_sbi: -- cgit v1.2.3 From 026d0d27c4882303e4b071ca6d996640cc2932c3 Mon Sep 17 00:00:00 2001 From: "Kiselev, Oleg" Date: Wed, 20 Jul 2022 04:26:22 +0000 Subject: ext4: reduce computation of overhead during resize This patch avoids doing an O(n**2)-complexity walk through every flex group. Instead, it uses the already computed overhead information for the newly allocated space, and simply adds it to the previously calculated overhead stored in the superblock. This drastically reduces the time taken to resize very large bigalloc filesystems (from 3+ hours for a 64TB fs down to milliseconds). Signed-off-by: Oleg Kiselev Link: https://lore.kernel.org/r/CE4F359F-4779-45E6-B6A9-8D67FDFF5AE2@amazon.com Signed-off-by: Theodore Ts'o --- fs/ext4/resize.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e4e89ca82f8c..5e3a893e600e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1383,6 +1383,17 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, return err; } +static void ext4_add_overhead(struct super_block *sb, + const ext4_fsblk_t overhead) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + + sbi->s_overhead += overhead; + es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); + smp_wmb(); +} + /* * ext4_update_super() updates the super block so that the newly added * groups can be seen by the filesystem. @@ -1484,9 +1495,17 @@ static void ext4_update_super(struct super_block *sb, } /* - * Update the fs overhead information + * Update the fs overhead information. + * + * For bigalloc, if the superblock already has a properly calculated + * overhead, update it with a value based on numbers already computed + * above for the newly allocated capacity. */ - ext4_calculate_overhead(sb); + if (ext4_has_feature_bigalloc(sb) && (sbi->s_overhead != 0)) + ext4_add_overhead(sb, + EXT4_NUM_B2C(sbi, blocks_count - free_blocks)); + else + ext4_calculate_overhead(sb); es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); if (test_opt(sb, DEBUG)) -- cgit v1.2.3 From 69cb8e9d8cd97cdf5e293b26d70a9dee3e35e6bd Mon Sep 17 00:00:00 2001 From: "Kiselev, Oleg" Date: Wed, 20 Jul 2022 04:27:48 +0000 Subject: ext4: avoid resizing to a partial cluster size This patch avoids an attempt to resize the filesystem to an unaligned cluster boundary. An online resize to a size that is not integral to cluster size results in the last iteration attempting to grow the fs by a negative amount, which trips a BUG_ON and leaves the fs with a corrupted in-memory superblock. Signed-off-by: Oleg Kiselev Link: https://lore.kernel.org/r/0E92A0AB-4F16-4F1A-94B7-702CC6504FDE@amazon.com Signed-off-by: Theodore Ts'o --- fs/ext4/resize.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/ext4/resize.c') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5e3a893e600e..fea2a68d067b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2011,6 +2011,16 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) } brelse(bh); + /* + * For bigalloc, trim the requested size to the nearest cluster + * boundary to avoid creating an unusable filesystem. We do this + * silently, instead of returning an error, to avoid breaking + * callers that blindly resize the filesystem to the full size of + * the underlying block device. + */ + if (ext4_has_feature_bigalloc(sb)) + n_blocks_count &= ~((1 << EXT4_CLUSTER_BITS(sb)) - 1); + retry: o_blocks_count = ext4_blocks_count(es); -- cgit v1.2.3