From abaa6d4f6ab8371c5b73afb726ff1c012526e999 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 17:43:36 -0400 Subject: bcachefs: Fix return type of dirent_points_to_inode_nowarn() we're returning an error code now, not a bool Reported-by: Dan Carpenter Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0d8b782b63fb..c6c98ee87ec9 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -28,8 +28,8 @@ static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, inode->bi_dir_offset == d.k->p.offset; } -static bool dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, - struct bch_inode_unpacked *inode) +static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, + struct bch_inode_unpacked *inode) { if (d.v->d_type == DT_SUBVOL ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol -- cgit v1.2.3 From 3b1425a4eb4e9750db8620c26e39390411eea185 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Oct 2024 21:31:31 -0400 Subject: bcachefs: Fix bch2_inode_is_open() check Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index c6c98ee87ec9..351de61c7ed1 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1106,7 +1106,7 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; } else { - if (fsck_err_on(bch2_inode_is_open(c, k.k->p), + if (fsck_err_on(!bch2_inode_is_open(c, k.k->p), trans, inode_unlinked_and_not_open, "inode %llu%u unlinked and not open", u.bi_inum, u.bi_snapshot)) { -- cgit v1.2.3 From 20826fe6b810bce3efba9ef5d74cf13ebe5f23d9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 02:44:12 -0400 Subject: bcachefs: Fix reattach_inode() Ensure a copy of the lost+found inode exists in the snapshot that we're reattaching, so that we don't trigger warnings in lookup_inode_for_snapshot() later. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 351de61c7ed1..881ad5b9447f 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -354,13 +354,12 @@ static int reattach_inode(struct btree_trans *trans, if (ret) return ret; - if (S_ISDIR(inode->bi_mode)) { - lostfound.bi_nlink++; + lostfound.bi_nlink += S_ISDIR(inode->bi_mode); - ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX); - if (ret) - return ret; - } + /* ensure lost+found inode is also present in inode snapshot */ + ret = __bch2_fsck_write_inode(trans, &lostfound, inode_snapshot); + if (ret) + return ret; dir_hash = bch2_hash_info_init(c, &lostfound); -- cgit v1.2.3 From fda7b1ffdef75cc0f4d34255e88b5894e2ce75b1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 15:33:08 -0400 Subject: bcachefs: Create lost+found in correct snapshot Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 881ad5b9447f..f0d6696c4df6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -283,11 +283,17 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, return ret; create_lostfound: + /* + * we always create lost+found in the root snapshot; we don't want + * different branches of the snapshot tree to have different lost+found + */ + snapshot = le32_to_cpu(st.root_snapshot); /* * XXX: we could have a nicer log message here if we had a nice way to * walk backpointers to print a path */ - bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); + bch_notice(c, "creating lost+found in subvol %llu snapshot %u", + root_inum.subvol, le32_to_cpu(st.root_snapshot)); u64 now = bch2_current_time(c); struct btree_iter lostfound_iter = { NULL }; -- cgit v1.2.3 From 1c6051bbd76b2767d6acef6a1d0bdf99aa319273 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 15:27:37 -0400 Subject: bcachefs: Check for directories with no backpointers It's legal for regular files to have missing backpointers (due to hardlinks), and fsck should automatically add them, but for directories this is an error that should be flagged. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 22 +++++++++++++++------- fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f0d6696c4df6..d2aa6a5bc973 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1826,6 +1826,21 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, if (inode_points_to_dirent(target, d)) return 0; + if (!target->bi_dir && + !target->bi_dir_offset) { + fsck_err_on(S_ISDIR(target->bi_mode), + trans, inode_dir_missing_backpointer, + "directory with missing backpointer\n%s", + (bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n "), + bch2_inode_unpacked_to_text(&buf, target), + buf.buf)); + + target->bi_dir = d.k->p.inode; + target->bi_dir_offset = d.k->p.offset; + return __bch2_fsck_write_inode(trans, target, target_snapshot); + } + if (bch2_inode_should_have_bp(target) && !fsck_err(trans, inode_wrong_backpointer, "dirent points to inode that does not point back:\n %s", @@ -1835,13 +1850,6 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, buf.buf))) goto err; - if (!target->bi_dir && - !target->bi_dir_offset) { - target->bi_dir = d.k->p.inode; - target->bi_dir_offset = d.k->p.offset; - return __bch2_fsck_write_inode(trans, target, target_snapshot); - } - struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); ret = bkey_err(bp_dirent); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index b4024870b65e..fc3d31ed5975 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -219,6 +219,7 @@ enum bch_fsck_flags { x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \ x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \ x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ + x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ @@ -295,7 +296,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 284, 0) + x(MAX, 285, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -- cgit v1.2.3 From c7da5ee2e5cc30faca49e3ea9dbecf8f6ee4f1ea Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Sep 2024 22:38:04 -0400 Subject: bcachefs: Check for unlinked inodes with dirents link count works differently in bcachefs - it's only nonzero for files with multiple hardlinks, which means we can also avoid checking it except for files that are known to have hardlinks. That means we need a few different checks instead; in particular, we don't want fsck to delet a file that has a dirent pointing to it. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 53 +++++++++++++++++++++++++++++++----------- fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 41 insertions(+), 15 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index d2aa6a5bc973..09f890539ded 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1029,6 +1029,7 @@ static int check_inode(struct btree_trans *trans, bool full) { struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; struct bch_inode_unpacked u; bool do_update = false; int ret; @@ -1062,7 +1063,24 @@ static int check_inode(struct btree_trans *trans, trans, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { bch_err(c, "repair not implemented yet"); - return -BCH_ERR_fsck_repair_unimplemented; + ret = -BCH_ERR_fsck_repair_unimplemented; + goto err_noprint; + } + + if (u.bi_dir || u.bi_dir_offset) { + ret = check_inode_dirent_inode(trans, &u, &do_update); + if (ret) + goto err; + } + + if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked), + trans, inode_unlinked_but_has_dirent, + "inode unlinked but has dirent\n%s", + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &u), + buf.buf))) { + u.bi_flags &= ~BCH_INODE_unlinked; + do_update = true; } if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && @@ -1079,11 +1097,11 @@ static int check_inode(struct btree_trans *trans, bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - return ret; + goto err_noprint; if (!bpos_eq(new_min_pos, POS_MIN)) bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); - return 0; + goto err_noprint; } if (u.bi_flags & BCH_INODE_unlinked) { @@ -1100,7 +1118,7 @@ static int check_inode(struct btree_trans *trans, */ ret = check_inode_deleted_list(trans, k.k->p); if (ret < 0) - return ret; + goto err_noprint; fsck_err_on(!ret, trans, unlinked_inode_not_on_deleted_list, @@ -1117,7 +1135,7 @@ static int check_inode(struct btree_trans *trans, u.bi_inum, u.bi_snapshot)) { ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck deleting inode"); - return ret; + goto err_noprint; } } } @@ -1182,12 +1200,6 @@ static int check_inode(struct btree_trans *trans, do_update = true; } - if (u.bi_dir || u.bi_dir_offset) { - ret = check_inode_dirent_inode(trans, &u, &do_update); - if (ret) - goto err; - } - if (fsck_err_on(u.bi_parent_subvol && (u.bi_subvol == 0 || u.bi_subvol == BCACHEFS_ROOT_SUBVOL), @@ -1232,11 +1244,13 @@ do_update: ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - return ret; + goto err_noprint; } err: fsck_err: bch_err_fn(c, ret); +err_noprint: + printbuf_exit(&buf); return ret; } @@ -1831,11 +1845,22 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, fsck_err_on(S_ISDIR(target->bi_mode), trans, inode_dir_missing_backpointer, "directory with missing backpointer\n%s", - (bch2_bkey_val_to_text(&buf, c, d.s_c), - prt_printf(&buf, "\n "), + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n"), bch2_inode_unpacked_to_text(&buf, target), buf.buf)); + fsck_err_on(target->bi_flags & BCH_INODE_unlinked, + trans, inode_unlinked_but_has_dirent, + "inode unlinked but has dirent\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n"), + bch2_inode_unpacked_to_text(&buf, target), + buf.buf)); + + target->bi_flags &= ~BCH_INODE_unlinked; target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; return __bch2_fsck_write_inode(trans, target, target_snapshot); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index fc3d31ed5975..4c8ff4cc17c2 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -211,6 +211,7 @@ enum bch_fsck_flags { x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ + x(inode_unlinked_but_has_dirent, 285, 0) \ x(inode_checksum_type_invalid, 199, 0) \ x(inode_compression_type_invalid, 200, 0) \ x(inode_subvol_root_but_not_dir, 201, 0) \ @@ -296,7 +297,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 285, 0) + x(MAX, 286, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -- cgit v1.2.3 From c9306a91c3fdc9915f5408561ea432c70b03383b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Sep 2024 23:38:37 -0400 Subject: bcachefs: Check for unlinked, non-empty dirs in check_inode() We want to check for this early so it can be reattached if necessary in check_unreachable_inodes(); better than letting it be deleted and having the children reattached, losing their filenames. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 17 +++++++++++++++++ fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 09f890539ded..30eca76554f6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1083,6 +1083,23 @@ static int check_inode(struct btree_trans *trans, do_update = true; } + if (S_ISDIR(u.bi_mode) && (u.bi_flags & BCH_INODE_unlinked)) { + /* Check for this early so that check_unreachable_inode() will reattach it */ + + ret = bch2_empty_dir_snapshot(trans, k.k->p.offset, 0, k.k->p.snapshot); + if (ret && ret != -BCH_ERR_ENOTEMPTY_dir_not_empty) + goto err; + + fsck_err_on(ret, trans, inode_dir_unlinked_but_not_empty, + "dir unlinked but not empty\n%s", + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &u), + buf.buf)); + u.bi_flags &= ~BCH_INODE_unlinked; + do_update = true; + ret = 0; + } + if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { struct bpos new_min_pos; diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 4c8ff4cc17c2..4135b1ea2fec 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -221,6 +221,7 @@ enum bch_fsck_flags { x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \ x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ + x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ @@ -297,7 +298,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 286, 0) + x(MAX, 287, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -- cgit v1.2.3 From 72350ee0ea22c053f2683e50f1beba97df2ad053 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Sep 2024 00:00:33 -0400 Subject: bcachefs: Kill snapshot arg to fsck_write_inode() It was initially believed that it would be better to be explicit about the snapshot we're updating when writing inodes in fsck; however, it turns out that passing around the snapshot separately is more error prone and we're usually updating the inode in the same snapshow we read it from. This is different from normal filesystem paths, where we do the update in the snapshot of the subvolume we're in. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 87 ++++++++++++++++++++++++------------------------- fs/bcachefs/inode.c | 12 +++---- fs/bcachefs/inode.h | 4 +-- fs/bcachefs/subvolume.c | 3 +- 4 files changed, 51 insertions(+), 55 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 30eca76554f6..b8a6ceb0cc7a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -137,16 +137,15 @@ found: return ret; } -static int lookup_inode(struct btree_trans *trans, u64 inode_nr, - struct bch_inode_unpacked *inode, - u32 *snapshot) +static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot, + struct bch_inode_unpacked *inode) { struct btree_iter iter; struct bkey_s_c k; int ret; k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, - SPOS(0, inode_nr, *snapshot), 0); + SPOS(0, inode_nr, snapshot), 0); ret = bkey_err(k); if (ret) goto err; @@ -154,8 +153,6 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr, ret = bkey_is_inode(k.k) ? bch2_inode_unpack(k, inode) : -BCH_ERR_ENOENT_inode; - if (!ret) - *snapshot = iter.pos.snapshot; err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -250,8 +247,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; - u32 root_inode_snapshot = snapshot; - ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); + ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode); bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", root_inum.inum, le32_to_cpu(st.master_subvol)); if (ret) @@ -277,7 +273,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, * The bch2_check_dirents pass has already run, dangling dirents * shouldn't exist here: */ - ret = lookup_inode(trans, inum, lostfound, &snapshot); + ret = lookup_inode(trans, inum, snapshot, lostfound); bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); return ret; @@ -302,6 +298,7 @@ create_lostfound: bch2_inode_init_early(c, lostfound); bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); lostfound->bi_dir = root_inode.bi_inum; + lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot); root_inode.bi_nlink++; @@ -329,9 +326,7 @@ err: return ret; } -static int reattach_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 inode_snapshot) +static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bch_fs *c = trans->c; struct bch_hash_info dir_hash; @@ -339,7 +334,7 @@ static int reattach_inode(struct btree_trans *trans, char name_buf[20]; struct qstr name; u64 dir_offset = 0; - u32 dirent_snapshot = inode_snapshot; + u32 dirent_snapshot = inode->bi_snapshot; int ret; if (inode->bi_subvol) { @@ -363,7 +358,12 @@ static int reattach_inode(struct btree_trans *trans, lostfound.bi_nlink += S_ISDIR(inode->bi_mode); /* ensure lost+found inode is also present in inode snapshot */ - ret = __bch2_fsck_write_inode(trans, &lostfound, inode_snapshot); + if (!inode->bi_subvol) { + BUG_ON(!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, lostfound.bi_snapshot)); + lostfound.bi_snapshot = inode->bi_snapshot; + } + + ret = __bch2_fsck_write_inode(trans, &lostfound); if (ret) return ret; @@ -388,7 +388,7 @@ static int reattach_inode(struct btree_trans *trans, inode->bi_dir = lostfound.bi_inum; inode->bi_dir_offset = dir_offset; - return __bch2_fsck_write_inode(trans, inode, inode_snapshot); + return __bch2_fsck_write_inode(trans, inode); } static int remove_backpointer(struct btree_trans *trans, @@ -427,7 +427,7 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume if (ret) return ret; - ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot)); + ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); return ret; } @@ -545,8 +545,9 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL); new_inode.bi_size = i_size; new_inode.bi_inum = inum; + new_inode.bi_snapshot = snapshot; - return __bch2_fsck_write_inode(trans, &new_inode, snapshot); + return __bch2_fsck_write_inode(trans, &new_inode); } struct snapshots_seen { @@ -1110,7 +1111,7 @@ static int check_inode(struct btree_trans *trans, u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) @@ -1258,7 +1259,7 @@ static int check_inode(struct btree_trans *trans, } do_update: if (do_update) { - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) goto err_noprint; @@ -1383,7 +1384,7 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal w->last_pos.inode, i->snapshot, i->inode.bi_sectors, i->count)) { i->inode.bi_sectors = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); + ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) break; } @@ -1825,7 +1826,7 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ "directory %llu:%u with wrong i_nlink: got %u, should be %llu", w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { i->inode.bi_nlink = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); + ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) break; } @@ -1846,8 +1847,7 @@ noinline_for_stack static int check_dirent_inode_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, - struct bch_inode_unpacked *target, - u32 target_snapshot) + struct bch_inode_unpacked *target) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; @@ -1880,7 +1880,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, target->bi_flags &= ~BCH_INODE_unlinked; target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; - return __bch2_fsck_write_inode(trans, target, target_snapshot); + return __bch2_fsck_write_inode(trans, target); } if (bch2_inode_should_have_bp(target) && @@ -1893,7 +1893,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, goto err; struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, - SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); + SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot)); ret = bkey_err(bp_dirent); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -1906,14 +1906,14 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, "inode %llu:%u has wrong backpointer:\n" "got %llu:%llu\n" "should be %llu:%llu", - target->bi_inum, target_snapshot, + target->bi_inum, target->bi_snapshot, target->bi_dir, target->bi_dir_offset, d.k->p.inode, d.k->p.offset)) { target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); + ret = __bch2_fsck_write_inode(trans, target); goto out; } @@ -1928,7 +1928,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, trans, inode_dir_multiple_links, "%s %llu:%u with multiple links\n%s", S_ISDIR(target->bi_mode) ? "directory" : "subvolume", - target->bi_inum, target_snapshot, buf.buf)) { + target->bi_inum, target->bi_snapshot, buf.buf)) { ret = __remove_dirent(trans, d.k->p); goto out; } @@ -1941,10 +1941,10 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, if (fsck_err_on(backpointer_exists && !target->bi_nlink, trans, inode_multiple_links_but_nlink_0, "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", - target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { + target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { target->bi_nlink++; target->bi_flags &= ~BCH_INODE_unlinked; - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); + ret = __bch2_fsck_write_inode(trans, target); if (ret) goto err; } @@ -1961,15 +1961,14 @@ noinline_for_stack static int check_dirent_target(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, - struct bch_inode_unpacked *target, - u32 target_snapshot) + struct bch_inode_unpacked *target) { struct bch_fs *c = trans->c; struct bkey_i_dirent *n; struct printbuf buf = PRINTBUF; int ret = 0; - ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); + ret = check_dirent_inode_dirent(trans, iter, d, target); if (ret) goto err; @@ -2128,7 +2127,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * u64 target_inum = le64_to_cpu(s.v->inode); u32 target_snapshot = le32_to_cpu(s.v->snapshot); - ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); + ret = lookup_inode(trans, target_inum, target_snapshot, &subvol_root); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -2144,13 +2143,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * target_inum, subvol_root.bi_parent_subvol, parent_subvol)) { subvol_root.bi_parent_subvol = parent_subvol; - ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); + subvol_root.bi_snapshot = le32_to_cpu(s.v->snapshot); + ret = __bch2_fsck_write_inode(trans, &subvol_root); if (ret) goto err; } - ret = check_dirent_target(trans, iter, d, &subvol_root, - target_snapshot); + ret = check_dirent_target(trans, iter, d, &subvol_root); if (ret) goto err; out: @@ -2243,8 +2242,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, } darray_for_each(target->inodes, i) { - ret = check_dirent_target(trans, iter, d, - &i->inode, i->snapshot); + ret = check_dirent_target(trans, iter, d, &i->inode); if (ret) goto err; } @@ -2385,7 +2383,7 @@ static int check_root_trans(struct btree_trans *trans) goto err; } - ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); + ret = lookup_inode(trans, BCACHEFS_ROOT_INO, snapshot, &root_inode); if (ret && !bch2_err_matches(ret, ENOENT)) return ret; @@ -2398,8 +2396,9 @@ static int check_root_trans(struct btree_trans *trans) bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); root_inode.bi_inum = inum; + root_inode.bi_snapshot = snapshot; - ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); + ret = __bch2_fsck_write_inode(trans, &root_inode); bch_err_msg(c, ret, "writing root inode"); } err: @@ -2566,7 +2565,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf))) - ret = reattach_inode(trans, &inode, snapshot); + ret = reattach_inode(trans, &inode); goto out; } @@ -2612,7 +2611,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino if (ret) break; - ret = reattach_inode(trans, &inode, snapshot); + ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); } break; @@ -2842,7 +2841,7 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], bch2_inode_nlink_get(&u), link->count)) { bch2_inode_nlink_set(&u, link->count); - ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); } fsck_err: return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 753c208896c3..19aa31c6812f 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -387,9 +387,7 @@ int bch2_inode_write_flags(struct btree_trans *trans, return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); } -int __bch2_fsck_write_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) +int __bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bkey_inode_buf *inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); @@ -398,19 +396,17 @@ int __bch2_fsck_write_inode(struct btree_trans *trans, return PTR_ERR(inode_p); bch2_inode_pack(inode_p, inode); - inode_p->inode.k.p.snapshot = snapshot; + inode_p->inode.k.p.snapshot = inode->bi_snapshot; return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, &inode_p->inode.k_i, BTREE_UPDATE_internal_snapshot_node); } -int bch2_fsck_write_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) +int bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_fsck_write_inode(trans, inode, snapshot)); + __bch2_fsck_write_inode(trans, inode)); bch_err_fn(trans->c, ret); return ret; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 695abd707cb6..f597d10a252d 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -112,8 +112,8 @@ static inline int bch2_inode_write(struct btree_trans *trans, return bch2_inode_write_flags(trans, iter, inode, 0); } -int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); -int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); +int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); +int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); void bch2_inode_init_early(struct bch_fs *, struct bch_inode_unpacked *); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 6845dde1b339..44210a86c367 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -102,7 +102,8 @@ static int check_subvol(struct btree_trans *trans, inode.bi_inum, inode.bi_snapshot, inode.bi_subvol, subvol.k->p.offset)) { inode.bi_subvol = subvol.k->p.offset; - ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot)); + inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + ret = __bch2_fsck_write_inode(trans, &inode); if (ret) goto err; } -- cgit v1.2.3 From bade9711e0905eaa99e2ed98fc9642acaf9ba2b5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Sep 2024 23:40:28 -0400 Subject: bcachefs: Split out check_unreachable_inodes() pass With inode backpointers, we can write a very simple check_unreachable_inodes() pass that only looks for non-unlinked inodes that are missing backpointers, and reattaches them. This simplifies check_directory_structure() so that it's now only checking for directory structure loops, Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 100 +++++++++++++++++++++++------------- fs/bcachefs/fsck.h | 1 + fs/bcachefs/recovery_passes_types.h | 1 + 3 files changed, 67 insertions(+), 35 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index b8a6ceb0cc7a..257366ec7939 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1292,6 +1292,58 @@ int bch2_check_inodes(struct bch_fs *c) return ret; } +static int check_unreachable_inode(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + int ret = 0; + + if (!bkey_is_inode(k.k)) + return 0; + + struct bch_inode_unpacked inode; + BUG_ON(bch2_inode_unpack(k, &inode)); + + if (inode.bi_subvol) + return 0; + + if (inode.bi_flags & BCH_INODE_unlinked) + return 0; + + if (fsck_err_on(!inode.bi_dir, + trans, inode_unreachable, + "unreachable inode:\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), + buf.buf))) + ret = reattach_inode(trans, &inode); +fsck_err: + printbuf_exit(&buf); + return ret; +} + +/* + * Reattach unreachable (but not unlinked) inodes + * + * Run after check_inodes() and check_dirents(), so we node that inode + * backpointer fields point to valid dirents, and every inode that has a dirent + * that points to it has its backpointer field set - so we're just looking for + * non-unlinked inodes without backpointers: + */ +int bch2_check_unreachable_inodes(struct bch_fs *c) +{ + int ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + POS_MIN, + BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_unreachable_inode(trans, &iter, k))); + bch_err_fn(c, ret); + return ret; +} + static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode) { switch (btree) { @@ -2450,22 +2502,6 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, if (ret) break; - /* - * We've checked that inode backpointers point to valid dirents; - * here, it's sufficient to check that the subvolume root has a - * dirent: - */ - if (fsck_err_on(!subvol_root.bi_dir, - trans, subvol_unreachable, - "unreachable subvolume %s", - (bch2_bkey_val_to_text(&buf, c, s.s_c), - prt_newline(&buf), - bch2_inode_unpacked_to_text(&buf, &subvol_root), - buf.buf))) { - ret = reattach_subvol(trans, s); - break; - } - u32 parent = le32_to_cpu(s.v->fs_path_parent); if (darray_u32_has(&subvol_path, parent)) { @@ -2526,12 +2562,6 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) return false; } -/* - * Check that a given inode is reachable from its subvolume root - we already - * verified subvolume connectivity: - * - * XXX: we should also be verifying that inodes are in the right subvolumes - */ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) { struct bch_fs *c = trans->c; @@ -2545,6 +2575,9 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino BUG_ON(bch2_inode_unpack(inode_k, &inode)); + if (!S_ISDIR(inode.bi_mode)) + return 0; + while (!inode.bi_subvol) { struct btree_iter dirent_iter; struct bkey_s_c_dirent d; @@ -2559,21 +2592,15 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino bch2_trans_iter_exit(trans, &dirent_iter); if (bch2_err_matches(ret, ENOENT)) { - ret = 0; - if (fsck_err(trans, inode_unreachable, - "unreachable inode\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, inode_k), - buf.buf))) - ret = reattach_inode(trans, &inode); + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, inode_k); + bch_err(c, "unreachable inode in check_directory_structure: %s\n%s", + bch2_err_str(ret), buf.buf); goto out; } bch2_trans_iter_exit(trans, &dirent_iter); - if (!S_ISDIR(inode.bi_mode)) - break; - ret = darray_push(p, ((struct pathbuf_entry) { .inum = inode.bi_inum, .snapshot = snapshot, @@ -2626,9 +2653,8 @@ fsck_err: } /* - * Check for unreachable inodes, as well as loops in the directory structure: - * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's - * unreachable: + * Check for loops in the directory structure: all other connectivity issues + * have been fixed by prior passes */ int bch2_check_directory_structure(struct bch_fs *c) { @@ -2756,6 +2782,10 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, if (S_ISDIR(u.bi_mode)) continue; + /* + * Previous passes ensured that bi_nlink is nonzero if + * it had multiple hardlinks: + */ if (!u.bi_nlink) continue; diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h index a4ef94271784..1cca31011530 100644 --- a/fs/bcachefs/fsck.h +++ b/fs/bcachefs/fsck.h @@ -9,6 +9,7 @@ int bch2_check_dirents(struct bch_fs *); int bch2_check_xattrs(struct bch_fs *); int bch2_check_root(struct bch_fs *); int bch2_check_subvolume_structure(struct bch_fs *); +int bch2_check_unreachable_inodes(struct bch_fs *); int bch2_check_directory_structure(struct bch_fs *); int bch2_check_nlinks(struct bch_fs *); int bch2_fix_reflink_p(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h index 50406ce0e4ef..9d96c06e365c 100644 --- a/fs/bcachefs/recovery_passes_types.h +++ b/fs/bcachefs/recovery_passes_types.h @@ -46,6 +46,7 @@ x(check_dirents, 27, PASS_FSCK) \ x(check_xattrs, 28, PASS_FSCK) \ x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ + x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ x(check_nlinks, 31, PASS_FSCK) \ -- cgit v1.2.3 From 38864eccf78b4e8ab9e2b7a4320943b1feb6872a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Sep 2024 19:03:19 -0400 Subject: bcachefs: reattach_inode() now correctly handles interior snapshot nodes When we find an unreachable inode, we now reattach it in the oldest version that needs to be reattached (thus avoiding redundant work reattaching every single version), and we now fix up inode -> dirent backpointers in newer versions as needed - or white out the reattaching dirent in newer versions, if the newer version isn't supposed to be reattached. This results in the second verify fsck now passing cleanly after repairing on a user-provided filesystem image with thousands of different snapshots. Reported-by: Christopher Snowhill Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 8 +++ fs/bcachefs/fsck.c | 170 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 158 insertions(+), 20 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 78e63ad7d380..31a58bf46fdb 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -857,6 +857,14 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, _start,\ SPOS_MAX, _flags, _k, _ret) +#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \ + _start, _flags, _k, _ret) \ + for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)); \ + (_k) = bch2_btree_iter_peek_prev_type(&(_iter), _flags), \ + !((_ret) = bkey_err(_k)) && (_k).k; \ + bch2_btree_iter_rewind(&(_iter))) + #define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 257366ec7939..92f9cabb6eae 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -326,17 +326,54 @@ err: return ret; } +static inline bool inode_should_reattach(struct bch_inode_unpacked *inode) +{ + if (inode->bi_inum == BCACHEFS_ROOT_INO && + inode->bi_subvol == BCACHEFS_ROOT_SUBVOL) + return false; + + return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked); +} + +static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 snapshot) +{ + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_dirents, + SPOS(d_pos.inode, d_pos.offset, snapshot), + BTREE_ITER_intent| + BTREE_ITER_with_updates); + int ret = bkey_err(k); + if (ret) + return ret; + + if (bpos_eq(k.k->p, d_pos)) { + /* + * delet_at() doesn't work because the update path doesn't + * internally use BTREE_ITER_with_updates yet + */ + struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); + ret = PTR_ERR_OR_ZERO(k); + if (ret) + goto err; + + bkey_init(&k->k); + k->k.type = KEY_TYPE_whiteout; + k->k.p = iter.pos; + ret = bch2_trans_update(trans, &iter, k, BTREE_UPDATE_internal_snapshot_node); + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bch_fs *c = trans->c; - struct bch_hash_info dir_hash; struct bch_inode_unpacked lostfound; char name_buf[20]; - struct qstr name; - u64 dir_offset = 0; - u32 dirent_snapshot = inode->bi_snapshot; int ret; + u32 dirent_snapshot = inode->bi_snapshot; if (inode->bi_subvol) { inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; @@ -367,9 +404,10 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (ret) return ret; - dir_hash = bch2_hash_info_init(c, &lostfound); + struct bch_hash_info dir_hash = bch2_hash_info_init(c, &lostfound); + struct qstr name = (struct qstr) QSTR(name_buf); - name = (struct qstr) QSTR(name_buf); + inode->bi_dir = lostfound.bi_inum; ret = bch2_dirent_create_snapshot(trans, inode->bi_parent_subvol, lostfound.bi_inum, @@ -378,17 +416,70 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * inode_d_type(inode), &name, inode->bi_subvol ?: inode->bi_inum, - &dir_offset, + &inode->bi_dir_offset, STR_HASH_must_create); if (ret) { bch_err_msg(c, ret, "error creating dirent"); return ret; } - inode->bi_dir = lostfound.bi_inum; - inode->bi_dir_offset = dir_offset; + ret = __bch2_fsck_write_inode(trans, inode); + if (ret) + return ret; + + /* + * Fix up inodes in child snapshots: if they should also be reattached + * update the backpointer field, if they should not be we need to emit + * whiteouts for the dirent we just created. + */ + if (!inode->bi_subvol && bch2_snapshot_is_leaf(c, inode->bi_snapshot) <= 0) { + snapshot_id_list whiteouts_done; + struct btree_iter iter; + struct bkey_s_c k; + + darray_init(&whiteouts_done); - return __bch2_fsck_write_inode(trans, inode); + for_each_btree_key_reverse_norestart(trans, iter, + BTREE_ID_inodes, SPOS(0, inode->bi_inum, inode->bi_snapshot - 1), + BTREE_ITER_all_snapshots|BTREE_ITER_intent, k, ret) { + if (k.k->p.offset != inode->bi_inum) + break; + + if (!bkey_is_inode(k.k) || + !bch2_snapshot_is_ancestor(c, k.k->p.snapshot, inode->bi_snapshot) || + snapshot_list_has_ancestor(c, &whiteouts_done, k.k->p.snapshot)) + continue; + + struct bch_inode_unpacked child_inode; + bch2_inode_unpack(k, &child_inode); + + if (!inode_should_reattach(&child_inode)) { + ret = maybe_delete_dirent(trans, + SPOS(lostfound.bi_inum, inode->bi_dir_offset, + dirent_snapshot), + k.k->p.snapshot); + if (ret) + break; + + ret = snapshot_list_add(c, &whiteouts_done, k.k->p.snapshot); + if (ret) + break; + } else { + iter.snapshot = k.k->p.snapshot; + child_inode.bi_dir = inode->bi_dir; + child_inode.bi_dir_offset = inode->bi_dir_offset; + + ret = bch2_inode_write_flags(trans, &iter, &child_inode, + BTREE_UPDATE_internal_snapshot_node); + if (ret) + break; + } + } + darray_exit(&whiteouts_done); + bch2_trans_iter_exit(trans, &iter); + } + + return ret; } static int remove_backpointer(struct btree_trans *trans, @@ -1292,11 +1383,49 @@ int bch2_check_inodes(struct bch_fs *c) return ret; } +static int find_oldest_inode_needs_reattach(struct btree_trans *trans, + struct bch_inode_unpacked *inode) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + /* + * We look for inodes to reattach in natural key order, leaves first, + * but we should do the reattach at the oldest version that needs to be + * reattached: + */ + for_each_btree_key_norestart(trans, iter, + BTREE_ID_inodes, + SPOS(0, inode->bi_inum, inode->bi_snapshot + 1), + BTREE_ITER_all_snapshots, k, ret) { + if (k.k->p.offset != inode->bi_inum) + break; + + if (!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, k.k->p.snapshot)) + continue; + + if (!bkey_is_inode(k.k)) + break; + + struct bch_inode_unpacked parent_inode; + bch2_inode_unpack(k, &parent_inode); + + if (!inode_should_reattach(&parent_inode)) + break; + + *inode = parent_inode; + } + bch2_trans_iter_exit(trans, &iter); + + return ret; +} + static int check_unreachable_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { - struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; int ret = 0; @@ -1306,18 +1435,17 @@ static int check_unreachable_inode(struct btree_trans *trans, struct bch_inode_unpacked inode; BUG_ON(bch2_inode_unpack(k, &inode)); - if (inode.bi_subvol) + if (!inode_should_reattach(&inode)) return 0; - if (inode.bi_flags & BCH_INODE_unlinked) - return 0; + ret = find_oldest_inode_needs_reattach(trans, &inode); + if (ret) + return ret; - if (fsck_err_on(!inode.bi_dir, - trans, inode_unreachable, - "unreachable inode:\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), - buf.buf))) + if (fsck_err(trans, inode_unreachable, + "unreachable inode:\n%s", + (bch2_inode_unpacked_to_text(&buf, &inode), + buf.buf))) ret = reattach_inode(trans, &inode); fsck_err: printbuf_exit(&buf); @@ -1331,6 +1459,8 @@ fsck_err: * backpointer fields point to valid dirents, and every inode that has a dirent * that points to it has its backpointer field set - so we're just looking for * non-unlinked inodes without backpointers: + * + * XXX: this is racy w.r.t. hardlink removal in online fsck */ int bch2_check_unreachable_inodes(struct bch_fs *c) { -- cgit v1.2.3 From cba31b7eee41eb34941d040bddaed3628f160cae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 23:30:05 -0400 Subject: bcachefs: Delete vestigal check_inode() checks BCH_INODE_i_size_dirty dates from before we had logged operations for truncate (as well as finsert) - it hasn't been needed since before bcachefs was mainlined. BCH_INODE_i_sectors_dirty hasn't been needed since we started always updating i_sectors transactionally - it's been unused for even longer. BCH_INODE_backptr_untrusted also hasn't been used since prior to mainlining; when unlinking a hardling, we zero out the backpointer fields if they're for the dirent being removed. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 80 ++++-------------------------------------------------- 1 file changed, 5 insertions(+), 75 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 92f9cabb6eae..171e3e47db5c 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1085,7 +1085,6 @@ static int check_inode_dirent_inode(struct btree_trans *trans, */ inode->bi_dir = 0; inode->bi_dir_offset = 0; - inode->bi_flags &= ~BCH_INODE_backptr_untrusted; *write_inode = true; } @@ -1117,8 +1116,7 @@ static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, struct bch_inode_unpacked *prev, - struct snapshots_seen *s, - bool full) + struct snapshots_seen *s) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; @@ -1141,12 +1139,6 @@ static int check_inode(struct btree_trans *trans, BUG_ON(bch2_inode_unpack(k, &u)); - if (!full && - !(u.bi_flags & (BCH_INODE_i_size_dirty| - BCH_INODE_i_sectors_dirty| - BCH_INODE_unlinked))) - return 0; - if (prev->bi_inum != u.bi_inum) *prev = u; @@ -1192,7 +1184,7 @@ static int check_inode(struct btree_trans *trans, ret = 0; } - if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && + if ((u.bi_flags & BCH_INODE_unlinked) && bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { struct bpos new_min_pos; @@ -1200,7 +1192,7 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; - u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; + u.bi_flags &= ~BCH_INODE_unlinked; ret = __bch2_fsck_write_inode(trans, &u); @@ -1249,66 +1241,6 @@ static int check_inode(struct btree_trans *trans, } } - /* i_size_dirty is vestigal, since we now have logged ops for truncate * */ - if (u.bi_flags & BCH_INODE_i_size_dirty && - (!test_bit(BCH_FS_clean_recovery, &c->flags) || - fsck_err(trans, inode_i_size_dirty_but_clean, - "filesystem marked clean, but inode %llu has i_size dirty", - u.bi_inum))) { - bch_verbose(c, "truncating inode %llu", u.bi_inum); - - /* - * XXX: need to truncate partial blocks too here - or ideally - * just switch units to bytes and that issue goes away - */ - ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, - SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9, - iter->pos.snapshot), - POS(u.bi_inum, U64_MAX), - 0, NULL); - bch_err_msg(c, ret, "in fsck truncating inode"); - if (ret) - return ret; - - /* - * We truncated without our normal sector accounting hook, just - * make sure we recalculate it: - */ - u.bi_flags |= BCH_INODE_i_sectors_dirty; - - u.bi_flags &= ~BCH_INODE_i_size_dirty; - do_update = true; - } - - /* i_sectors_dirty is vestigal, i_sectors is always updated transactionally */ - if (u.bi_flags & BCH_INODE_i_sectors_dirty && - (!test_bit(BCH_FS_clean_recovery, &c->flags) || - fsck_err(trans, inode_i_sectors_dirty_but_clean, - "filesystem marked clean, but inode %llu has i_sectors dirty", - u.bi_inum))) { - s64 sectors; - - bch_verbose(c, "recounting sectors for inode %llu", - u.bi_inum); - - sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); - if (sectors < 0) { - bch_err_msg(c, sectors, "in fsck recounting inode sectors"); - return sectors; - } - - u.bi_sectors = sectors; - u.bi_flags &= ~BCH_INODE_i_sectors_dirty; - do_update = true; - } - - if (u.bi_flags & BCH_INODE_backptr_untrusted) { - u.bi_dir = 0; - u.bi_dir_offset = 0; - u.bi_flags &= ~BCH_INODE_backptr_untrusted; - do_update = true; - } - if (fsck_err_on(u.bi_parent_subvol && (u.bi_subvol == 0 || u.bi_subvol == BCACHEFS_ROOT_SUBVOL), @@ -1365,7 +1297,6 @@ err_noprint: int bch2_check_inodes(struct bch_fs *c) { - bool full = c->opts.fsck; struct bch_inode_unpacked prev = { 0 }; struct snapshots_seen s; @@ -1376,7 +1307,7 @@ int bch2_check_inodes(struct bch_fs *c) POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_inode(trans, &iter, k, &prev, &s, full))); + check_inode(trans, &iter, k, &prev, &s))); snapshots_seen_exit(&s); bch_err_fn(c, ret); @@ -1876,8 +1807,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) continue; - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && - k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && + if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && !bkey_extent_is_reservation(k), trans, extent_past_end_of_inode, "extent type past end of inode %llu:%u, i_size %llu\n %s", -- cgit v1.2.3 From 9b23fdbd5d29beb5bd272c304e0d978edd32f513 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Sep 2024 22:11:37 -0400 Subject: bcachefs: bcachefs_metadata_version_inode_has_child_snapshots There's an inherent race in taking a snapshot while an unlinked file is open, and then reattaching it in the child snapshot. In the interior snapshot node the file will appear unlinked, as though it should be deleted - it's not referenced by anything in that snapshot - but we can't delete it, because the file data is referenced by the child snapshot. This was being handled incorrectly with propagate_key_to_snapshot_leaves() - but that doesn't resolve the fundamental inconsistency of "this file looks like it should be deleted according to normal rules, but - ". To fix this, we need to fix the rule for when an inode is deleted. The previous rule, ignoring snapshots (there was no well-defined rule for with snapshots) was: Unlinked, non open files are deleted, either at recovery time or during online fsck The new rule is: Unlinked, non open files, that do not exist in child snapshots, are deleted. To make this work transactionally, we add a new inode flag, BCH_INODE_has_child_snapshot; it overrides BCH_INODE_unlinked when considering whether to delete an inode, or put it on the deleted list. For transactional consistency, clearing it handled by the inode trigger: when deleting an inode we check if there are parent inodes which can now have the BCH_INODE_has_child_snapshot flag cleared. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 +- fs/bcachefs/fs.c | 21 +++- fs/bcachefs/fs.h | 9 +- fs/bcachefs/fsck.c | 51 +++----- fs/bcachefs/inode.c | 274 ++++++++++++++++++++++++++++++++++++----- fs/bcachefs/inode.h | 10 ++ fs/bcachefs/inode_format.h | 3 +- fs/bcachefs/sb-downgrade.c | 5 +- fs/bcachefs/sb-errors_format.h | 4 +- 9 files changed, 302 insertions(+), 78 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 84832c2d4df9..5004f6ba997c 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -678,7 +678,8 @@ struct bch_sb_field_ext { x(disk_accounting_v2, BCH_VERSION(1, 9)) \ x(disk_accounting_v3, BCH_VERSION(1, 10)) \ x(disk_accounting_inum, BCH_VERSION(1, 11)) \ - x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) + x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \ + x(inode_has_child_snapshots, BCH_VERSION(1, 13)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5bfc26d58270..23cae92d313d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -174,11 +174,30 @@ static const struct rhashtable_params bch2_vfs_inodes_params = { .automatic_shrinking = true, }; -struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) +static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) { return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); } +bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) +{ + if (!test_bit(BCH_FS_started, &c->flags)) + return false; + + subvol_inum inum = { + .subvol = snapshot_t(c, p.snapshot)->subvol, + .inum = p.offset, + }; + + /* snapshot tree interior node, can't safely delete while online (yet) */ + if (!inum.subvol) { + bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot); + return true; + } + + return __bch2_inode_hash_find(c, inum) != NULL; +} + static void __wait_on_freeing_inode(struct bch_fs *c, struct bch_inode_info *inode, subvol_inum inum) diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index da74ecc236e7..40dbd5774d0b 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -54,8 +54,6 @@ static inline subvol_inum inode_inum(struct bch_inode_info *inode) return inode->ei_inum; } -struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum); - /* * Set if we've gotten a btree error for this inode, and thus the vfs inode and * btree inode may be inconsistent: @@ -181,6 +179,8 @@ void bch2_inode_update_after_write(struct btree_trans *, int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, inode_set_fn, void *, unsigned); +bool bch2_inode_is_open(struct bch_fs *c, struct bpos p); + int bch2_setattr_nonsize(struct mnt_idmap *, struct bch_inode_info *, struct iattr *); @@ -198,10 +198,7 @@ int bch2_vfs_init(void); #define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); }) -static inline struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) -{ - return NULL; -} +static inline bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) { return false; } static inline void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) {} diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 171e3e47db5c..f00a36f62323 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1096,22 +1096,6 @@ fsck_err: return ret; } -static bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) -{ - subvol_inum inum = { - .subvol = snapshot_t(c, p.snapshot)->subvol, - .inum = p.offset, - }; - - /* snapshot tree corruption, can't safely delete */ - if (!inum.subvol) { - bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot); - return true; - } - - return __bch2_inode_hash_find(c, inum) != NULL; -} - static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, @@ -1184,28 +1168,27 @@ static int check_inode(struct btree_trans *trans, ret = 0; } - if ((u.bi_flags & BCH_INODE_unlinked) && - bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { - struct bpos new_min_pos; - - ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); - if (ret) - goto err; - - u.bi_flags &= ~BCH_INODE_unlinked; - - ret = __bch2_fsck_write_inode(trans, &u); + ret = bch2_inode_has_child_snapshots(trans, k.k->p); + if (ret < 0) + goto err; - bch_err_msg(c, ret, "in fsck updating inode"); + if (fsck_err_on(ret != !!(u.bi_flags & BCH_INODE_has_child_snapshot), + trans, inode_has_child_snapshots_wrong, + "inode has_child_snapshots flag wrong (should be %u)\n%s", + ret, + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &u), + buf.buf))) { if (ret) - goto err_noprint; - - if (!bpos_eq(new_min_pos, POS_MIN)) - bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); - goto err_noprint; + u.bi_flags |= BCH_INODE_has_child_snapshot; + else + u.bi_flags &= ~BCH_INODE_has_child_snapshot; + do_update = true; } + ret = 0; - if (u.bi_flags & BCH_INODE_unlinked) { + if ((u.bi_flags & BCH_INODE_unlinked) && + !(u.bi_flags & BCH_INODE_has_child_snapshot)) { if (!test_bit(BCH_FS_started, &c->flags)) { /* * If we're not in online fsck, don't delete unlinked diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 74d7a42ba1a2..9d6040d4ba39 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -12,6 +12,7 @@ #include "error.h" #include "extents.h" #include "extent_update.h" +#include "fs.h" #include "inode.h" #include "str_hash.h" #include "snapshot.h" @@ -34,6 +35,8 @@ static const char * const bch2_inode_flag_strs[] = { }; #undef x +static int delete_ancestor_snapshot_inodes(struct btree_trans *, struct bpos); + static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; static int inode_decode_field(const u8 *in, const u8 *end, @@ -575,9 +578,137 @@ static inline u64 bkey_inode_flags(struct bkey_s_c k) } } -static inline bool bkey_is_deleted_inode(struct bkey_s_c k) +static inline void bkey_inode_flags_set(struct bkey_s k, u64 f) +{ + switch (k.k->type) { + case KEY_TYPE_inode: + bkey_s_to_inode(k).v->bi_flags = cpu_to_le32(f); + return; + case KEY_TYPE_inode_v2: + bkey_s_to_inode_v2(k).v->bi_flags = cpu_to_le64(f); + return; + case KEY_TYPE_inode_v3: + bkey_s_to_inode_v3(k).v->bi_flags = cpu_to_le64(f); + return; + default: + BUG(); + } +} + +static inline bool bkey_is_unlinked_inode(struct bkey_s_c k) +{ + unsigned f = bkey_inode_flags(k) & BCH_INODE_unlinked; + + return (f & BCH_INODE_unlinked) && !(f & BCH_INODE_has_child_snapshot); +} + +static struct bkey_s_c +bch2_bkey_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter *iter, + enum btree_id btree, struct bpos pos, + unsigned flags) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c k; + int ret = 0; + + for_each_btree_key_upto_norestart(trans, *iter, btree, + bpos_successor(pos), + SPOS(pos.inode, pos.offset, U32_MAX), + flags|BTREE_ITER_all_snapshots, k, ret) + if (bch2_snapshot_is_ancestor(c, pos.snapshot, k.k->p.snapshot)) + return k; + + bch2_trans_iter_exit(trans, iter); + return ret ? bkey_s_c_err(ret) : bkey_s_c_null; +} + +static struct bkey_s_c +bch2_inode_get_iter_snapshot_parent(struct btree_trans *trans, struct btree_iter *iter, + struct bpos pos, unsigned flags) +{ + struct bkey_s_c k; +again: + k = bch2_bkey_get_iter_snapshot_parent(trans, iter, BTREE_ID_inodes, pos, flags); + if (!k.k || + bkey_err(k) || + bkey_is_inode(k.k)) + return k; + + bch2_trans_iter_exit(trans, iter); + pos = k.k->p; + goto again; +} + +int __bch2_inode_has_child_snapshots(struct btree_trans *trans, struct bpos pos) { - return bkey_inode_flags(k) & BCH_INODE_unlinked; + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + for_each_btree_key_upto_norestart(trans, iter, + BTREE_ID_inodes, POS(0, pos.offset), bpos_predecessor(pos), + BTREE_ITER_all_snapshots| + BTREE_ITER_with_updates, k, ret) + if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot) && + bkey_is_inode(k.k)) { + ret = 1; + break; + } + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static int update_inode_has_children(struct btree_trans *trans, + struct bkey_s k, + bool have_child) +{ + if (!have_child) { + int ret = bch2_inode_has_child_snapshots(trans, k.k->p); + if (ret) + return ret < 0 ? ret : 0; + } + + u64 f = bkey_inode_flags(k.s_c); + if (have_child != !!(f & BCH_INODE_has_child_snapshot)) + bkey_inode_flags_set(k, f ^ BCH_INODE_has_child_snapshot); + + return 0; +} + +static int update_parent_inode_has_children(struct btree_trans *trans, struct bpos pos, + bool have_child) +{ + struct btree_iter iter; + struct bkey_s_c k = bch2_inode_get_iter_snapshot_parent(trans, + &iter, pos, BTREE_ITER_with_updates); + int ret = bkey_err(k); + if (ret) + return ret; + if (!k.k) + return 0; + + if (!have_child) { + ret = bch2_inode_has_child_snapshots(trans, k.k->p); + if (ret) { + ret = ret < 0 ? ret : 0; + goto err; + } + } + + u64 f = bkey_inode_flags(k); + if (have_child != !!(f & BCH_INODE_has_child_snapshot)) { + struct bkey_i *update = bch2_bkey_make_mut(trans, &iter, &k, + BTREE_UPDATE_internal_snapshot_node); + ret = PTR_ERR_OR_ZERO(update); + if (ret) + goto err; + + bkey_inode_flags_set(bkey_i_to_s(update), f ^ BCH_INODE_has_child_snapshot); + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; } int bch2_trigger_inode(struct btree_trans *trans, @@ -586,6 +717,8 @@ int bch2_trigger_inode(struct btree_trans *trans, struct bkey_s new, enum btree_iter_update_trigger_flags flags) { + struct bch_fs *c = trans->c; + if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) { BUG_ON(!trans->journal_res.seq); bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq); @@ -599,13 +732,41 @@ int bch2_trigger_inode(struct btree_trans *trans, return ret; } - int deleted_delta = (int) bkey_is_deleted_inode(new.s_c) - - (int) bkey_is_deleted_inode(old); - if ((flags & BTREE_TRIGGER_transactional) && deleted_delta) { - int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, - new.k->p, deleted_delta > 0); - if (ret) - return ret; + if (flags & BTREE_TRIGGER_transactional) { + int unlinked_delta = (int) bkey_is_unlinked_inode(new.s_c) - + (int) bkey_is_unlinked_inode(old); + if (unlinked_delta) { + int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, + new.k->p, unlinked_delta > 0); + if (ret) + return ret; + } + + /* + * If we're creating or deleting an inode at this snapshot ID, + * and there might be an inode in a parent snapshot ID, we might + * need to set or clear the has_child_snapshot flag on the + * parent. + */ + int deleted_delta = (int) bkey_is_inode(new.k) - + (int) bkey_is_inode(old.k); + if (deleted_delta && + bch2_snapshot_parent(c, new.k->p.snapshot)) { + int ret = update_parent_inode_has_children(trans, new.k->p, + deleted_delta > 0); + if (ret) + return ret; + } + + /* + * When an inode is first updated in a new snapshot, we may need + * to clear has_child_snapshot + */ + if (deleted_delta > 0) { + int ret = update_inode_has_children(trans, new, false); + if (ret) + return ret; + } } return 0; @@ -888,6 +1049,11 @@ err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; + if (ret) + goto err2; + + ret = delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot)); +err2: bch2_trans_put(trans); return ret; } @@ -992,7 +1158,7 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i return 0; } -int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) +static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { struct bch_fs *c = trans->c; struct btree_iter iter = { NULL }; @@ -1055,6 +1221,44 @@ err: return ret ?: -BCH_ERR_transaction_restart_nested; } +/* + * After deleting an inode, there may be versions in older snapshots that should + * also be deleted - if they're not referenced by sibling snapshots and not open + * in other subvolumes: + */ +static int delete_ancestor_snapshot_inodes(struct btree_trans *trans, struct bpos pos) +{ + struct btree_iter iter; + struct bkey_s_c k; + int ret; +next_parent: + ret = lockrestart_do(trans, + bkey_err(k = bch2_inode_get_iter_snapshot_parent(trans, &iter, pos, 0))); + if (ret || !k.k) + return ret; + + bool unlinked = bkey_is_unlinked_inode(k); + pos = k.k->p; + bch2_trans_iter_exit(trans, &iter); + + if (!unlinked) + return 0; + + if (bch2_inode_is_open(trans->c, pos)) + return 0; + + ret = __bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot); + if (ret) + return ret; + goto next_parent; +} + +int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) +{ + return __bch2_inode_rm_snapshot(trans, inum, snapshot) ?: + delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)); +} + static int may_delete_deleted_inode(struct btree_trans *trans, struct btree_iter *iter, struct bpos pos, @@ -1064,6 +1268,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct btree_iter inode_iter; struct bkey_s_c k; struct bch_inode_unpacked inode; + struct printbuf buf = PRINTBUF; int ret; k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_cached); @@ -1099,6 +1304,31 @@ static int may_delete_deleted_inode(struct btree_trans *trans, pos.offset, pos.snapshot)) goto delete; + if (fsck_err_on(inode.bi_flags & BCH_INODE_has_child_snapshot, + trans, deleted_inode_has_child_snapshots, + "inode with child snapshots %llu:%u in deleted_inodes btree", + pos.offset, pos.snapshot)) + goto delete; + + ret = bch2_inode_has_child_snapshots(trans, k.k->p); + if (ret < 0) + goto out; + + if (ret) { + if (fsck_err(trans, inode_has_child_snapshots_wrong, + "inode has_child_snapshots flag wrong (should be set)\n%s", + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &inode), + buf.buf))) { + inode.bi_flags |= BCH_INODE_has_child_snapshot; + ret = __bch2_fsck_write_inode(trans, &inode); + if (ret) + goto out; + } + goto delete; + + } + if (test_bit(BCH_FS_clean_recovery, &c->flags) && !fsck_err(trans, deleted_inode_but_clean, "filesystem marked as clean but have deleted inode %llu:%u", @@ -1107,33 +1337,11 @@ static int may_delete_deleted_inode(struct btree_trans *trans, goto out; } - if (bch2_snapshot_is_internal_node(c, pos.snapshot)) { - struct bpos new_min_pos; - - ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos); - if (ret) - goto out; - - inode.bi_flags &= ~BCH_INODE_unlinked; - - ret = bch2_inode_write_flags(trans, &inode_iter, &inode, - BTREE_UPDATE_internal_snapshot_node); - bch_err_msg(c, ret, "clearing inode unlinked flag"); - if (ret) - goto out; - - /* - * We'll need another write buffer flush to pick up the new - * unlinked inodes in the snapshot leaves: - */ - *need_another_pass = true; - goto out; - } - ret = 1; out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); + printbuf_exit(&buf); return ret; delete: ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 9c1f67705684..c8e98443e2d4 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -5,6 +5,7 @@ #include "bkey.h" #include "bkey_methods.h" #include "opts.h" +#include "snapshot.h" enum bch_validate_flags; extern const char * const bch2_inode_opts[]; @@ -17,6 +18,15 @@ int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +int __bch2_inode_has_child_snapshots(struct btree_trans *, struct bpos); + +static inline int bch2_inode_has_child_snapshots(struct btree_trans *trans, struct bpos pos) +{ + return bch2_snapshot_is_leaf(trans->c, pos.snapshot) <= 0 + ? __bch2_inode_has_child_snapshots(trans, pos) + : 0; +} + int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h index 83d107331edf..a204e46b6b47 100644 --- a/fs/bcachefs/inode_format.h +++ b/fs/bcachefs/inode_format.h @@ -133,7 +133,8 @@ enum inode_opt_id { x(i_size_dirty, 5) \ x(i_sectors_dirty, 6) \ x(unlinked, 7) \ - x(backptr_untrusted, 8) + x(backptr_untrusted, 8) \ + x(has_child_snapshot, 9) /* bits 20+ reserved for packed fields below: */ diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 5102059a0f1d..ae715ff658e8 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -78,7 +78,10 @@ BCH_FSCK_ERR_accounting_mismatch) \ x(rebalance_work_acct_fix, \ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ - BCH_FSCK_ERR_accounting_mismatch) + BCH_FSCK_ERR_accounting_mismatch) \ + x(inode_has_child_snapshots, \ + BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ + BCH_FSCK_ERR_inode_has_child_snapshots_wrong) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 4135b1ea2fec..4cdddf15d752 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -225,11 +225,13 @@ enum bch_fsck_flags { x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ + x(inode_has_child_snapshots_wrong, 287, 0) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ x(deleted_inode_not_unlinked, 214, FSCK_AUTOFIX) \ + x(deleted_inode_has_child_snapshots, 288, FSCK_AUTOFIX) \ x(extent_overlapping, 215, 0) \ x(key_in_missing_inode, 216, 0) \ x(key_in_wrong_inode_type, 217, 0) \ @@ -298,7 +300,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 287, 0) + x(MAX, 289, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -- cgit v1.2.3 From 9d86178782a25fac105e550e1c29c7d3f8470116 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Oct 2024 21:23:41 -0400 Subject: bcachefs: bch2_inode_or_descendents_is_open() fsck can now correctly check if inodes in interior snapshot nodes are open/in use. - Tweak the vfs inode rhashtable so that the subvolume ID isn't hashed, meaning inums in different subvolumes will hash to the same slot. Note that this is a hack, and will cause problems if anyone ever has the same file in many different snapshots open all at the same time. - Then check if any of those subvolumes is a descendent of the snapshot ID being checked Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 106 ++++++++++++++++++++++++++++++++++++++++++++-------- fs/bcachefs/fs.h | 6 +-- fs/bcachefs/fsck.c | 7 +++- fs/bcachefs/inode.c | 5 ++- 4 files changed, 103 insertions(+), 21 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 23cae92d313d..e9e32d21f82d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -157,6 +157,20 @@ static bool subvol_inum_eq(subvol_inum a, subvol_inum b) return a.subvol == b.subvol && a.inum == b.inum; } +static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed) +{ + const subvol_inum *inum = data; + + return jhash(&inum->inum, sizeof(inum->inum), seed); +} + +static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed) +{ + const struct bch_inode_info *inode = data; + + return bch2_vfs_inode_hash_fn(&inode->ei_inum, sizeof(inode->ei_inum), seed); +} + static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg, const void *obj) { @@ -170,32 +184,93 @@ static const struct rhashtable_params bch2_vfs_inodes_params = { .head_offset = offsetof(struct bch_inode_info, hash), .key_offset = offsetof(struct bch_inode_info, ei_inum), .key_len = sizeof(subvol_inum), + .hashfn = bch2_vfs_inode_hash_fn, + .obj_hashfn = bch2_vfs_inode_obj_hash_fn, .obj_cmpfn = bch2_vfs_inode_cmp_fn, .automatic_shrinking = true, }; -static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) +int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) { - return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); -} + struct bch_fs *c = trans->c; + struct rhashtable *ht = &c->vfs_inodes_table; + subvol_inum inum = (subvol_inum) { .inum = p.offset }; + DARRAY(u32) subvols; + int ret = 0; -bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) -{ if (!test_bit(BCH_FS_started, &c->flags)) return false; - subvol_inum inum = { - .subvol = snapshot_t(c, p.snapshot)->subvol, - .inum = p.offset, - }; + darray_init(&subvols); +restart_from_top: + + /* + * Tweaked version of __rhashtable_lookup(); we need to get a list of + * subvolumes in which the given inode number is open. + * + * For this to work, we don't include the subvolume ID in the key that + * we hash - all inodes with the same inode number regardless of + * subvolume will hash to the same slot. + * + * This will be less than ideal if the same file is ever open + * simultaneously in many different snapshots: + */ + rcu_read_lock(); + struct rhash_lock_head __rcu *const *bkt; + struct rhash_head *he; + unsigned int hash; + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); +restart: + hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params); + bkt = rht_bucket(tbl, hash); + do { + struct bch_inode_info *inode; + + rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) { + if (inode->ei_inum.inum == inum.inum) { + ret = darray_push_gfp(&subvols, inode->ei_inum.subvol, + GFP_NOWAIT|__GFP_NOWARN); + if (ret) { + rcu_read_unlock(); + ret = darray_make_room(&subvols, 1); + if (ret) + goto err; + subvols.nr = 0; + goto restart_from_top; + } + } + } + /* An object might have been moved to a different hash chain, + * while we walk along it - better check and retry. + */ + } while (he != RHT_NULLS_MARKER(bkt)); + + /* Ensure we see any new tables. */ + smp_rmb(); + + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(tbl)) + goto restart; + rcu_read_unlock(); + + darray_for_each(subvols, i) { + u32 snap; + ret = bch2_subvolume_get_snapshot(trans, *i, &snap); + if (ret) + goto err; - /* snapshot tree interior node, can't safely delete while online (yet) */ - if (!inum.subvol) { - bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot); - return true; + ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot); + if (ret) + break; } +err: + darray_exit(&subvols); + return ret; +} - return __bch2_inode_hash_find(c, inum) != NULL; +static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) +{ + return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); } static void __wait_on_freeing_inode(struct bch_fs *c, @@ -271,7 +346,8 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, set_bit(EI_INODE_HASHED, &inode->ei_flags); retry: - if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table, + if (unlikely(rhashtable_lookup_insert_key(&c->vfs_inodes_table, + &inode->ei_inum, &inode->hash, bch2_vfs_inodes_params))) { old = bch2_inode_hash_find(c, trans, inode->ei_inum); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 40dbd5774d0b..59f9f7ae728d 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -146,6 +146,8 @@ struct bch_inode_info * __bch2_create(struct mnt_idmap *, struct bch_inode_info *, struct dentry *, umode_t, dev_t, subvol_inum, unsigned); +int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p); + int bch2_fs_quota_transfer(struct bch_fs *, struct bch_inode_info *, struct bch_qid, @@ -179,8 +181,6 @@ void bch2_inode_update_after_write(struct btree_trans *, int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, inode_set_fn, void *, unsigned); -bool bch2_inode_is_open(struct bch_fs *c, struct bpos p); - int bch2_setattr_nonsize(struct mnt_idmap *, struct bch_inode_info *, struct iattr *); @@ -198,7 +198,7 @@ int bch2_vfs_init(void); #define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); }) -static inline bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) { return false; } +static inline int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) { return 0; } static inline void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) {} diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f00a36f62323..a1087fd292e4 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1213,7 +1213,11 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; } else { - if (fsck_err_on(!bch2_inode_is_open(c, k.k->p), + ret = bch2_inode_or_descendents_is_open(trans, k.k->p); + if (ret < 0) + goto err; + + if (fsck_err_on(!ret, trans, inode_unlinked_and_not_open, "inode %llu%u unlinked and not open", u.bi_inum, u.bi_snapshot)) { @@ -1221,6 +1225,7 @@ static int check_inode(struct btree_trans *trans, bch_err_msg(c, ret, "in fsck deleting inode"); goto err_noprint; } + ret = 0; } } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 9d6040d4ba39..2c037e84fbae 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1244,8 +1244,9 @@ next_parent: if (!unlinked) return 0; - if (bch2_inode_is_open(trans->c, pos)) - return 0; + ret = lockrestart_do(trans, bch2_inode_or_descendents_is_open(trans, pos)); + if (ret) + return ret < 0 ? ret : 0; ret = __bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot); if (ret) -- cgit v1.2.3 From a0d11feefb1998204f095fa0400024403d233108 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Oct 2024 21:53:26 -0400 Subject: bcachefs: Don't use commit_do() unnecessarily Using commit_do() to call alloc_sectors_start_trans() breaks when we're randomly injecting transaction restarts - the restart in the commit causes us to leak the lock that alloc_sectorS_start_trans() takes. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/btree_io.c | 2 +- fs/bcachefs/btree_iter.h | 2 ++ fs/bcachefs/btree_update.c | 4 ++-- fs/bcachefs/btree_update.h | 2 +- fs/bcachefs/btree_update_interior.c | 4 +--- fs/bcachefs/disk_accounting.c | 6 ++++-- fs/bcachefs/ec.c | 18 +++++++++--------- fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 2 +- fs/bcachefs/inode.c | 3 +-- fs/bcachefs/io_read.c | 4 ++-- fs/bcachefs/io_write.c | 4 ++-- fs/bcachefs/quota.c | 2 +- fs/bcachefs/rebalance.c | 4 +++- fs/bcachefs/recovery.c | 2 +- fs/bcachefs/subvolume.c | 7 +++---- fs/bcachefs/super.c | 2 +- fs/bcachefs/tests.c | 4 ++-- fs/bcachefs/xattr.c | 2 +- 21 files changed, 41 insertions(+), 39 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 69c7fc97e467..c84a91572a1d 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1977,7 +1977,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work) ca->mi.bucket_size, GFP_KERNEL); - int ret = bch2_trans_do(c, NULL, NULL, + int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_WATERMARK_btree| BCH_TRANS_COMMIT_no_enospc, bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket))); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index d0e0b56892e3..5836870ab882 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -684,7 +684,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, struct bch_dev_usage usage; struct open_bucket *ob; - bch2_trans_do(c, NULL, NULL, 0, + bch2_trans_do(c, PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, cl, false, &usage))); return ob; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index cf933409d385..6296a11ccb09 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1871,7 +1871,7 @@ static void btree_node_write_work(struct work_struct *work) } } else { - ret = bch2_trans_do(c, NULL, NULL, 0, + ret = bch2_trans_do(c, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, BCH_WATERMARK_interior_updates| BCH_TRANS_COMMIT_journal_reclaim| diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 31a58bf46fdb..0bda054f80d7 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -912,6 +912,8 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); _ret; \ }) +#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do)) + struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned); void bch2_trans_put(struct btree_trans *); diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 514df618548e..5d809e8bd170 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -668,7 +668,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, struct disk_reservation *disk_res, int flags, enum btree_iter_update_trigger_flags iter_flags) { - return bch2_trans_do(c, disk_res, NULL, flags, + return bch2_trans_commit_do(c, disk_res, NULL, flags, bch2_btree_insert_trans(trans, id, k, iter_flags)); } @@ -865,7 +865,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, memcpy(l->d, buf.buf, buf.pos); c->journal.early_journal_entries.nr += jset_u64s(u64s); } else { - ret = bch2_trans_do(c, NULL, NULL, + ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|commit_flags, __bch2_trans_log_msg(trans, &buf, u64s)); } diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 6a454f2fa005..70b3c989fac2 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -192,7 +192,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_flags))) -#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ +#define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do) \ bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) #define trans_for_each_update(_trans, _i) \ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 190bc1e81756..64f0928e1137 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2239,10 +2239,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work) struct async_btree_rewrite *a = container_of(work, struct async_btree_rewrite, work); struct bch_fs *c = a->c; - int ret; - ret = bch2_trans_do(c, NULL, NULL, 0, - async_btree_node_rewrite_trans(trans, a)); + int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a)); bch_err_fn_ratelimited(c, ret); bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); kfree(a); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index e309fb78529b..07eb8fa1b026 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -856,8 +856,10 @@ int bch2_dev_usage_init(struct bch_dev *ca, bool gc) }; u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 }; - int ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc)); + int ret = bch2_trans_do(c, ({ + bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc) ?: + (!gc ? bch2_trans_commit(trans, NULL, NULL, 0) : 0); + })); bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index e410cfe37b1a..a2303425959d 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1186,7 +1186,7 @@ static void ec_stripe_delete_work(struct work_struct *work) if (!idx) break; - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ec_stripe_delete(trans, idx)); bch_err_fn(c, ret); if (ret) @@ -1519,14 +1519,14 @@ static void ec_stripe_create(struct ec_stripe_new *s) goto err; } - ret = bch2_trans_do(c, &s->res, NULL, - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc, - ec_stripe_key_update(trans, - s->have_existing_stripe - ? bkey_i_to_stripe(&s->existing_stripe.key) - : NULL, - bkey_i_to_stripe(&s->new_stripe.key))); + ret = bch2_trans_commit_do(c, &s->res, NULL, + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc, + ec_stripe_key_update(trans, + s->have_existing_stripe + ? bkey_i_to_stripe(&s->existing_stripe.key) + : NULL, + bkey_i_to_stripe(&s->new_stripe.key))); bch_err_msg(c, ret, "creating stripe key"); if (ret) { goto err; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 165d95994bd2..506b9a2b5bd7 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -658,7 +658,7 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode); struct bch_inode_info *inode; - bch2_trans_do(c, NULL, NULL, 0, + bch2_trans_do(c, PTR_ERR_OR_ZERO(inode = bch2_lookup_trans(trans, inode_inum(dir), &hash, &dentry->d_name))); if (IS_ERR(inode)) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index a1087fd292e4..807098e76e7a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2509,7 +2509,7 @@ fsck_err: /* Get root directory, create if it doesn't exist: */ int bch2_check_root(struct bch_fs *c) { - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_root_trans(trans)); bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 2c037e84fbae..47544dc91d9e 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1087,8 +1087,7 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans, int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, struct bch_inode_unpacked *inode) { - return bch2_trans_do(c, NULL, NULL, 0, - bch2_inode_find_by_inum_trans(trans, inum, inode)); + return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode)); } int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index f00beff9ca0e..fc246f342820 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -557,8 +557,8 @@ out: static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) { - bch2_trans_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_rbio_narrow_crcs(trans, rbio)); + bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_rbio_narrow_crcs(trans, rbio)); } /* Inner part that may run in process context */ diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index b5fe9e0dc155..8609e25e450f 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1437,7 +1437,7 @@ again: * freeing up space on specific disks, which means that * allocations for specific disks may hang arbitrarily long: */ - ret = bch2_trans_do(c, NULL, NULL, 0, + ret = bch2_trans_run(c, lockrestart_do(trans, bch2_alloc_sectors_start_trans(trans, op->target, op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED), @@ -1447,7 +1447,7 @@ again: op->nr_replicas_required, op->watermark, op->flags, - &op->cl, &wp)); + &op->cl, &wp))); if (unlikely(ret)) { if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) break; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index c32a05e252e2..74f45a8162ad 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -869,7 +869,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, bkey_quota_init(&new_quota.k_i); new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); - ret = bch2_trans_do(c, NULL, NULL, 0, + ret = bch2_trans_commit_do(c, NULL, NULL, 0, bch2_set_quota_trans(trans, &new_quota, qdq)) ?: __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 2d299a37cf07..cd6647374353 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -70,7 +70,9 @@ err: int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) { - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, + int ret = bch2_trans_commit_do(c, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_lazy_rw, __bch2_set_rebalance_needs_scan(trans, inum)); rebalance_wakeup(c); return ret; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 55e1504a8130..454b5a32dd7f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1091,7 +1091,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_inode_init_early(c, &lostfound_inode); - ret = bch2_trans_do(c, NULL, NULL, 0, + ret = bch2_trans_commit_do(c, NULL, NULL, 0, bch2_create_trans(trans, BCACHEFS_ROOT_SUBVOL_INUM, &root_inode, &lostfound_inode, diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 91d8187ee168..80e5efaff524 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -319,8 +319,7 @@ int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) { - return bch2_trans_do(c, NULL, NULL, 0, - bch2_subvol_is_ro_trans(trans, subvol)); + return bch2_trans_do(c, bch2_subvol_is_ro_trans(trans, subvol)); } int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, @@ -676,8 +675,8 @@ err: /* set bi_subvol on root inode */ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) { - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, - __bch2_fs_upgrade_for_subvolumes(trans)); + int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, + __bch2_fs_upgrade_for_subvolumes(trans)); bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 77d811a539af..657fd3759e7b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1972,7 +1972,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) }; u64 v[3] = { nbuckets - old_nbuckets, 0, 0 }; - ret = bch2_trans_do(ca->fs, NULL, NULL, 0, + ret = bch2_trans_commit_do(ca->fs, NULL, NULL, 0, bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), false)) ?: bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets); if (ret) diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index b2f209743afe..315038a0a92d 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -450,7 +450,7 @@ static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, k.k_i.k.p.snapshot = snapid; k.k_i.k.size = len; - ret = bch2_trans_do(c, NULL, NULL, 0, + ret = bch2_trans_commit_do(c, NULL, NULL, 0, bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i, BTREE_UPDATE_internal_snapshot_node)); bch_err_fn(c, ret); @@ -510,7 +510,7 @@ static int test_snapshots(struct bch_fs *c, u64 nr) if (ret) return ret; - ret = bch2_trans_do(c, NULL, NULL, 0, + ret = bch2_trans_commit_do(c, NULL, NULL, 0, bch2_snapshot_node_create(trans, U32_MAX, snapids, snapid_subvols, diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 56c8d3fe55a4..952aca400faf 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -330,7 +330,7 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler, { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret = bch2_trans_do(c, NULL, NULL, 0, + int ret = bch2_trans_do(c, bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags)); if (ret < 0 && bch2_err_matches(ret, ENOENT)) -- cgit v1.2.3 From 15a3836c8ed7bf102159e70ed380b8158651df8e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 17 Oct 2024 23:00:14 -0400 Subject: bcachefs: Repair mismatches in inode hash seed, type Different versions of the same inode (same inode number, different snapshot ID) must have the same hash seed and type - lookups require this, since they see keys from different snapshots simultaneously. To repair we only need to make the inodes consistent, hash_check_key() will do the rest. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 807098e76e7a..16a408210bb7 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1096,10 +1096,36 @@ fsck_err: return ret; } +static int get_snapshot_root_inode(struct btree_trans *trans, + struct bch_inode_unpacked *root, + u64 inum) +{ + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, + SPOS(0, inum, U32_MAX), + BTREE_ITER_all_snapshots, k, ret) { + if (k.k->p.offset != inum) + break; + if (bkey_is_inode(k.k)) + goto found_root; + } + if (ret) + goto err; + BUG(); +found_root: + BUG_ON(bch2_inode_unpack(k, root)); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, - struct bch_inode_unpacked *prev, + struct bch_inode_unpacked *snapshot_root, struct snapshots_seen *s) { struct bch_fs *c = trans->c; @@ -1123,16 +1149,19 @@ static int check_inode(struct btree_trans *trans, BUG_ON(bch2_inode_unpack(k, &u)); - if (prev->bi_inum != u.bi_inum) - *prev = u; + if (snapshot_root->bi_inum != u.bi_inum) { + ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum); + if (ret) + goto err; + } - if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || - inode_d_type(prev) != inode_d_type(&u), + if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed || + INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root), trans, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { - bch_err(c, "repair not implemented yet"); - ret = -BCH_ERR_fsck_repair_unimplemented; - goto err_noprint; + u.bi_hash_seed = snapshot_root->bi_hash_seed; + SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root)); + do_update = true; } if (u.bi_dir || u.bi_dir_offset) { @@ -1285,7 +1314,7 @@ err_noprint: int bch2_check_inodes(struct bch_fs *c) { - struct bch_inode_unpacked prev = { 0 }; + struct bch_inode_unpacked snapshot_root = {}; struct snapshots_seen s; snapshots_seen_init(&s); @@ -1295,7 +1324,7 @@ int bch2_check_inodes(struct bch_fs *c) POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_inode(trans, &iter, k, &prev, &s))); + check_inode(trans, &iter, k, &snapshot_root, &s))); snapshots_seen_exit(&s); bch_err_fn(c, ret); -- cgit v1.2.3 From bc6d2d10418e1bfdb95b16f5dd4cca42d5dec766 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 18 Oct 2024 00:22:09 -0400 Subject: bcachefs: fsck: Improve hash_check_key() hash_check_key() checks and repairs the hash table btrees: dirents and xattrs are open addressing hash tables. We recently had a corruption reported where the hash type on an inode somehow got flipped, which made the existing dirents invisible and allowed new ones to be created with the same name. Now, hash_check_key() can repair duplicates: it will delete one of them, if it has an xattr or dangling dirent, but if it has two valid dirents one of them gets renamed. Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 7 -- fs/bcachefs/dirent.h | 7 ++ fs/bcachefs/fsck.c | 230 +++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 194 insertions(+), 50 deletions(-) (limited to 'fs/bcachefs/fsck.c') diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 84dd4a879d98..faffc98d5605 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -250,13 +250,6 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, return ret; } -static void dirent_copy_target(struct bkey_i_dirent *dst, - struct bkey_s_c_dirent src) -{ - dst->v.d_inum = src.v->d_inum; - dst->v.d_type = src.v->d_type; -} - int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, struct bkey_s_c_dirent d, subvol_inum *target) { diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 8945145865c5..53ad99666022 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -34,6 +34,13 @@ static inline unsigned dirent_val_u64s(unsigned len) int bch2_dirent_read_target(struct btree_trans *, subvol_inum, struct bkey_s_c_dirent, subvol_inum *); +static inline void dirent_copy_target(struct bkey_i_dirent *dst, + struct bkey_s_c_dirent src) +{ + dst->v.d_inum = src.v->d_inum; + dst->v.d_type = src.v->d_type; +} + int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32, const struct bch_hash_info *, u8, const struct qstr *, u64, u64 *, diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 16a408210bb7..75c8a97a6954 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -929,35 +929,138 @@ static int get_visible_inodes(struct btree_trans *trans, return ret; } -static int hash_redo_key(struct btree_trans *trans, - const struct bch_hash_desc desc, - struct bch_hash_info *hash_info, - struct btree_iter *k_iter, struct bkey_s_c k) -{ - struct bkey_i *delete; - struct bkey_i *tmp; - - delete = bch2_trans_kmalloc(trans, sizeof(*delete)); - if (IS_ERR(delete)) - return PTR_ERR(delete); - - tmp = bch2_bkey_make_mut_noupdate(trans, k); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - bkey_init(&delete->k); - delete->k.p = k_iter->pos; - return bch2_btree_iter_traverse(k_iter) ?: - bch2_trans_update(trans, k_iter, delete, 0) ?: - bch2_hash_set_in_snapshot(trans, desc, hash_info, - (subvol_inum) { 0, k.k->p.inode }, - k.k->p.snapshot, tmp, - STR_HASH_must_create| - BTREE_UPDATE_internal_snapshot_node) ?: - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); +static int dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) +{ + if (d.v->d_type == DT_SUBVOL) { + u32 snap; + u64 inum; + int ret = subvol_lookup(trans, le32_to_cpu(d.v->d_child_subvol), &snap, &inum); + if (ret && !bch2_err_matches(ret, ENOENT)) + return ret; + return !ret; + } else { + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, + SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); + int ret = bkey_err(k); + if (ret) + return ret; + + ret = bkey_is_inode(k.k); + bch2_trans_iter_exit(trans, &iter); + return ret; + } +} + +/* + * Prefer to delete the first one, since that will be the one at the wrong + * offset: + * return value: 0 -> delete k1, 1 -> delete k2 + */ +static int hash_pick_winner(struct btree_trans *trans, + const struct bch_hash_desc desc, + struct bch_hash_info *hash_info, + struct bkey_s_c k1, + struct bkey_s_c k2) +{ + if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && + !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) + return 0; + + switch (desc.btree_id) { + case BTREE_ID_dirents: { + int ret = dirent_has_target(trans, bkey_s_c_to_dirent(k1)); + if (ret < 0) + return ret; + if (!ret) + return 0; + + ret = dirent_has_target(trans, bkey_s_c_to_dirent(k2)); + if (ret < 0) + return ret; + if (!ret) + return 1; + return 2; + } + default: + return 0; + } +} + +static int fsck_update_backpointers(struct btree_trans *trans, + struct snapshots_seen *s, + const struct bch_hash_desc desc, + struct bch_hash_info *hash_info, + struct bkey_i *new) +{ + if (new->k.type != KEY_TYPE_dirent) + return 0; + + struct bkey_i_dirent *d = bkey_i_to_dirent(new); + struct inode_walker target = inode_walker_init(); + int ret = 0; + + if (d->v.d_type == DT_SUBVOL) { + BUG(); + } else { + ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); + if (ret) + goto err; + + darray_for_each(target.inodes, i) { + i->inode.bi_dir_offset = d->k.p.offset; + ret = __bch2_fsck_write_inode(trans, &i->inode); + if (ret) + goto err; + } + } +err: + inode_walker_exit(&target); + return ret; +} + +static int fsck_rename_dirent(struct btree_trans *trans, + struct snapshots_seen *s, + const struct bch_hash_desc desc, + struct bch_hash_info *hash_info, + struct bkey_s_c_dirent old) +{ + struct qstr old_name = bch2_dirent_get_name(old); + struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); + int ret = PTR_ERR_OR_ZERO(new); + if (ret) + return ret; + + bkey_dirent_init(&new->k_i); + dirent_copy_target(new, old); + new->k.p = old.k->p; + + for (unsigned i = 0; i < 1000; i++) { + unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u", + old_name.len, old_name.name, i); + unsigned u64s = BKEY_U64s + dirent_val_u64s(len); + + if (u64s > U8_MAX) + return -EINVAL; + + new->k.u64s = u64s; + + ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, + (subvol_inum) { 0, old.k->p.inode }, + old.k->p.snapshot, &new->k_i, + BTREE_UPDATE_internal_snapshot_node); + if (!bch2_err_matches(ret, EEXIST)) + break; + } + + if (ret) + return ret; + + return fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); } static int hash_check_key(struct btree_trans *trans, + struct snapshots_seen *s, const struct bch_hash_desc desc, struct bch_hash_info *hash_info, struct btree_iter *k_iter, struct bkey_s_c hash_k) @@ -986,16 +1089,9 @@ static int hash_check_key(struct btree_trans *trans, if (bkey_eq(k.k->p, hash_k.k->p)) break; - if (fsck_err_on(k.k->type == desc.key_type && - !desc.cmp_bkey(k, hash_k), - trans, hash_table_key_duplicate, - "duplicate hash table keys:\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, hash_k), - buf.buf))) { - ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1; - break; - } + if (k.k->type == desc.key_type && + !desc.cmp_bkey(k, hash_k)) + goto duplicate_entries; if (bkey_deleted(k.k)) { bch2_trans_iter_exit(trans, &iter); @@ -1008,18 +1104,66 @@ out: return ret; bad_hash: if (fsck_err(trans, hash_table_key_wrong_offset, - "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", + "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s", bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { - ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); - bch_err_fn(c, ret); + struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); + if (IS_ERR(new)) + return PTR_ERR(new); + + k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info, + (subvol_inum) { 0, hash_k.k->p.inode }, + hash_k.k->p.snapshot, new, + STR_HASH_must_create| + BTREE_ITER_with_updates| + BTREE_UPDATE_internal_snapshot_node); + ret = bkey_err(k); if (ret) - return ret; - ret = -BCH_ERR_transaction_restart_nested; + goto out; + if (k.k) + goto duplicate_entries; + + ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, + BTREE_UPDATE_internal_snapshot_node) ?: + fsck_update_backpointers(trans, s, desc, hash_info, new) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: + -BCH_ERR_transaction_restart_nested; + goto out; } fsck_err: goto out; +duplicate_entries: + ret = hash_pick_winner(trans, desc, hash_info, hash_k, k); + if (ret < 0) + goto out; + + if (!fsck_err(trans, hash_table_key_duplicate, + "duplicate hash table keys%s:\n%s", + ret != 2 ? "" : ", both point to valid inodes", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, hash_k), + prt_newline(&buf), + bch2_bkey_val_to_text(&buf, c, k), + buf.buf))) + goto out; + + switch (ret) { + case 0: + ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); + break; + case 1: + ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0); + break; + case 2: + ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: + bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); + goto out; + } + + ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: + -BCH_ERR_transaction_restart_nested; + goto out; } static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, @@ -2336,7 +2480,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &i->inode); dir->first_this_inode = false; - ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); + ret = hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k); if (ret < 0) goto err; if (ret) { @@ -2450,7 +2594,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &i->inode); inode->first_this_inode = false; - ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); + ret = hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k); bch_err_fn(c, ret); return ret; } -- cgit v1.2.3