diff options
Diffstat (limited to 'fs/bcachefs/extents.c')
| -rw-r--r-- | fs/bcachefs/extents.c | 525 |
1 files changed, 1 insertions, 524 deletions
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 6c1cc90ab320..8f511760102a 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -8,14 +8,11 @@ #include "bcachefs.h" #include "bkey_methods.h" -#include "bkey_on_stack.h" #include "btree_gc.h" -#include "btree_update.h" -#include "btree_update_interior.h" +#include "btree_iter.h" #include "buckets.h" #include "checksum.h" #include "debug.h" -#include "dirent.h" #include "disk_groups.h" #include "error.h" #include "extents.h" @@ -26,7 +23,6 @@ #include "super-io.h" #include "trace.h" #include "util.h" -#include "xattr.h" unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k) { @@ -826,525 +822,6 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k) return -val_u64s_delta; } -static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - unsigned ret = 0; - - bkey_extent_entry_for_each(ptrs, entry) { - switch (__extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - case BCH_EXTENT_ENTRY_stripe_ptr: - ret++; - } - } - - return ret; -} - -static int count_iters_for_insert(struct btree_trans *trans, - struct bkey_s_c k, - unsigned offset, - struct bpos *end, - unsigned *nr_iters, - unsigned max_iters, - bool overwrite) -{ - int ret = 0; - - switch (k.k->type) { - case KEY_TYPE_extent: - case KEY_TYPE_reflink_v: - *nr_iters += bch2_bkey_nr_alloc_ptrs(k); - - if (*nr_iters >= max_iters) { - *end = bpos_min(*end, k.k->p); - ret = 1; - } - - break; - case KEY_TYPE_reflink_p: { - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - u64 idx = le64_to_cpu(p.v->idx); - unsigned sectors = bpos_min(*end, p.k->p).offset - - bkey_start_offset(p.k); - struct btree_iter *iter; - struct bkey_s_c r_k; - - for_each_btree_key(trans, iter, - BTREE_ID_REFLINK, POS(0, idx + offset), - BTREE_ITER_SLOTS, r_k, ret) { - if (bkey_cmp(bkey_start_pos(r_k.k), - POS(0, idx + sectors)) >= 0) - break; - - *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k); - - if (*nr_iters >= max_iters) { - struct bpos pos = bkey_start_pos(k.k); - pos.offset += r_k.k->p.offset - idx; - - *end = bpos_min(*end, pos); - ret = 1; - break; - } - } - - bch2_trans_iter_put(trans, iter); - break; - } - } - - return ret; -} - -#define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) - -int bch2_extent_atomic_end(struct btree_iter *iter, - struct bkey_i *insert, - struct bpos *end) -{ - struct btree_trans *trans = iter->trans; - struct btree *b; - struct btree_node_iter node_iter; - struct bkey_packed *_k; - unsigned nr_iters = 0; - int ret; - - ret = bch2_btree_iter_traverse(iter); - if (ret) - return ret; - - b = iter->l[0].b; - node_iter = iter->l[0].iter; - - BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0); - - *end = bpos_min(insert->k.p, b->key.k.p); - - ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end, - &nr_iters, EXTENT_ITERS_MAX / 2, false); - if (ret < 0) - return ret; - - while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b, - KEY_TYPE_discard))) { - struct bkey unpacked; - struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked); - unsigned offset = 0; - - if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0) - break; - - if (bkey_cmp(bkey_start_pos(&insert->k), - bkey_start_pos(k.k)) > 0) - offset = bkey_start_offset(&insert->k) - - bkey_start_offset(k.k); - - ret = count_iters_for_insert(trans, k, offset, end, - &nr_iters, EXTENT_ITERS_MAX, true); - if (ret) - break; - - bch2_btree_node_iter_advance(&node_iter, b); - } - - return ret < 0 ? ret : 0; -} - -int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter) -{ - struct bpos end; - int ret; - - ret = bch2_extent_atomic_end(iter, k, &end); - if (ret) - return ret; - - bch2_cut_back(end, k); - return 0; -} - -int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter) -{ - struct bpos end; - int ret; - - ret = bch2_extent_atomic_end(iter, k, &end); - if (ret) - return ret; - - return !bkey_cmp(end, k->k.p); -} - -enum btree_insert_ret -bch2_extent_can_insert(struct btree_trans *trans, - struct btree_insert_entry *insert, - unsigned *u64s) -{ - struct btree_iter_level *l = &insert->iter->l[0]; - struct btree_node_iter node_iter = l->iter; - enum bch_extent_overlap overlap; - struct bkey_packed *_k; - struct bkey unpacked; - struct bkey_s_c k; - int sectors; - - /* - * We avoid creating whiteouts whenever possible when deleting, but - * those optimizations mean we may potentially insert two whiteouts - * instead of one (when we overlap with the front of one extent and the - * back of another): - */ - if (bkey_whiteout(&insert->k->k)) - *u64s += BKEY_U64s; - - _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, - KEY_TYPE_discard); - if (!_k) - return BTREE_INSERT_OK; - - k = bkey_disassemble(l->b, _k, &unpacked); - - overlap = bch2_extent_overlap(&insert->k->k, k.k); - - /* account for having to split existing extent: */ - if (overlap == BCH_EXTENT_OVERLAP_MIDDLE) - *u64s += _k->u64s; - - if (overlap == BCH_EXTENT_OVERLAP_MIDDLE && - (sectors = bch2_extent_is_compressed(k))) { - int flags = trans->flags & BTREE_INSERT_NOFAIL - ? BCH_DISK_RESERVATION_NOFAIL : 0; - - switch (bch2_disk_reservation_add(trans->c, - trans->disk_res, - sectors, flags)) { - case 0: - break; - case -ENOSPC: - return BTREE_INSERT_ENOSPC; - default: - BUG(); - } - } - - return BTREE_INSERT_OK; -} - -static void verify_extent_nonoverlapping(struct bch_fs *c, - struct btree *b, - struct btree_node_iter *_iter, - struct bkey_i *insert) -{ -#ifdef CONFIG_BCACHEFS_DEBUG - struct btree_node_iter iter; - struct bkey_packed *k; - struct bkey uk; - - if (!expensive_debug_checks(c)) - return; - - iter = *_iter; - k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard); - BUG_ON(k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0)); - - iter = *_iter; - k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard); -#if 0 - BUG_ON(k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0); -#else - if (k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) { - char buf1[100]; - char buf2[100]; - - bch2_bkey_to_text(&PBUF(buf1), &insert->k); - bch2_bkey_to_text(&PBUF(buf2), &uk); - - bch2_dump_btree_node(b); - panic("insert > next :\n" - "insert %s\n" - "next %s\n", - buf1, buf2); - } -#endif - -#endif -} - -static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, - struct bkey_i *insert) -{ - struct btree_iter_level *l = &iter->l[0]; - struct bkey_packed *k = - bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b)); - - BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b)); - - EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size); - verify_extent_nonoverlapping(c, l->b, &l->iter, insert); - - if (debug_check_bkeys(c)) - bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert)); - - bch2_bset_insert(l->b, &l->iter, k, insert, 0); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s); -} - -static void -extent_squash(struct bch_fs *c, struct btree_iter *iter, - struct bkey_i *insert, - struct bkey_packed *_k, struct bkey_s k, - enum bch_extent_overlap overlap) -{ - struct btree_iter_level *l = &iter->l[0]; - int u64s_delta; - - switch (overlap) { - case BCH_EXTENT_OVERLAP_FRONT: - /* insert overlaps with start of k: */ - u64s_delta = bch2_cut_front_s(insert->k.p, k); - btree_keys_account_val_delta(l->b, _k, u64s_delta); - - EBUG_ON(bkey_deleted(k.k)); - extent_save(l->b, _k, k.k); - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - break; - - case BCH_EXTENT_OVERLAP_BACK: - /* insert overlaps with end of k: */ - u64s_delta = bch2_cut_back_s(bkey_start_pos(&insert->k), k); - btree_keys_account_val_delta(l->b, _k, u64s_delta); - - EBUG_ON(bkey_deleted(k.k)); - extent_save(l->b, _k, k.k); - - /* - * As the auxiliary tree is indexed by the end of the - * key and we've just changed the end, update the - * auxiliary tree. - */ - bch2_bset_fix_invalidated_key(l->b, _k); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, - _k, _k->u64s, _k->u64s); - break; - - case BCH_EXTENT_OVERLAP_ALL: { - /* The insert key completely covers k, invalidate k */ - if (!bkey_whiteout(k.k)) - btree_account_key_drop(l->b, _k); - - k.k->size = 0; - k.k->type = KEY_TYPE_deleted; - - if (_k >= btree_bset_last(l->b)->start) { - unsigned u64s = _k->u64s; - - bch2_bset_delete(l->b, _k, _k->u64s); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, - _k, u64s, 0); - } else { - extent_save(l->b, _k, k.k); - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - } - - break; - } - case BCH_EXTENT_OVERLAP_MIDDLE: { - struct bkey_on_stack split; - - bkey_on_stack_init(&split); - bkey_on_stack_realloc(&split, c, k.k->u64s); - - /* - * The insert key falls 'in the middle' of k - * The insert key splits k in 3: - * - start only in k, preserve - * - middle common section, invalidate in k - * - end only in k, preserve - * - * We update the old key to preserve the start, - * insert will be the new common section, - * we manually insert the end that we are preserving. - * - * modify k _before_ doing the insert (which will move - * what k points to) - */ - bkey_reassemble(split.k, k.s_c); - split.k->k.needs_whiteout |= bkey_written(l->b, _k); - - bch2_cut_back(bkey_start_pos(&insert->k), split.k); - BUG_ON(bkey_deleted(&split.k->k)); - - u64s_delta = bch2_cut_front_s(insert->k.p, k); - btree_keys_account_val_delta(l->b, _k, u64s_delta); - - BUG_ON(bkey_deleted(k.k)); - extent_save(l->b, _k, k.k); - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - - extent_bset_insert(c, iter, split.k); - bkey_on_stack_exit(&split, c); - break; - } - } -} - -/** - * bch_extent_insert_fixup - insert a new extent and deal with overlaps - * - * this may result in not actually doing the insert, or inserting some subset - * of the insert key. For cmpxchg operations this is where that logic lives. - * - * All subsets of @insert that need to be inserted are inserted using - * bch2_btree_insert_and_journal(). If @b or @res fills up, this function - * returns false, setting @iter->pos for the prefix of @insert that actually got - * inserted. - * - * BSET INVARIANTS: this function is responsible for maintaining all the - * invariants for bsets of extents in memory. things get really hairy with 0 - * size extents - * - * within one bset: - * - * bkey_start_pos(bkey_next(k)) >= k - * or bkey_start_offset(bkey_next(k)) >= k->offset - * - * i.e. strict ordering, no overlapping extents. - * - * multiple bsets (i.e. full btree node): - * - * ∀ k, j - * k.size != 0 ∧ j.size != 0 → - * ¬ (k > bkey_start_pos(j) ∧ k < j) - * - * i.e. no two overlapping keys _of nonzero size_ - * - * We can't realistically maintain this invariant for zero size keys because of - * the key merging done in bch2_btree_insert_key() - for two mergeable keys k, j - * there may be another 0 size key between them in another bset, and it will - * thus overlap with the merged key. - * - * In addition, the end of iter->pos indicates how much has been processed. - * If the end of iter->pos is not the same as the end of insert, then - * key insertion needs to continue/be retried. - */ -void bch2_insert_fixup_extent(struct btree_trans *trans, - struct btree_insert_entry *insert_entry) -{ - struct bch_fs *c = trans->c; - struct btree_iter *iter = insert_entry->iter; - struct bkey_i *insert = insert_entry->k; - struct btree_iter_level *l = &iter->l[0]; - struct btree_node_iter node_iter = l->iter; - bool deleting = bkey_whiteout(&insert->k); - bool update_journal = !deleting; - bool update_btree = !deleting; - struct bkey_i whiteout = *insert; - struct bkey_packed *_k; - struct bkey unpacked; - - EBUG_ON(iter->level); - EBUG_ON(!insert->k.size); - EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k))); - - while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b, - KEY_TYPE_discard))) { - struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked); - struct bpos cur_end = bpos_min(insert->k.p, k.k->p); - enum bch_extent_overlap overlap = - bch2_extent_overlap(&insert->k, k.k); - - if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0) - break; - - if (!bkey_whiteout(k.k)) - update_journal = true; - - if (!update_journal) { - bch2_cut_front(cur_end, insert); - bch2_cut_front(cur_end, &whiteout); - bch2_btree_iter_set_pos_same_leaf(iter, cur_end); - goto next; - } - - /* - * When deleting, if possible just do it by switching the type - * of the key we're deleting, instead of creating and inserting - * a new whiteout: - */ - if (deleting && - !update_btree && - !bkey_cmp(insert->k.p, k.k->p) && - !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) { - if (!bkey_whiteout(k.k)) { - btree_account_key_drop(l->b, _k); - _k->type = KEY_TYPE_discard; - reserve_whiteout(l->b, _k); - bch2_btree_iter_fix_key_modified(iter, - l->b, _k); - } - break; - } - - if (k.k->needs_whiteout || bkey_written(l->b, _k)) { - insert->k.needs_whiteout = true; - update_btree = true; - } - - if (update_btree && - overlap == BCH_EXTENT_OVERLAP_ALL && - bkey_whiteout(k.k) && - k.k->needs_whiteout) { - unreserve_whiteout(l->b, _k); - _k->needs_whiteout = false; - } - - extent_squash(c, iter, insert, _k, k, overlap); - - if (!update_btree) - bch2_cut_front(cur_end, insert); -next: - node_iter = l->iter; - - if (overlap == BCH_EXTENT_OVERLAP_FRONT || - overlap == BCH_EXTENT_OVERLAP_MIDDLE) - break; - } - - l->iter = node_iter; - bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p); - - if (update_btree) { - if (deleting) - insert->k.type = KEY_TYPE_discard; - - EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size); - - extent_bset_insert(c, iter, insert); - } - - if (update_journal) { - struct bkey_i *k = !deleting ? insert : &whiteout; - - if (deleting) - k->k.type = KEY_TYPE_discard; - - EBUG_ON(bkey_deleted(&k->k) || !k->k.size); - - bch2_btree_journal_key(trans, iter, k); - } - - bch2_cut_front(insert->k.p, insert); -} - const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) { return bch2_bkey_ptrs_invalid(c, k); |
