| author | Christoph Hellwig <hch@lst.de> | 2025-10-23 17:17:02 +0200 |
|---|---|---|
| committer | Carlos Maiolino <cem@kernel.org> | 2025-10-31 12:06:03 +0100 |
| commit | 83bac569c762651ac6dff9a86f54ecc13d911f7d (patch) | |
| tree | 7e0047a7af06091ab8298ca56dafdc86bbd9dcaf /fs/xfs/xfs_zone_gc.c | |
| parent | xfs: fix locking in xchk_nlinks_collect_dir (diff) | |
xfs: prevent gc from picking the same zone twice
When we are picking a zone for GC, it might already be in the pipeline,
which can lead to us moving the same data twice. This results in write
amplification, and in a very unfortunate case we keep on garbage
collecting the zone we just filled with migrated data, stopping all
forward progress.
Fix this by introducing a count of ongoing GC operations on a zone, and
skipping any zone with ongoing GC when picking a new victim.
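As an illustration of the approach, here is a minimal userspace sketch with hypothetical names (the real code lives in fs/xfs/xfs_zone_gc.c and uses struct xfs_rtgroup with an rtg_gccount field): each zone carries an atomic count of in-flight GC operations, the victim picker skips any zone whose count is nonzero, and it bumps the count on the zone it selects.

```c
#include <stdatomic.h>
#include <stddef.h>

/* Hypothetical zone descriptor, standing in for struct xfs_rtgroup. */
struct zone {
	atomic_int	gccount;	/* in-flight GC operations on this zone */
	unsigned long	used_blocks;	/* blocks still live in this zone */
};

/*
 * Pick the zone with the fewest used blocks that is not already being
 * garbage collected.  Returns NULL if every candidate is busy or empty.
 */
static struct zone *pick_victim(struct zone *zones, size_t nr_zones)
{
	struct zone *victim = NULL;

	for (size_t i = 0; i < nr_zones; i++) {
		struct zone *z = &zones[i];

		/* skip zones with GC already in flight */
		if (atomic_load(&z->gccount))
			continue;
		/* skip empty zones that are just waiting for a reset */
		if (z->used_blocks == 0)
			continue;
		if (!victim || z->used_blocks < victim->used_blocks)
			victim = z;
	}

	if (victim)
		atomic_fetch_add(&victim->gccount, 1);	/* mark GC in progress */
	return victim;
}
```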
Fixes: 080d01c41 ("xfs: implement zoned garbage collection")
Co-developed-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Diffstat
| -rw-r--r-- | fs/xfs/xfs_zone_gc.c | 27 |
1 file changed, 27 insertions, 0 deletions
```diff
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 109877d9a6bf..4ade54445532 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -114,6 +114,8 @@ struct xfs_gc_bio {
 	/* Open Zone being written to */
 	struct xfs_open_zone	*oz;
 
+	struct xfs_rtgroup	*victim_rtg;
+
 	/* Bio used for reads and writes, including the bvec used by it */
 	struct bio_vec		bv;
 	struct bio		bio;	/* must be last */
@@ -264,6 +266,7 @@ xfs_zone_gc_iter_init(
 	iter->rec_count = 0;
 	iter->rec_idx = 0;
 	iter->victim_rtg = victim_rtg;
+	atomic_inc(&victim_rtg->rtg_gccount);
 }
 
 /*
@@ -362,6 +365,7 @@ xfs_zone_gc_query(
 	return 0;
 
 done:
+	atomic_dec(&iter->victim_rtg->rtg_gccount);
 	xfs_rtgroup_rele(iter->victim_rtg);
 	iter->victim_rtg = NULL;
 	return 0;
@@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from(
 		if (!rtg)
 			continue;
 
+		/*
+		 * If the zone is already undergoing GC, don't pick it again.
+		 *
+		 * This prevents us from picking one of the zones for which we
+		 * already submitted GC I/O, but for which the remapping hasn't
+		 * concluded yet.  This won't cause data corruption, but
+		 * increases write amplification and slows down GC, so this is
+		 * a bad thing.
+		 */
+		if (atomic_read(&rtg->rtg_gccount)) {
+			xfs_rtgroup_rele(rtg);
+			continue;
+		}
+
 		/* skip zones that are just waiting for a reset */
 		if (rtg_rmap(rtg)->i_used_blocks == 0 ||
 		    rtg_rmap(rtg)->i_used_blocks >= victim_used) {
@@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk(
 	chunk->scratch = &data->scratch[data->scratch_idx];
 	chunk->data = data;
 	chunk->oz = oz;
+	chunk->victim_rtg = iter->victim_rtg;
+	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+	atomic_inc(&chunk->victim_rtg->rtg_gccount);
 
 	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
 	bio->bi_end_io = xfs_zone_gc_end_io;
@@ -710,6 +731,8 @@ static void
 xfs_zone_gc_free_chunk(
 	struct xfs_gc_bio	*chunk)
 {
+	atomic_dec(&chunk->victim_rtg->rtg_gccount);
+	xfs_rtgroup_rele(chunk->victim_rtg);
 	list_del(&chunk->entry);
 	xfs_open_zone_put(chunk->oz);
 	xfs_irele(chunk->ip);
@@ -770,6 +793,10 @@ xfs_zone_gc_split_write(
 	split_chunk->oz = chunk->oz;
 	atomic_inc(&chunk->oz->oz_ref);
 
+	split_chunk->victim_rtg = chunk->victim_rtg;
+	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+	atomic_inc(&chunk->victim_rtg->rtg_gccount);
+
 	chunk->offset += split_len;
 	chunk->len -= split_len;
 	chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);
```
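For orientation, the increments and decrements of the new rtg_gccount counter pair up as follows: one reference is held by the query iterator (taken in xfs_zone_gc_iter_init, dropped when xfs_zone_gc_query finishes), and one per in-flight I/O chunk, including clones made by xfs_zone_gc_split_write, each dropped in xfs_zone_gc_free_chunk. A minimal userspace sketch of that pairing, with hypothetical names rather than the kernel API:

```c
#include <stdatomic.h>

/* Hypothetical stand-ins for struct xfs_rtgroup and struct xfs_gc_bio. */
struct zone {
	atomic_int gccount;	/* in-flight GC operations on this zone */
};

struct gc_chunk {
	struct zone *victim;	/* zone this chunk is migrating data out of */
};

/* Take a GC reference when a chunk starts targeting the victim zone. */
static void chunk_start(struct gc_chunk *c, struct zone *victim)
{
	c->victim = victim;
	atomic_fetch_add(&victim->gccount, 1);
}

/* Splitting a write clones the chunk, so the clone takes its own reference. */
static void chunk_split(struct gc_chunk *split, const struct gc_chunk *orig)
{
	split->victim = orig->victim;
	atomic_fetch_add(&split->victim->gccount, 1);
}

/* Drop the reference once the chunk's remapping has concluded. */
static void chunk_free(struct gc_chunk *c)
{
	atomic_fetch_sub(&c->victim->gccount, 1);
}
```

Because every path that references the victim holds its own count, the zone only becomes eligible as a GC victim again once the iterator is done and every chunk, split or not, has completed its remapping.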
