diff options
| author | Christian Brauner <brauner@kernel.org> | 2025-03-20 15:16:08 +0100 |
|---|---|---|
| committer | Christian Brauner <brauner@kernel.org> | 2025-03-20 15:16:08 +0100 |
| commit | c84042b32f275dee8e3f10cdd8973e2e879f1fc8 (patch) | |
| tree | 4d4b795d790bc75732732b86d2fee16548ec1666 /fs/iomap | |
| parent | iomap: fix inline data on buffered read (diff) | |
| parent | iomap: rework IOMAP atomic flags (diff) | |
| download | linux-c84042b32f275dee8e3f10cdd8973e2e879f1fc8.tar.gz linux-c84042b32f275dee8e3f10cdd8973e2e879f1fc8.zip | |
Merge patch series "further iomap large atomic writes changes"
John Garry <john.g.garry@oracle.com> says:
These iomap changes are spun-off the XFS large atomic writes series at
https://lore.kernel.org/linux-xfs/86a64256-497a-453b-bbba-a5ac6b4cb056@oracle.com/T/#ma99c763221de9d49ea2ccfca9ff9b8d71c8b2677
The XFS parts there are not ready yet, but it is worth having the iomap
changes queued in advance.
Some much earlier changes from that same series were already queued in the
vfs tree, and these patches rework those changes - specifically the
first patch in this series does.
The most other significant change is the patch to rework how the bio flags
are set in the DIO patch.
* patches from https://lore.kernel.org/r/20250320120250.4087011-1-john.g.garry@oracle.com:
iomap: rework IOMAP atomic flags
iomap: comment on atomic write checks in iomap_dio_bio_iter()
iomap: inline iomap_dio_bio_opflags()
Link: https://lore.kernel.org/r/20250320120250.4087011-1-john.g.garry@oracle.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/iomap')
| -rw-r--r-- | fs/iomap/direct-io.c | 125 | ||||
| -rw-r--r-- | fs/iomap/trace.h | 2 |
2 files changed, 58 insertions, 69 deletions
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 5299f70428ef..6ac7a1534f7c 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -312,27 +312,20 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, } /* - * Figure out the bio's operation flags from the dio request, the - * mapping, and whether or not we want FUA. Note that we can end up - * clearing the WRITE_THROUGH flag in the dio request. + * Use a FUA write if we need datasync semantics and this is a pure data I/O + * that doesn't require any metadata updates (including after I/O completion + * such as unwritten extent conversion) and the underlying device either + * doesn't have a volatile write cache or supports FUA. + * This allows us to avoid cache flushes on I/O completion. */ -static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio, - const struct iomap *iomap, bool use_fua, bool atomic_hw) +static inline bool iomap_dio_can_use_fua(const struct iomap *iomap, + struct iomap_dio *dio) { - blk_opf_t opflags = REQ_SYNC | REQ_IDLE; - - if (!(dio->flags & IOMAP_DIO_WRITE)) - return REQ_OP_READ; - - opflags |= REQ_OP_WRITE; - if (use_fua) - opflags |= REQ_FUA; - else - dio->flags &= ~IOMAP_DIO_WRITE_THROUGH; - if (atomic_hw) - opflags |= REQ_ATOMIC; - - return opflags; + if (iomap->flags & (IOMAP_F_SHARED | IOMAP_F_DIRTY)) + return false; + if (!(dio->flags & IOMAP_DIO_WRITE_THROUGH)) + return false; + return !bdev_write_cache(iomap->bdev) || bdev_fua(iomap->bdev); } static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio) @@ -340,52 +333,64 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio) const struct iomap *iomap = &iter->iomap; struct inode *inode = iter->inode; unsigned int fs_block_size = i_blocksize(inode), pad; - bool atomic_hw = iter->flags & IOMAP_ATOMIC_HW; const loff_t length = iomap_length(iter); loff_t pos = iter->pos; - blk_opf_t bio_opf; + blk_opf_t bio_opf = REQ_SYNC | REQ_IDLE; struct bio *bio; bool need_zeroout = false; - bool use_fua = false; int nr_pages, ret = 0; u64 copied = 0; size_t orig_count; - if (atomic_hw && length != iter->len) - return -EINVAL; - if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) || !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter)) return -EINVAL; - if (iomap->type == IOMAP_UNWRITTEN) { - dio->flags |= IOMAP_DIO_UNWRITTEN; - need_zeroout = true; - } + if (dio->flags & IOMAP_DIO_WRITE) { + bio_opf |= REQ_OP_WRITE; + + if (iomap->flags & IOMAP_F_ATOMIC_BIO) { + /* + * Ensure that the mapping covers the full write + * length, otherwise it won't be submitted as a single + * bio, which is required to use hardware atomics. + */ + if (length != iter->len) + return -EINVAL; + bio_opf |= REQ_ATOMIC; + } - if (iomap->flags & IOMAP_F_SHARED) - dio->flags |= IOMAP_DIO_COW; + if (iomap->type == IOMAP_UNWRITTEN) { + dio->flags |= IOMAP_DIO_UNWRITTEN; + need_zeroout = true; + } + + if (iomap->flags & IOMAP_F_SHARED) + dio->flags |= IOMAP_DIO_COW; + + if (iomap->flags & IOMAP_F_NEW) { + need_zeroout = true; + } else if (iomap->type == IOMAP_MAPPED) { + if (iomap_dio_can_use_fua(iomap, dio)) + bio_opf |= REQ_FUA; + else + dio->flags &= ~IOMAP_DIO_WRITE_THROUGH; + } - if (iomap->flags & IOMAP_F_NEW) { - need_zeroout = true; - } else if (iomap->type == IOMAP_MAPPED) { /* - * Use a FUA write if we need datasync semantics, this is a pure - * data IO that doesn't require any metadata updates (including - * after IO completion such as unwritten extent conversion) and - * the underlying device either supports FUA or doesn't have - * a volatile write cache. This allows us to avoid cache flushes - * on IO completion. If we can't use writethrough and need to - * sync, disable in-task completions as dio completion will - * need to call generic_write_sync() which will do a blocking - * fsync / cache flush call. + * We can only do deferred completion for pure overwrites that + * don't require additional I/O at completion time. + * + * This rules out writes that need zeroing or extent conversion, + * extend the file size, or issue metadata I/O or cache flushes + * during completion processing. */ - if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) && - (dio->flags & IOMAP_DIO_WRITE_THROUGH) && - (bdev_fua(iomap->bdev) || !bdev_write_cache(iomap->bdev))) - use_fua = true; - else if (dio->flags & IOMAP_DIO_NEED_SYNC) + if (need_zeroout || (pos >= i_size_read(inode)) || + ((dio->flags & IOMAP_DIO_NEED_SYNC) && + !(bio_opf & REQ_FUA))) dio->flags &= ~IOMAP_DIO_CALLER_COMP; + } else { + bio_opf |= REQ_OP_READ; } /* @@ -400,18 +405,6 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio) goto out; /* - * We can only do deferred completion for pure overwrites that - * don't require additional IO at completion. This rules out - * writes that need zeroing or extent conversion, extend - * the file size, or issue journal IO or cache flushes - * during completion processing. - */ - if (need_zeroout || - ((dio->flags & IOMAP_DIO_NEED_SYNC) && !use_fua) || - ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) - dio->flags &= ~IOMAP_DIO_CALLER_COMP; - - /* * The rules for polled IO completions follow the guidelines as the * ones we set for inline and deferred completions. If none of those * are available for this IO, clear the polled flag. @@ -428,8 +421,6 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio) goto out; } - bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_hw); - nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS); do { size_t n; @@ -461,9 +452,9 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio) } n = bio->bi_iter.bi_size; - if (WARN_ON_ONCE(atomic_hw && n != length)) { + if (WARN_ON_ONCE((bio_opf & REQ_ATOMIC) && n != length)) { /* - * This bio should have covered the complete length, + * An atomic write bio must cover the complete length, * which it doesn't, so error. We may need to zero out * the tail (complete FS block), similar to when * bio_iov_iter_get_pages() returns an error, above. @@ -686,10 +677,8 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, iomi.flags |= IOMAP_OVERWRITE_ONLY; } - if (dio_flags & IOMAP_DIO_ATOMIC_SW) - iomi.flags |= IOMAP_ATOMIC_SW; - else if (iocb->ki_flags & IOCB_ATOMIC) - iomi.flags |= IOMAP_ATOMIC_HW; + if (iocb->ki_flags & IOCB_ATOMIC) + iomi.flags |= IOMAP_ATOMIC; /* for data sync or sync, we need sync completion processing */ if (iocb_is_dsync(iocb)) { diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 69af89044ebd..9eab2c8ac3c5 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -99,7 +99,7 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued); { IOMAP_FAULT, "FAULT" }, \ { IOMAP_DIRECT, "DIRECT" }, \ { IOMAP_NOWAIT, "NOWAIT" }, \ - { IOMAP_ATOMIC_HW, "ATOMIC_HW" } + { IOMAP_ATOMIC, "ATOMIC" } #define IOMAP_F_FLAGS_STRINGS \ { IOMAP_F_NEW, "NEW" }, \ |
