From 7aaff708a768144ec6459f0a58301be1a6b982fc Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 25 May 2022 20:36:47 -0600
Subject: io_uring: move cancelation into its own file

This also helps clean up the io_uring.h cancel parts, as we can now make
most things static within cancel.c.

Signed-off-by: Jens Axboe
---
 io_uring/cancel.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 194 insertions(+)
 create mode 100644 io_uring/cancel.c

(limited to 'io_uring/cancel.c')

diff --git a/io_uring/cancel.c b/io_uring/cancel.c
new file mode 100644
index 000000000000..83cceb52d82d
--- /dev/null
+++ b/io_uring/cancel.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "io_uring_types.h"
+#include "io_uring.h"
+#include "tctx.h"
+#include "poll.h"
+#include "timeout.h"
+#include "cancel.h"
+
+struct io_cancel {
+	struct file			*file;
+	u64				addr;
+	u32				flags;
+	s32				fd;
+};
+
+#define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
+			 IORING_ASYNC_CANCEL_ANY)
+
+static bool io_cancel_cb(struct io_wq_work *work, void *data)
+{
+	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+	struct io_cancel_data *cd = data;
+
+	if (req->ctx != cd->ctx)
+		return false;
+	if (cd->flags & IORING_ASYNC_CANCEL_ANY) {
+		;
+	} else if (cd->flags & IORING_ASYNC_CANCEL_FD) {
+		if (req->file != cd->file)
+			return false;
+	} else {
+		if (req->cqe.user_data != cd->data)
+			return false;
+	}
+	if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
+		if (cd->seq == req->work.cancel_seq)
+			return false;
+		req->work.cancel_seq = cd->seq;
+	}
+	return true;
+}
+
+static int io_async_cancel_one(struct io_uring_task *tctx,
+			       struct io_cancel_data *cd)
+{
+	enum io_wq_cancel cancel_ret;
+	int ret = 0;
+	bool all;
+
+	if (!tctx || !tctx->io_wq)
+		return -ENOENT;
+
+	all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
+	switch (cancel_ret) {
+	case IO_WQ_CANCEL_OK:
+		ret = 0;
+		break;
+	case IO_WQ_CANCEL_RUNNING:
+		ret = -EALREADY;
+		break;
+	case IO_WQ_CANCEL_NOTFOUND:
+		ret = -ENOENT;
+		break;
+	}
+
+	return ret;
+}
+
+int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	int ret;
+
+	WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
+
+	ret = io_async_cancel_one(req->task->io_uring, cd);
+	/*
+	 * Fall through even for -EALREADY, as we may have a poll armed
+	 * that needs unarming.
+	 */
+	if (!ret)
+		return 0;
+
+	spin_lock(&ctx->completion_lock);
+	ret = io_poll_cancel(ctx, cd);
+	if (ret != -ENOENT)
+		goto out;
+	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
+		ret = io_timeout_cancel(ctx, cd);
+out:
+	spin_unlock(&ctx->completion_lock);
+	return ret;
+}
+
+
+int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_cancel *cancel = io_kiocb_to_cmd(req);
+
+	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
+		return -EINVAL;
+	if (sqe->off || sqe->len || sqe->splice_fd_in)
+		return -EINVAL;
+
+	cancel->addr = READ_ONCE(sqe->addr);
+	cancel->flags = READ_ONCE(sqe->cancel_flags);
+	if (cancel->flags & ~CANCEL_FLAGS)
+		return -EINVAL;
+	if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
+		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
+			return -EINVAL;
+		cancel->fd = READ_ONCE(sqe->fd);
+	}
+
+	return 0;
+}
+
+static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req,
+			     unsigned int issue_flags)
+{
+	bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+	struct io_ring_ctx *ctx = cd->ctx;
+	struct io_tctx_node *node;
+	int ret, nr = 0;
+
+	do {
+		ret = io_try_cancel(req, cd);
+		if (ret == -ENOENT)
+			break;
+		if (!all)
+			return ret;
+		nr++;
+	} while (1);
+
+	/* slow path, try all io-wq's */
+	io_ring_submit_lock(ctx, issue_flags);
+	ret = -ENOENT;
+	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+		struct io_uring_task *tctx = node->task->io_uring;
+
+		ret = io_async_cancel_one(tctx, cd);
+		if (ret != -ENOENT) {
+			if (!all)
+				break;
+			nr++;
+		}
+	}
+	io_ring_submit_unlock(ctx, issue_flags);
+	return all ? nr : ret;
+}
+
+int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_cancel *cancel = io_kiocb_to_cmd(req);
+	struct io_cancel_data cd = {
+		.ctx	= req->ctx,
+		.data	= cancel->addr,
+		.flags	= cancel->flags,
+		.seq	= atomic_inc_return(&req->ctx->cancel_seq),
+	};
+	int ret;
+
+	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
+		if (req->flags & REQ_F_FIXED_FILE)
+			req->file = io_file_get_fixed(req, cancel->fd,
+							issue_flags);
+		else
+			req->file = io_file_get_normal(req, cancel->fd);
+		if (!req->file) {
+			ret = -EBADF;
+			goto done;
+		}
+		cd.file = req->file;
+	}
+
+	ret = __io_async_cancel(&cd, req, issue_flags);
+done:
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
-- cgit v1.2.3

From 38513c464d3d45b4088c82f6e42d9cdbc5ee57e6 Mon Sep 17 00:00:00 2001
From: Hao Xu
Date: Thu, 16 Jun 2022 10:22:02 +0100
Subject: io_uring: switch cancel_hash to use per entry spinlock

Add a new io_hash_bucket structure so that each bucket in cancel_hash
has its own spinlock. Using a per-entry lock for cancel_hash removes
some completion_lock invocations and reduces contention between
different cancel_hash entries.
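In consolidated form, the pattern the hunks below introduce looks
roughly like this (an illustrative sketch with a hypothetical helper
name, not the verbatim kernel code):

	struct io_hash_bucket {
		spinlock_t		lock;
		struct hlist_head	list;
	} ____cacheline_aligned_in_smp;

	/* hash on user_data, then lock only the one bucket being touched */
	static void bucket_insert(struct io_hash_bucket *hash, unsigned bits,
				  struct io_kiocb *req)
	{
		struct io_hash_bucket *hb =
			&hash[hash_long(req->cqe.user_data, bits)];

		spin_lock(&hb->lock);
		hlist_add_head(&req->hash_node, &hb->list);
		spin_unlock(&hb->lock);
	}

Writers touching different buckets no longer serialize on
->completion_lock.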
Signed-off-by: Hao Xu Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/05d1e135b0c8bce9d1441e6346776589e5783e26.1655371007.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/cancel.c | 14 +++++++-- io_uring/cancel.h | 6 ++++ io_uring/fdinfo.c | 9 ++++-- io_uring/io_uring.c | 9 ++++-- io_uring/io_uring_types.h | 2 +- io_uring/poll.c | 80 +++++++++++++++++++++++++++++------------------ 6 files changed, 80 insertions(+), 40 deletions(-) (limited to 'io_uring/cancel.c') diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 83cceb52d82d..6f2888388a40 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -93,14 +93,14 @@ int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd) if (!ret) return 0; - spin_lock(&ctx->completion_lock); ret = io_poll_cancel(ctx, cd); if (ret != -ENOENT) goto out; + spin_lock(&ctx->completion_lock); if (!(cd->flags & IORING_ASYNC_CANCEL_FD)) ret = io_timeout_cancel(ctx, cd); -out: spin_unlock(&ctx->completion_lock); +out: return ret; } @@ -192,3 +192,13 @@ done: io_req_set_res(req, ret, 0); return IOU_OK; } + +void init_hash_table(struct io_hash_bucket *hash_table, unsigned size) +{ + unsigned int i; + + for (i = 0; i < size; i++) { + spin_lock_init(&hash_table[i].lock); + INIT_HLIST_HEAD(&hash_table[i].list); + } +} diff --git a/io_uring/cancel.h b/io_uring/cancel.h index 4f35d8696325..556a7dcf160e 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -4,3 +4,9 @@ int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags); int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd); +void init_hash_table(struct io_hash_bucket *hash_table, unsigned size); + +struct io_hash_bucket { + spinlock_t lock; + struct hlist_head list; +} ____cacheline_aligned_in_smp; diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index fcedde4b4b1e..f941c73f5502 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -13,6 +13,7 @@ #include "io_uring.h" #include "sqpoll.h" #include "fdinfo.h" +#include "cancel.h" #ifdef CONFIG_PROC_FS static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, @@ -157,17 +158,19 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, mutex_unlock(&ctx->uring_lock); seq_puts(m, "PollList:\n"); - spin_lock(&ctx->completion_lock); for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct hlist_head *list = &ctx->cancel_hash[i]; + struct io_hash_bucket *hb = &ctx->cancel_hash[i]; struct io_kiocb *req; - hlist_for_each_entry(req, list, hash_node) + spin_lock(&hb->lock); + hlist_for_each_entry(req, &hb->list, hash_node) seq_printf(m, " op=%d, task_works=%d\n", req->opcode, task_work_pending(req->task)); + spin_unlock(&hb->lock); } seq_puts(m, "CqOverflowList:\n"); + spin_lock(&ctx->completion_lock); list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { struct io_uring_cqe *cqe = &ocqe->cqe; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 957a5bc1b528..ac6946e3f174 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -89,6 +89,7 @@ #include "fdinfo.h" #include "kbuf.h" #include "rsrc.h" +#include "cancel.h" #include "timeout.h" #include "poll.h" @@ -260,11 +261,13 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) if (hash_bits <= 0) hash_bits = 1; ctx->cancel_hash_bits = hash_bits; - ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head), - GFP_KERNEL); + ctx->cancel_hash = + kmalloc((1U << hash_bits) * 
sizeof(struct io_hash_bucket), + GFP_KERNEL); if (!ctx->cancel_hash) goto err; - __hash_init(ctx->cancel_hash, 1U << hash_bits); + + init_hash_table(ctx->cancel_hash, 1U << hash_bits); ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL); if (!ctx->dummy_ubuf) diff --git a/io_uring/io_uring_types.h b/io_uring/io_uring_types.h index 4576ea8cad2e..1f8db2dd7af7 100644 --- a/io_uring/io_uring_types.h +++ b/io_uring/io_uring_types.h @@ -224,7 +224,7 @@ struct io_ring_ctx { * manipulate the list, hence no extra locking is needed there. */ struct io_wq_work_list iopoll_list; - struct hlist_head *cancel_hash; + struct io_hash_bucket *cancel_hash; unsigned cancel_hash_bits; bool poll_multi_queue; diff --git a/io_uring/poll.c b/io_uring/poll.c index fdb6b1101ffc..1511a26b412d 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -19,6 +19,7 @@ #include "opdef.h" #include "kbuf.h" #include "poll.h" +#include "cancel.h" struct io_poll_update { struct file *file; @@ -73,10 +74,22 @@ static struct io_poll *io_poll_get_single(struct io_kiocb *req) static void io_poll_req_insert(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - struct hlist_head *list; + u32 index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits); + struct io_hash_bucket *hb = &ctx->cancel_hash[index]; - list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)]; - hlist_add_head(&req->hash_node, list); + spin_lock(&hb->lock); + hlist_add_head(&req->hash_node, &hb->list); + spin_unlock(&hb->lock); +} + +static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx) +{ + u32 index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits); + spinlock_t *lock = &ctx->cancel_hash[index].lock; + + spin_lock(lock); + hash_del(&req->hash_node); + spin_unlock(lock); } static void io_init_poll_iocb(struct io_poll *poll, __poll_t events, @@ -220,8 +233,8 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) } io_poll_remove_entries(req); + io_poll_req_delete(req, ctx); spin_lock(&ctx->completion_lock); - hash_del(&req->hash_node); req->cqe.flags = 0; __io_req_complete_post(req); io_commit_cqring(ctx); @@ -231,7 +244,6 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) static void io_apoll_task_func(struct io_kiocb *req, bool *locked) { - struct io_ring_ctx *ctx = req->ctx; int ret; ret = io_poll_check_events(req, locked); @@ -239,9 +251,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked) return; io_poll_remove_entries(req); - spin_lock(&ctx->completion_lock); - hash_del(&req->hash_node); - spin_unlock(&ctx->completion_lock); + io_poll_req_delete(req, req->ctx); if (!ret) io_req_task_submit(req, locked); @@ -437,9 +447,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req, return 0; } - spin_lock(&ctx->completion_lock); io_poll_req_insert(req); - spin_unlock(&ctx->completion_lock); if (mask && (poll->events & EPOLLET)) { /* can't multishot if failed, just queue the event we've got */ @@ -540,32 +548,31 @@ __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, bool found = false; int i; - spin_lock(&ctx->completion_lock); for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct hlist_head *list; + struct io_hash_bucket *hb = &ctx->cancel_hash[i]; - list = &ctx->cancel_hash[i]; - hlist_for_each_entry_safe(req, tmp, list, hash_node) { + spin_lock(&hb->lock); + hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) { if (io_match_task_safe(req, tsk, cancel_all)) { hlist_del_init(&req->hash_node); 
io_poll_cancel_req(req); found = true; } } + spin_unlock(&hb->lock); } - spin_unlock(&ctx->completion_lock); return found; } static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only, struct io_cancel_data *cd) - __must_hold(&ctx->completion_lock) { - struct hlist_head *list; struct io_kiocb *req; + u32 index = hash_long(cd->data, ctx->cancel_hash_bits); + struct io_hash_bucket *hb = &ctx->cancel_hash[index]; - list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)]; - hlist_for_each_entry(req, list, hash_node) { + spin_lock(&hb->lock); + hlist_for_each_entry(req, &hb->list, hash_node) { if (cd->data != req->cqe.user_data) continue; if (poll_only && req->opcode != IORING_OP_POLL_ADD) @@ -577,21 +584,21 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only, } return req; } + spin_unlock(&hb->lock); return NULL; } static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx, struct io_cancel_data *cd) - __must_hold(&ctx->completion_lock) { struct io_kiocb *req; int i; for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct hlist_head *list; + struct io_hash_bucket *hb = &ctx->cancel_hash[i]; - list = &ctx->cancel_hash[i]; - hlist_for_each_entry(req, list, hash_node) { + spin_lock(&hb->lock); + hlist_for_each_entry(req, &hb->list, hash_node) { if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && req->file != cd->file) continue; @@ -600,12 +607,12 @@ static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx, req->work.cancel_seq = cd->seq; return req; } + spin_unlock(&hb->lock); } return NULL; } static bool io_poll_disarm(struct io_kiocb *req) - __must_hold(&ctx->completion_lock) { if (!io_poll_get_ownership(req)) return false; @@ -615,17 +622,23 @@ static bool io_poll_disarm(struct io_kiocb *req) } int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) - __must_hold(&ctx->completion_lock) { struct io_kiocb *req; + u32 index; + spinlock_t *lock; if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY)) req = io_poll_file_find(ctx, cd); else req = io_poll_find(ctx, false, cd); - if (!req) + if (!req) { return -ENOENT; + } else { + index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits); + lock = &ctx->cancel_hash[index].lock; + } io_poll_cancel_req(req); + spin_unlock(lock); return 0; } @@ -719,18 +732,23 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) struct io_poll_update *poll_update = io_kiocb_to_cmd(req); struct io_cancel_data cd = { .data = poll_update->old_user_data, }; struct io_ring_ctx *ctx = req->ctx; + u32 index = hash_long(cd.data, ctx->cancel_hash_bits); + spinlock_t *lock = &ctx->cancel_hash[index].lock; struct io_kiocb *preq; int ret2, ret = 0; bool locked; - spin_lock(&ctx->completion_lock); preq = io_poll_find(ctx, true, &cd); - if (!preq || !io_poll_disarm(preq)) { - spin_unlock(&ctx->completion_lock); - ret = preq ? -EALREADY : -ENOENT; + if (!preq) { + ret = -ENOENT; + goto out; + } + ret2 = io_poll_disarm(preq); + spin_unlock(lock); + if (!ret2) { + ret = -EALREADY; goto out; } - spin_unlock(&ctx->completion_lock); if (poll_update->update_events || poll_update->update_user_data) { /* only mask one event flags, keep behavior flags */ -- cgit v1.2.3 From 4dfab8abb4721da278a2ccd45c1b6a69f8a9dd14 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Jun 2022 10:22:04 +0100 Subject: io_uring: clean up io_try_cancel Get rid of an unnecessary extra goto in io_try_cancel() and simplify the function. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/48cf5417b43a8386c6c364dba1ad9b4c7382d158.1655371007.git.asml.silence@gmail.com Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- io_uring/cancel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'io_uring/cancel.c') diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 6f2888388a40..a253e2ad22eb 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -95,12 +95,12 @@ int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd) ret = io_poll_cancel(ctx, cd); if (ret != -ENOENT) - goto out; + return ret; + spin_lock(&ctx->completion_lock); if (!(cd->flags & IORING_ASYNC_CANCEL_FD)) ret = io_timeout_cancel(ctx, cd); spin_unlock(&ctx->completion_lock); -out: return ret; } -- cgit v1.2.3 From e6f89be61410ff5a0e690d87d7a308e81f0f5a71 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Jun 2022 10:22:10 +0100 Subject: io_uring: introduce a struct for hash table Instead of passing around a pointer to hash buckets, add a bit of type safety and wrap it into a structure. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/d65bc3faba537ec2aca9eabf334394936d44bd28.1655371007.git.asml.silence@gmail.com Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- io_uring/cancel.c | 6 +++--- io_uring/cancel.h | 7 +------ io_uring/fdinfo.c | 4 ++-- io_uring/io_uring.c | 29 +++++++++++++++++------------ io_uring/io_uring_types.h | 13 +++++++++++-- io_uring/poll.c | 40 ++++++++++++++++++++++------------------ 6 files changed, 56 insertions(+), 43 deletions(-) (limited to 'io_uring/cancel.c') diff --git a/io_uring/cancel.c b/io_uring/cancel.c index a253e2ad22eb..f28f0a7d1272 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -193,12 +193,12 @@ done: return IOU_OK; } -void init_hash_table(struct io_hash_bucket *hash_table, unsigned size) +void init_hash_table(struct io_hash_table *table, unsigned size) { unsigned int i; for (i = 0; i < size; i++) { - spin_lock_init(&hash_table[i].lock); - INIT_HLIST_HEAD(&hash_table[i].list); + spin_lock_init(&table->hbs[i].lock); + INIT_HLIST_HEAD(&table->hbs[i].list); } } diff --git a/io_uring/cancel.h b/io_uring/cancel.h index 556a7dcf160e..fd4cb1a2595d 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -4,9 +4,4 @@ int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags); int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd); -void init_hash_table(struct io_hash_bucket *hash_table, unsigned size); - -struct io_hash_bucket { - spinlock_t lock; - struct hlist_head list; -} ____cacheline_aligned_in_smp; +void init_hash_table(struct io_hash_table *table, unsigned size); diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index f941c73f5502..344e7d90d557 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -158,8 +158,8 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, mutex_unlock(&ctx->uring_lock); seq_puts(m, "PollList:\n"); - for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct io_hash_bucket *hb = &ctx->cancel_hash[i]; + for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) { + struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i]; struct io_kiocb *req; spin_lock(&hb->lock); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 06772139b7da..0b3851a0db2b 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -241,11 +241,23 @@ static __cold void io_fallback_req_func(struct work_struct *work) 
percpu_ref_put(&ctx->refs); } +static int io_alloc_hash_table(struct io_hash_table *table, unsigned bits) +{ + unsigned hash_buckets = 1U << bits; + size_t hash_size = hash_buckets * sizeof(table->hbs[0]); + + table->hbs = kmalloc(hash_size, GFP_KERNEL); + if (!table->hbs) + return -ENOMEM; + + table->hash_bits = bits; + init_hash_table(table, hash_buckets); + return 0; +} + static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) { struct io_ring_ctx *ctx; - unsigned hash_buckets; - size_t hash_size; int hash_bits; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -261,16 +273,9 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) */ hash_bits = ilog2(p->cq_entries) - 5; hash_bits = clamp(hash_bits, 1, 8); - hash_buckets = 1U << hash_bits; - hash_size = hash_buckets * sizeof(struct io_hash_bucket); - - ctx->cancel_hash_bits = hash_bits; - ctx->cancel_hash = kmalloc(hash_size, GFP_KERNEL); - if (!ctx->cancel_hash) + if (io_alloc_hash_table(&ctx->cancel_table, hash_bits)) goto err; - init_hash_table(ctx->cancel_hash, hash_buckets); - ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL); if (!ctx->dummy_ubuf) goto err; @@ -311,7 +316,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) return ctx; err: kfree(ctx->dummy_ubuf); - kfree(ctx->cancel_hash); + kfree(ctx->cancel_table.hbs); kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); kfree(ctx); @@ -2487,7 +2492,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_req_caches_free(ctx); if (ctx->hash_map) io_wq_put_hash(ctx->hash_map); - kfree(ctx->cancel_hash); + kfree(ctx->cancel_table.hbs); kfree(ctx->dummy_ubuf); kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); diff --git a/io_uring/io_uring_types.h b/io_uring/io_uring_types.h index 8b00243abf65..d3b9bde9c702 100644 --- a/io_uring/io_uring_types.h +++ b/io_uring/io_uring_types.h @@ -9,6 +9,16 @@ #include "io-wq.h" #include "filetable.h" +struct io_hash_bucket { + spinlock_t lock; + struct hlist_head list; +} ____cacheline_aligned_in_smp; + +struct io_hash_table { + struct io_hash_bucket *hbs; + unsigned hash_bits; +}; + struct io_uring { u32 head ____cacheline_aligned_in_smp; u32 tail ____cacheline_aligned_in_smp; @@ -224,8 +234,7 @@ struct io_ring_ctx { * manipulate the list, hence no extra locking is needed there. 
*/ struct io_wq_work_list iopoll_list; - struct io_hash_bucket *cancel_hash; - unsigned cancel_hash_bits; + struct io_hash_table cancel_table; bool poll_multi_queue; struct list_head io_buffers_comp; diff --git a/io_uring/poll.c b/io_uring/poll.c index 96199c999fe6..ea6466388ed9 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -73,9 +73,9 @@ static struct io_poll *io_poll_get_single(struct io_kiocb *req) static void io_poll_req_insert(struct io_kiocb *req) { - struct io_ring_ctx *ctx = req->ctx; - u32 index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits); - struct io_hash_bucket *hb = &ctx->cancel_hash[index]; + struct io_hash_table *table = &req->ctx->cancel_table; + u32 index = hash_long(req->cqe.user_data, table->hash_bits); + struct io_hash_bucket *hb = &table->hbs[index]; spin_lock(&hb->lock); hlist_add_head(&req->hash_node, &hb->list); @@ -84,8 +84,9 @@ static void io_poll_req_insert(struct io_kiocb *req) static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx) { - u32 index = hash_long(req->cqe.user_data, ctx->cancel_hash_bits); - spinlock_t *lock = &ctx->cancel_hash[index].lock; + struct io_hash_table *table = &req->ctx->cancel_table; + u32 index = hash_long(req->cqe.user_data, table->hash_bits); + spinlock_t *lock = &table->hbs[index].lock; spin_lock(lock); hash_del(&req->hash_node); @@ -539,13 +540,15 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, bool cancel_all) { + struct io_hash_table *table = &ctx->cancel_table; + unsigned nr_buckets = 1U << table->hash_bits; struct hlist_node *tmp; struct io_kiocb *req; bool found = false; int i; - for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct io_hash_bucket *hb = &ctx->cancel_hash[i]; + for (i = 0; i < nr_buckets; i++) { + struct io_hash_bucket *hb = &table->hbs[i]; spin_lock(&hb->lock); hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) { @@ -562,12 +565,12 @@ __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only, struct io_cancel_data *cd, - struct io_hash_bucket hash_table[], + struct io_hash_table *table, struct io_hash_bucket **out_bucket) { struct io_kiocb *req; - u32 index = hash_long(cd->data, ctx->cancel_hash_bits); - struct io_hash_bucket *hb = &hash_table[index]; + u32 index = hash_long(cd->data, table->hash_bits); + struct io_hash_bucket *hb = &table->hbs[index]; *out_bucket = NULL; @@ -591,16 +594,17 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only, static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx, struct io_cancel_data *cd, - struct io_hash_bucket hash_table[], + struct io_hash_table *table, struct io_hash_bucket **out_bucket) { + unsigned nr_buckets = 1U << table->hash_bits; struct io_kiocb *req; int i; *out_bucket = NULL; - for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct io_hash_bucket *hb = &hash_table[i]; + for (i = 0; i < nr_buckets; i++) { + struct io_hash_bucket *hb = &table->hbs[i]; spin_lock(&hb->lock); hlist_for_each_entry(req, &hb->list, hash_node) { @@ -628,15 +632,15 @@ static bool io_poll_disarm(struct io_kiocb *req) } static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, - struct io_hash_bucket hash_table[]) + struct io_hash_table *table) { struct io_hash_bucket *bucket; struct io_kiocb *req; if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY)) - req 
= io_poll_file_find(ctx, cd, ctx->cancel_hash, &bucket);
+		req = io_poll_file_find(ctx, cd, table, &bucket);
 	else
-		req = io_poll_find(ctx, false, cd, ctx->cancel_hash, &bucket);
+		req = io_poll_find(ctx, false, cd, table, &bucket);

 	if (req)
 		io_poll_cancel_req(req);
@@ -647,7 +651,7 @@ static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,

 int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
 {
-	return __io_poll_cancel(ctx, cd, ctx->cancel_hash);
+	return __io_poll_cancel(ctx, cd, &ctx->cancel_table);
 }

 static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
@@ -745,7 +749,7 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
 	int ret2, ret = 0;
 	bool locked;

-	preq = io_poll_find(ctx, true, &cd, ctx->cancel_hash, &bucket);
+	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
 	if (preq)
 		ret2 = io_poll_disarm(preq);
 	if (bucket)
-- cgit v1.2.3

From 5d7943d99df9326c7b02f773b2d6f09709c30594 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov
Date: Thu, 16 Jun 2022 10:22:11 +0100
Subject: io_uring: propagate locking state to poll cancel

Poll cancellation will soon need to grab ->uring_lock internally, so
pass the locking state, i.e. issue_flags, into the cancellation
functions.

Signed-off-by: Pavel Begunkov
Link: https://lore.kernel.org/r/b86781d047727c07163443b57551a3fa57c7c5e1.1655371007.git.asml.silence@gmail.com
Reviewed-by: Hao Xu
Signed-off-by: Jens Axboe
---
 io_uring/cancel.c | 7 ++++---
 io_uring/cancel.h | 3 ++-
 io_uring/poll.c | 3 ++-
 io_uring/poll.h | 3 ++-
 io_uring/timeout.c | 3 ++-
 5 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'io_uring/cancel.c')

diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index f28f0a7d1272..f07bfd27c98a 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -78,7 +78,8 @@ static int io_async_cancel_one(struct io_uring_task *tctx,
 	return ret;
 }

-int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd)
+int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd,
+		  unsigned issue_flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
@@ -93,7 +94,7 @@ int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd)
 	if (!ret)
 		return 0;

-	ret = io_poll_cancel(ctx, cd);
+	ret = io_poll_cancel(ctx, cd, issue_flags);
 	if (ret != -ENOENT)
 		return ret;

@@ -136,7 +137,7 @@ static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req,
 	int ret, nr = 0;

 	do {
-		ret = io_try_cancel(req, cd);
+		ret = io_try_cancel(req, cd, issue_flags);
 		if (ret == -ENOENT)
 			break;
 		if (!all)

diff --git a/io_uring/cancel.h b/io_uring/cancel.h
index fd4cb1a2595d..8dd259dc383e 100644
--- a/io_uring/cancel.h
+++ b/io_uring/cancel.h
@@ -3,5 +3,6 @@

 int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags);
-int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd);
+int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd,
+		  unsigned int issue_flags);
 void init_hash_table(struct io_hash_table *table, unsigned size);

diff --git a/io_uring/poll.c b/io_uring/poll.c
index ea6466388ed9..c4edf8794538 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -649,7 +649,8 @@ static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
 	return req ? 
0 : -ENOENT;
 }

-int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
+int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		   unsigned issue_flags)
 {
 	return __io_poll_cancel(ctx, cd, &ctx->cancel_table);
 }

diff --git a/io_uring/poll.h b/io_uring/poll.h
index cc75c1567a84..fa3e19790281 100644
--- a/io_uring/poll.h
+++ b/io_uring/poll.h
@@ -24,7 +24,8 @@ int io_poll_add(struct io_kiocb *req, unsigned int issue_flags);
 int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags);

-int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd);
+int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+		   unsigned issue_flags);
 int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags);
 bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			bool cancel_all);

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 69cca42d6835..526fc8b2e3b6 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -262,6 +262,7 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)

 static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
 {
+	unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
 	struct io_kiocb *prev = timeout->prev;
 	int ret = -ENOENT;
@@ -273,7 +274,7 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
 			.data = prev->cqe.user_data,
 		};

-		ret = io_try_cancel(req, &cd);
+		ret = io_try_cancel(req, &cd, issue_flags);
 	}
 	io_req_set_res(req, ret ?: -ETIME, 0);
 	io_req_complete_post(req);
-- cgit v1.2.3

From 27a9d66fec77cff0e32d2ecd5d0ac7ef878a7bb0 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov
Date: Thu, 16 Jun 2022 13:57:18 +0100
Subject: io_uring: kill extra io_uring_types.h includes

io_uring/io_uring.h already includes io_uring_types.h, no need to
include it every time. Kill it in a bunch of places; this prepares us
for the following patches.
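The per-file change is uniform; e.g. the head of advise.c ends up
relying on the indirect include (a sketch of the resulting include
block, local headers only):

	/* io_uring/advise.c, after this patch */
	#include "io_uring.h"	/* already pulls in io_uring_types.h */
	#include "advise.h"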
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/94d8c943fbe0ef949981c508ddcee7fc1c18850f.1655384063.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/advise.c | 1 - io_uring/cancel.c | 1 - io_uring/epoll.c | 1 - io_uring/fdinfo.c | 1 - io_uring/filetable.c | 1 - io_uring/fs.c | 1 - io_uring/io_uring.c | 1 - io_uring/kbuf.c | 1 - io_uring/msg_ring.c | 1 - io_uring/net.c | 1 - io_uring/nop.c | 1 - io_uring/opdef.c | 1 - io_uring/openclose.c | 1 - io_uring/poll.c | 1 - io_uring/rsrc.c | 1 - io_uring/rw.c | 1 - io_uring/splice.c | 1 - io_uring/sqpoll.c | 1 - io_uring/statx.c | 1 - io_uring/sync.c | 1 - io_uring/tctx.c | 1 - io_uring/timeout.c | 1 - io_uring/uring_cmd.c | 1 - io_uring/xattr.c | 1 - 24 files changed, 24 deletions(-) (limited to 'io_uring/cancel.c') diff --git a/io_uring/advise.c b/io_uring/advise.c index 8870fdf66ffb..581956934c0b 100644 --- a/io_uring/advise.c +++ b/io_uring/advise.c @@ -11,7 +11,6 @@ #include #include -#include "io_uring_types.h" #include "io_uring.h" #include "advise.h" diff --git a/io_uring/cancel.c b/io_uring/cancel.c index f07bfd27c98a..d1e7f5a955ab 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -10,7 +10,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "tctx.h" #include "poll.h" diff --git a/io_uring/epoll.c b/io_uring/epoll.c index 10853e8ed078..a8b794471d6b 100644 --- a/io_uring/epoll.c +++ b/io_uring/epoll.c @@ -9,7 +9,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "epoll.h" diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 344e7d90d557..61c35707a6cf 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -9,7 +9,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "sqpoll.h" #include "fdinfo.h" diff --git a/io_uring/filetable.c b/io_uring/filetable.c index e449ceb9a848..534e1a3c625d 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -9,7 +9,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "rsrc.h" #include "filetable.h" diff --git a/io_uring/fs.c b/io_uring/fs.c index aac1bc5255b0..0de4f549bb7d 100644 --- a/io_uring/fs.c +++ b/io_uring/fs.c @@ -12,7 +12,6 @@ #include "../fs/internal.h" -#include "io_uring_types.h" #include "io_uring.h" #include "fs.h" diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 0dbf6a74f9f3..0a1f83a936b7 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -80,7 +80,6 @@ #include "io-wq.h" -#include "io_uring_types.h" #include "io_uring.h" #include "opdef.h" #include "refs.h" diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 59e4fafeb28c..62de0dda24bf 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -11,7 +11,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "opdef.h" #include "kbuf.h" diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 7c3c5f3ab06b..b02be2349652 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -7,7 +7,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "msg_ring.h" diff --git a/io_uring/net.c b/io_uring/net.c index 35d0183fe758..b77bfbfb0816 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -10,7 +10,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "kbuf.h" #include "net.h" diff --git a/io_uring/nop.c b/io_uring/nop.c index d363d8ce70a3..d956599a3c1b 100644 --- a/io_uring/nop.c +++ b/io_uring/nop.c @@ -7,7 +7,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "nop.h" diff --git a/io_uring/opdef.c 
b/io_uring/opdef.c index d687d33f9c0c..a7b84b43e6c2 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -8,7 +8,6 @@ #include #include -#include "io_uring_types.h" #include "io_uring.h" #include "opdef.h" #include "refs.h" diff --git a/io_uring/openclose.c b/io_uring/openclose.c index 1cbf39030970..099a5ec84dfd 100644 --- a/io_uring/openclose.c +++ b/io_uring/openclose.c @@ -12,7 +12,6 @@ #include "../fs/internal.h" -#include "io_uring_types.h" #include "io_uring.h" #include "rsrc.h" #include "openclose.h" diff --git a/io_uring/poll.c b/io_uring/poll.c index e0c181fe6264..63aca920543b 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -13,7 +13,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "refs.h" #include "opdef.h" diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 7fed3105152a..68629eba4132 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -12,7 +12,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "openclose.h" #include "rsrc.h" diff --git a/io_uring/rw.c b/io_uring/rw.c index e5ca23d0783e..f8b42f2265df 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -14,7 +14,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "opdef.h" #include "kbuf.h" diff --git a/io_uring/splice.c b/io_uring/splice.c index 0e19d6330345..b013ba34bffa 100644 --- a/io_uring/splice.c +++ b/io_uring/splice.c @@ -11,7 +11,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "splice.h" diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 149d5c976f14..76d4d70c733a 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -14,7 +14,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "sqpoll.h" diff --git a/io_uring/statx.c b/io_uring/statx.c index 83b15687e9c5..6056cd7f4876 100644 --- a/io_uring/statx.c +++ b/io_uring/statx.c @@ -8,7 +8,6 @@ #include "../fs/internal.h" -#include "io_uring_types.h" #include "io_uring.h" #include "statx.h" diff --git a/io_uring/sync.c b/io_uring/sync.c index 9ee8ff865521..f2102afa79ca 100644 --- a/io_uring/sync.c +++ b/io_uring/sync.c @@ -11,7 +11,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "sync.h" diff --git a/io_uring/tctx.c b/io_uring/tctx.c index a819da8fc85c..9b30fb0d3603 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -9,7 +9,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "tctx.h" diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 526fc8b2e3b6..f9df359813c9 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -8,7 +8,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "refs.h" #include "cancel.h" diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index abf78918a099..0a421ed51e7e 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -6,7 +6,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "uring_cmd.h" diff --git a/io_uring/xattr.c b/io_uring/xattr.c index 79adf4efba01..b179f9acd5ac 100644 --- a/io_uring/xattr.c +++ b/io_uring/xattr.c @@ -13,7 +13,6 @@ #include "../fs/internal.h" -#include "io_uring_types.h" #include "io_uring.h" #include "xattr.h" -- cgit v1.2.3 From 88f52eaad2df2cb5ab49b864d79398c9cb9a57f2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 18 Jun 2022 09:23:54 -0600 Subject: io_uring: have cancelation API accept io_uring_task directly We just use the io_kiocb passed in to find the io_uring_task, and we already pass in the ctx via cd->ctx anyway. 
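At a call site the conversion is mechanical; e.g. the link-timeout path
in the timeout.c hunk below goes from passing the request to passing
its task context:

-		ret = io_try_cancel(req, &cd, issue_flags);
+		ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);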
Signed-off-by: Jens Axboe --- io_uring/cancel.c | 17 +++++++++-------- io_uring/cancel.h | 2 +- io_uring/timeout.c | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'io_uring/cancel.c') diff --git a/io_uring/cancel.c b/io_uring/cancel.c index d1e7f5a955ab..500ee5f5fd23 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -77,15 +77,15 @@ static int io_async_cancel_one(struct io_uring_task *tctx, return ret; } -int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd, +int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd, unsigned issue_flags) { - struct io_ring_ctx *ctx = req->ctx; + struct io_ring_ctx *ctx = cd->ctx; int ret; - WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current); + WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring); - ret = io_async_cancel_one(req->task->io_uring, cd); + ret = io_async_cancel_one(tctx, cd); /* * Fall-through even for -EALREADY, as we may have poll armed * that need unarming. @@ -104,7 +104,6 @@ int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd, return ret; } - int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_cancel *cancel = io_kiocb_to_cmd(req); @@ -127,7 +126,8 @@ int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req, +static int __io_async_cancel(struct io_cancel_data *cd, + struct io_uring_task *tctx, unsigned int issue_flags) { bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); @@ -136,7 +136,7 @@ static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req, int ret, nr = 0; do { - ret = io_try_cancel(req, cd, issue_flags); + ret = io_try_cancel(tctx, cd, issue_flags); if (ret == -ENOENT) break; if (!all) @@ -170,6 +170,7 @@ int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) .flags = cancel->flags, .seq = atomic_inc_return(&req->ctx->cancel_seq), }; + struct io_uring_task *tctx = req->task->io_uring; int ret; if (cd.flags & IORING_ASYNC_CANCEL_FD) { @@ -185,7 +186,7 @@ int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) cd.file = req->file; } - ret = __io_async_cancel(&cd, req, issue_flags); + ret = __io_async_cancel(&cd, tctx, issue_flags); done: if (ret < 0) req_set_fail(req); diff --git a/io_uring/cancel.h b/io_uring/cancel.h index 2338012a5b06..1bc7e917ce94 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -16,6 +16,6 @@ struct io_cancel_data { int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags); -int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd, +int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd, unsigned int issue_flags); void init_hash_table(struct io_hash_table *table, unsigned size); diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 7e2c341f9762..4af074b8f6b7 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -274,7 +274,7 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) .data = prev->cqe.user_data, }; - ret = io_try_cancel(req, &cd, issue_flags); + ret = io_try_cancel(req->task->io_uring, &cd, issue_flags); } io_req_set_res(req, ret ?: -ETIME, 0); io_req_complete_post(req); -- cgit v1.2.3 From 7d8ca7250197096bfa9f432c1d99b0555504bbba Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 18 Jun 2022 09:47:04 -0600 Subject: io_uring: 
add IORING_ASYNC_CANCEL_FD_FIXED cancel flag

In preparation for not having a request to pass in that carries this
state, add a separate cancelation flag that allows the caller to ask
for a fixed file for cancelation.

Signed-off-by: Jens Axboe
---
 include/uapi/linux/io_uring.h | 2 ++
 io_uring/cancel.c | 9 ++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'io_uring/cancel.c')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d7ae81b10893..a09a78bd7556 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -247,10 +247,12 @@ enum io_uring_op {
 * IORING_ASYNC_CANCEL_FD	Key off 'fd' for cancelation rather than the
 *				request 'user_data'
 * IORING_ASYNC_CANCEL_ANY	Match any request
+ * IORING_ASYNC_CANCEL_FD_FIXED	'fd' passed in is a fixed descriptor
 */
 #define IORING_ASYNC_CANCEL_ALL	(1U << 0)
 #define IORING_ASYNC_CANCEL_FD	(1U << 1)
 #define IORING_ASYNC_CANCEL_ANY	(1U << 2)
+#define IORING_ASYNC_CANCEL_FD_FIXED	(1U << 3)

 /*
 * send/sendmsg and recv/recvmsg flags (sqe->ioprio)

diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 500ee5f5fd23..da486de07029 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -24,7 +24,7 @@ struct io_cancel {
 };

 #define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
-			 IORING_ASYNC_CANCEL_ANY)
+			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED)

 static bool io_cancel_cb(struct io_wq_work *work, void *data)
 {
@@ -174,11 +174,14 @@ int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 	int ret;

 	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
-		if (req->flags & REQ_F_FIXED_FILE)
+		if (req->flags & REQ_F_FIXED_FILE ||
+		    cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
+			req->flags |= REQ_F_FIXED_FILE;
 			req->file = io_file_get_fixed(req, cancel->fd,
 						      issue_flags);
-		else
+		} else {
 			req->file = io_file_get_normal(req, cancel->fd);
+		}
 		if (!req->file) {
 			ret = -EBADF;
 			goto done;
-- cgit v1.2.3

From 78a861b9495920f8609dee5b670dacbff09d359f Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 18 Jun 2022 10:00:50 -0600
Subject: io_uring: add sync cancelation API through io_uring_register()

The io_uring cancelation API is async, like any other API that we
expose there. For the case of finding a request to cancel, or not
finding one, it is fully sync in that when submission returns, the CQE
for both the cancelation request and the targeted request have been
posted to the CQ ring. However, if the targeted work is being executed
by io-wq, the API can only start the act of canceling it. This makes it
difficult to use in some circumstances, as the caller then has to wait
for the CQEs to come in and match on the same cancelation data there.

Provide an IORING_REGISTER_SYNC_CANCEL command for io_uring_register()
that does sync cancelations, always. For the io-wq case, it'll wait for
the cancelation to come in before returning. The only expected returns
from this API are:

0	Request found and canceled fine.
> 0	Requests found and canceled. Only happens if asked to cancel
	multiple requests, and if the work wasn't in progress.
-ENOENT	Request not found.
-ETIME	A timeout on the operation was requested, but the timeout
	expired before we could cancel.

We won't get -EALREADY via this API.

If the timeout value passed in is -1 (tv_sec and tv_nsec), then that
means that no timeout is requested. Otherwise, the timespec passed in
is the amount of time the sync cancel will wait for a successful
cancelation.
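For illustration, a minimal userspace caller might look like the sketch
below. It assumes only the uapi additions from this patch plus a raw
io_uring_register(2) syscall; the helper name and error handling are
hypothetical, not part of the patch:

	#include <linux/io_uring.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/*
	 * Synchronously cancel the request whose sqe user_data matches.
	 * Returns 0 (or a positive count) on success; on failure, -1 with
	 * errno set to ENOENT, ETIME, etc. as described above.
	 */
	static int sync_cancel_by_user_data(int ring_fd,
					    unsigned long long user_data)
	{
		struct io_uring_sync_cancel_reg reg;

		memset(&reg, 0, sizeof(reg));
		reg.addr = user_data;
		reg.timeout.tv_sec = -1;	/* -1/-1: no timeout */
		reg.timeout.tv_nsec = -1;

		return syscall(__NR_io_uring_register, ring_fd,
			       IORING_REGISTER_SYNC_CANCEL, &reg, 1);
	}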
Link: https://github.com/axboe/liburing/discussions/608
Signed-off-by: Jens Axboe
---
 include/uapi/linux/io_uring.h | 15 ++++++
 io_uring/cancel.c | 107 ++++++++++++++++++++++++++++++++++++++++++
 io_uring/cancel.h | 2 +
 io_uring/io_uring.c | 6 +++
 4 files changed, 130 insertions(+)

(limited to 'io_uring/cancel.c')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index a09a78bd7556..094f706c93e0 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -10,6 +10,7 @@
 #include
 #include
+#include

 /*
 * IO submission data structure (Submission Queue Entry)
 */
@@ -428,6 +429,9 @@ enum {
 	IORING_REGISTER_PBUF_RING = 22,
 	IORING_UNREGISTER_PBUF_RING = 23,

+	/* sync cancelation API */
+	IORING_REGISTER_SYNC_CANCEL = 24,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
@@ -563,4 +567,15 @@ struct io_uring_getevents_arg {
 	__u64 ts;
 };

+/*
+ * Argument for IORING_REGISTER_SYNC_CANCEL
+ */
+struct io_uring_sync_cancel_reg {
+	__u64 addr;
+	__s32 fd;
+	__u32 flags;
+	struct __kernel_timespec timeout;
+	__u64 pad[4];
+};
+
 #endif

diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index da486de07029..8435a1eba59a 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include

 #include

@@ -206,3 +207,109 @@ void init_hash_table(struct io_hash_table *table, unsigned size)
 		INIT_HLIST_HEAD(&table->hbs[i].list);
 	}
 }
+
+static int __io_sync_cancel(struct io_uring_task *tctx,
+			    struct io_cancel_data *cd, int fd)
+{
+	struct io_ring_ctx *ctx = cd->ctx;
+
+	/* fixed must be grabbed every time since we drop the uring_lock */
+	if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
+	    (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
+		unsigned long file_ptr;
+
+		if (unlikely(fd > ctx->nr_user_files))
+			return -EBADF;
+		fd = array_index_nospec(fd, ctx->nr_user_files);
+		file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
+		cd->file = (struct file *) (file_ptr & FFS_MASK);
+		if (!cd->file)
+			return -EBADF;
+	}
+
+	return __io_async_cancel(cd, tctx, 0);
+}
+
+int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
+	__must_hold(&ctx->uring_lock)
+{
+	struct io_cancel_data cd = {
+		.ctx	= ctx,
+		.seq	= atomic_inc_return(&ctx->cancel_seq),
+	};
+	ktime_t timeout = KTIME_MAX;
+	struct io_uring_sync_cancel_reg sc;
+	struct fd f = { };
+	DEFINE_WAIT(wait);
+	int ret;
+
+	if (copy_from_user(&sc, arg, sizeof(sc)))
+		return -EFAULT;
+	if (sc.flags & ~CANCEL_FLAGS)
+		return -EINVAL;
+	if (sc.pad[0] || sc.pad[1] || sc.pad[2] || sc.pad[3])
+		return -EINVAL;
+
+	cd.data = sc.addr;
+	cd.flags = sc.flags;
+
+	/* we can grab a normal file descriptor upfront */
+	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
+	    !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
+		f = fdget(sc.fd);
+		if (!f.file)
+			return -EBADF;
+		cd.file = f.file;
+	}
+
+	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);
+
+	/* found something, done! */
+	if (ret != -EALREADY)
+		goto out;
+
+	if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
+		struct timespec64 ts = {
+			.tv_sec		= sc.timeout.tv_sec,
+			.tv_nsec	= sc.timeout.tv_nsec
+		};
+
+		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
+	}
+
+	/*
+	 * Keep looking until we get -ENOENT. We'll get woken every time
+	 * a request completes and will retry the cancelation.
+ */ + do { + cd.seq = atomic_inc_return(&ctx->cancel_seq); + + prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE); + + ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); + + if (ret != -EALREADY) + break; + + mutex_unlock(&ctx->uring_lock); + ret = io_run_task_work_sig(); + if (ret < 0) { + mutex_lock(&ctx->uring_lock); + break; + } + ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS); + mutex_lock(&ctx->uring_lock); + if (!ret) { + ret = -ETIME; + break; + } + } while (1); + + finish_wait(&ctx->cq_wait, &wait); + + if (ret == -ENOENT || ret > 0) + ret = 0; +out: + fdput(f); + return ret; +} diff --git a/io_uring/cancel.h b/io_uring/cancel.h index 1bc7e917ce94..6a59ee484d0c 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -19,3 +19,5 @@ int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags); int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd, unsigned int issue_flags); void init_hash_table(struct io_hash_table *table, unsigned size); + +int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 997b915a1ff7..45538b3c3a76 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3871,6 +3871,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_unregister_pbuf_ring(ctx, arg); break; + case IORING_REGISTER_SYNC_CANCEL: + ret = -EINVAL; + if (!arg || nr_args != 1) + break; + ret = io_sync_cancel(ctx, arg); + break; default: ret = -EINVAL; break; -- cgit v1.2.3