From 36404b09aa609e00f8f0108356830c22b99b3cbf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 May 2022 06:42:08 -0600 Subject: io_uring: move msg_ring into its own file Signed-off-by: Jens Axboe --- io_uring/msg_ring.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 io_uring/msg_ring.c (limited to 'io_uring/msg_ring.c') diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c new file mode 100644 index 000000000000..3b89f9a0a0b4 --- /dev/null +++ b/io_uring/msg_ring.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#include + +#include "io_uring_types.h" +#include "io_uring.h" +#include "msg_ring.h" + +struct io_msg { + struct file *file; + u64 user_data; + u32 len; +}; + +int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_msg *msg = io_kiocb_to_cmd(req); + + if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in || + sqe->buf_index || sqe->personality)) + return -EINVAL; + + msg->user_data = READ_ONCE(sqe->off); + msg->len = READ_ONCE(sqe->len); + return 0; +} + +int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_msg *msg = io_kiocb_to_cmd(req); + struct io_ring_ctx *target_ctx; + bool filled; + int ret; + + ret = -EBADFD; + if (!io_is_uring_fops(req->file)) + goto done; + + ret = -EOVERFLOW; + target_ctx = req->file->private_data; + + spin_lock(&target_ctx->completion_lock); + filled = io_fill_cqe_aux(target_ctx, msg->user_data, msg->len, 0); + io_commit_cqring(target_ctx); + spin_unlock(&target_ctx->completion_lock); + + if (filled) { + io_cqring_ev_posted(target_ctx); + ret = 0; + } + +done: + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); + /* put file to avoid an attempt to IOPOLL the req */ + io_put_file(req->file); + req->file = NULL; + return IOU_OK; +} -- cgit v1.2.3 From d245bca6375bccfd589a6a7d5007df28575bb626 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 17 Jun 2022 09:48:00 +0100 Subject: io_uring: don't expose io_fill_cqe_aux() Deduplicate some code and add a helper for filling an aux CQE, locking and notification. 
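To illustrate the consolidation (sketch only, not part of the diff below): callers that currently open-code the post-and-notify sequence

    spin_lock(&ctx->completion_lock);
    filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
    io_commit_cqring(ctx);
    spin_unlock(&ctx->completion_lock);
    if (filled)
            io_cqring_ev_posted(ctx);

collapse into a single call to the new helper, which takes care of the locking, the CQE fill/commit and the completion notification:

    filled = io_post_aux_cqe(ctx, user_data, res, cflags);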
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/b7c6557c8f9dc5c4cfb01292116c682a0ff61081.1655455613.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 18 ++++++++++++++++-- io_uring/io_uring.h | 3 +-- io_uring/msg_ring.c | 11 +---------- io_uring/net.c | 20 +++++--------------- io_uring/poll.c | 24 ++++++++---------------- io_uring/rsrc.c | 14 +++++--------- 6 files changed, 36 insertions(+), 54 deletions(-) (limited to 'io_uring/msg_ring.c') diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index eeda16731795..8c1b0e0ce5bb 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -676,8 +676,8 @@ bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, s32 res, return true; } -bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, - u32 cflags) +static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, + u64 user_data, s32 res, u32 cflags) { struct io_uring_cqe *cqe; @@ -704,6 +704,20 @@ bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); } +bool io_post_aux_cqe(struct io_ring_ctx *ctx, + u64 user_data, s32 res, u32 cflags) +{ + bool filled; + + spin_lock(&ctx->completion_lock); + filled = io_fill_cqe_aux(ctx, user_data, res, cflags); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (filled) + io_cqring_ev_posted(ctx); + return filled; +} + static void __io_req_complete_put(struct io_kiocb *req) { /* diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 3f06fbae0ee9..18754fb79025 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -239,8 +239,7 @@ void io_req_complete_failed(struct io_kiocb *req, s32 res); void __io_req_complete(struct io_kiocb *req, unsigned issue_flags); void io_req_complete_post(struct io_kiocb *req); void __io_req_complete_post(struct io_kiocb *req); -bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, - u32 cflags); +bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags); void io_cqring_ev_posted(struct io_ring_ctx *ctx); void __io_commit_cqring_flush(struct io_ring_ctx *ctx); diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 3b89f9a0a0b4..7c3c5f3ab06b 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -34,7 +34,6 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) { struct io_msg *msg = io_kiocb_to_cmd(req); struct io_ring_ctx *target_ctx; - bool filled; int ret; ret = -EBADFD; @@ -43,16 +42,8 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) ret = -EOVERFLOW; target_ctx = req->file->private_data; - - spin_lock(&target_ctx->completion_lock); - filled = io_fill_cqe_aux(target_ctx, msg->user_data, msg->len, 0); - io_commit_cqring(target_ctx); - spin_unlock(&target_ctx->completion_lock); - - if (filled) { - io_cqring_ev_posted(target_ctx); + if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) ret = 0; - } done: if (ret < 0) diff --git a/io_uring/net.c b/io_uring/net.c index fe1fe920b929..35d0183fe758 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -644,22 +644,12 @@ retry: io_req_set_res(req, ret, 0); return IOU_OK; } - if (ret >= 0) { - bool filled; - - spin_lock(&ctx->completion_lock); - filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret, - IORING_CQE_F_MORE); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - if (filled) { - io_cqring_ev_posted(ctx); - goto retry; - } - ret = -ECANCELED; - } - return ret; + if (ret < 0) + return ret; + if 
(io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE)) + goto retry; + return -ECANCELED; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) diff --git a/io_uring/poll.c b/io_uring/poll.c index 9ae2982aef7c..e0c181fe6264 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -214,23 +214,15 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { __poll_t mask = mangle_poll(req->cqe.res & req->apoll_events); - bool filled; - - spin_lock(&ctx->completion_lock); - filled = io_fill_cqe_aux(ctx, req->cqe.user_data, - mask, IORING_CQE_F_MORE); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - if (filled) { - io_cqring_ev_posted(ctx); - continue; - } - return -ECANCELED; - } - ret = io_poll_issue(req, locked); - if (ret) - return ret; + if (!io_post_aux_cqe(ctx, req->cqe.user_data, + mask, IORING_CQE_F_MORE)) + return -ECANCELED; + } else { + ret = io_poll_issue(req, locked); + if (ret) + return ret; + } /* * Release all references, retry if someone tried to restart diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 214ff0dfa6a4..7fed3105152a 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -174,17 +174,13 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node) list_del(&prsrc->list); if (prsrc->tag) { - if (ctx->flags & IORING_SETUP_IOPOLL) + if (ctx->flags & IORING_SETUP_IOPOLL) { mutex_lock(&ctx->uring_lock); - - spin_lock(&ctx->completion_lock); - io_fill_cqe_aux(ctx, prsrc->tag, 0, 0); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - io_cqring_ev_posted(ctx); - - if (ctx->flags & IORING_SETUP_IOPOLL) + io_post_aux_cqe(ctx, prsrc->tag, 0, 0); mutex_unlock(&ctx->uring_lock); + } else { + io_post_aux_cqe(ctx, prsrc->tag, 0, 0); + } } rsrc_data->do_put(ctx, prsrc); -- cgit v1.2.3 From 27a9d66fec77cff0e32d2ecd5d0ac7ef878a7bb0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Jun 2022 13:57:18 +0100 Subject: io_uring: kill extra io_uring_types.h includes io_uring/io_uring.h already includes io_uring_types.h, no need to include it every time. Kill it in a bunch of places, it prepares us for following patches. 
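As a sketch of the include chain this relies on (paraphrased, not the verbatim headers):

    /* io_uring/io_uring.h (already true before this patch) */
    #include "io_uring_types.h"

    /* io_uring/msg_ring.c and the other converted files afterwards */
    #include "io_uring.h"   /* pulls in io_uring_types.h transitively */
    #include "msg_ring.h"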
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/94d8c943fbe0ef949981c508ddcee7fc1c18850f.1655384063.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/advise.c | 1 - io_uring/cancel.c | 1 - io_uring/epoll.c | 1 - io_uring/fdinfo.c | 1 - io_uring/filetable.c | 1 - io_uring/fs.c | 1 - io_uring/io_uring.c | 1 - io_uring/kbuf.c | 1 - io_uring/msg_ring.c | 1 - io_uring/net.c | 1 - io_uring/nop.c | 1 - io_uring/opdef.c | 1 - io_uring/openclose.c | 1 - io_uring/poll.c | 1 - io_uring/rsrc.c | 1 - io_uring/rw.c | 1 - io_uring/splice.c | 1 - io_uring/sqpoll.c | 1 - io_uring/statx.c | 1 - io_uring/sync.c | 1 - io_uring/tctx.c | 1 - io_uring/timeout.c | 1 - io_uring/uring_cmd.c | 1 - io_uring/xattr.c | 1 - 24 files changed, 24 deletions(-) (limited to 'io_uring/msg_ring.c') diff --git a/io_uring/advise.c b/io_uring/advise.c index 8870fdf66ffb..581956934c0b 100644 --- a/io_uring/advise.c +++ b/io_uring/advise.c @@ -11,7 +11,6 @@ #include #include -#include "io_uring_types.h" #include "io_uring.h" #include "advise.h" diff --git a/io_uring/cancel.c b/io_uring/cancel.c index f07bfd27c98a..d1e7f5a955ab 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -10,7 +10,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "tctx.h" #include "poll.h" diff --git a/io_uring/epoll.c b/io_uring/epoll.c index 10853e8ed078..a8b794471d6b 100644 --- a/io_uring/epoll.c +++ b/io_uring/epoll.c @@ -9,7 +9,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "epoll.h" diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 344e7d90d557..61c35707a6cf 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -9,7 +9,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "sqpoll.h" #include "fdinfo.h" diff --git a/io_uring/filetable.c b/io_uring/filetable.c index e449ceb9a848..534e1a3c625d 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -9,7 +9,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "rsrc.h" #include "filetable.h" diff --git a/io_uring/fs.c b/io_uring/fs.c index aac1bc5255b0..0de4f549bb7d 100644 --- a/io_uring/fs.c +++ b/io_uring/fs.c @@ -12,7 +12,6 @@ #include "../fs/internal.h" -#include "io_uring_types.h" #include "io_uring.h" #include "fs.h" diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 0dbf6a74f9f3..0a1f83a936b7 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -80,7 +80,6 @@ #include "io-wq.h" -#include "io_uring_types.h" #include "io_uring.h" #include "opdef.h" #include "refs.h" diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 59e4fafeb28c..62de0dda24bf 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -11,7 +11,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "opdef.h" #include "kbuf.h" diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 7c3c5f3ab06b..b02be2349652 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -7,7 +7,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "msg_ring.h" diff --git a/io_uring/net.c b/io_uring/net.c index 35d0183fe758..b77bfbfb0816 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -10,7 +10,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "kbuf.h" #include "net.h" diff --git a/io_uring/nop.c b/io_uring/nop.c index d363d8ce70a3..d956599a3c1b 100644 --- a/io_uring/nop.c +++ b/io_uring/nop.c @@ -7,7 +7,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "nop.h" diff --git a/io_uring/opdef.c 
b/io_uring/opdef.c index d687d33f9c0c..a7b84b43e6c2 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -8,7 +8,6 @@ #include #include -#include "io_uring_types.h" #include "io_uring.h" #include "opdef.h" #include "refs.h" diff --git a/io_uring/openclose.c b/io_uring/openclose.c index 1cbf39030970..099a5ec84dfd 100644 --- a/io_uring/openclose.c +++ b/io_uring/openclose.c @@ -12,7 +12,6 @@ #include "../fs/internal.h" -#include "io_uring_types.h" #include "io_uring.h" #include "rsrc.h" #include "openclose.h" diff --git a/io_uring/poll.c b/io_uring/poll.c index e0c181fe6264..63aca920543b 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -13,7 +13,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "refs.h" #include "opdef.h" diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 7fed3105152a..68629eba4132 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -12,7 +12,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "openclose.h" #include "rsrc.h" diff --git a/io_uring/rw.c b/io_uring/rw.c index e5ca23d0783e..f8b42f2265df 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -14,7 +14,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "opdef.h" #include "kbuf.h" diff --git a/io_uring/splice.c b/io_uring/splice.c index 0e19d6330345..b013ba34bffa 100644 --- a/io_uring/splice.c +++ b/io_uring/splice.c @@ -11,7 +11,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "splice.h" diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 149d5c976f14..76d4d70c733a 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -14,7 +14,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "sqpoll.h" diff --git a/io_uring/statx.c b/io_uring/statx.c index 83b15687e9c5..6056cd7f4876 100644 --- a/io_uring/statx.c +++ b/io_uring/statx.c @@ -8,7 +8,6 @@ #include "../fs/internal.h" -#include "io_uring_types.h" #include "io_uring.h" #include "statx.h" diff --git a/io_uring/sync.c b/io_uring/sync.c index 9ee8ff865521..f2102afa79ca 100644 --- a/io_uring/sync.c +++ b/io_uring/sync.c @@ -11,7 +11,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "sync.h" diff --git a/io_uring/tctx.c b/io_uring/tctx.c index a819da8fc85c..9b30fb0d3603 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -9,7 +9,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "tctx.h" diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 526fc8b2e3b6..f9df359813c9 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -8,7 +8,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "refs.h" #include "cancel.h" diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index abf78918a099..0a421ed51e7e 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -6,7 +6,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "uring_cmd.h" diff --git a/io_uring/xattr.c b/io_uring/xattr.c index 79adf4efba01..b179f9acd5ac 100644 --- a/io_uring/xattr.c +++ b/io_uring/xattr.c @@ -13,7 +13,6 @@ #include "../fs/internal.h" -#include "io_uring_types.h" #include "io_uring.h" #include "xattr.h" -- cgit v1.2.3 From e6130eba8a848a7a6ba6c534bd8f6d60749ae1a9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Jun 2022 04:47:02 -0600 Subject: io_uring: add support for passing fixed file descriptors With IORING_OP_MSG_RING, one ring can send a message to another ring. 
Extend that support to also allow sending a fixed file descriptor to that ring, enabling one ring to pass a registered descriptor to another one. Arguments are extended to pass in: sqe->addr3 fixed file slot in source ring sqe->file_index fixed file slot in destination ring IORING_OP_MSG_RING is extended to take a command argument in sqe->addr. If set to zero (or IORING_MSG_DATA), it sends just a message like before. If set to IORING_MSG_SEND_FD, a fixed file descriptor is sent according to the above arguments. Two common use cases for this are: 1) Server needs to be shut down or restarted, pass file descriptors to another one. 2) Backend is split, and one accepts connections, while others then get the fd passed and handle the actual connection. Both of those are classic SCM_RIGHTS use cases, and it's not possible to support them with direct descriptors today. By default, this will post a CQE to the target ring, similarly to how IORING_MSG_DATA does it. If IORING_MSG_RING_CQE_SKIP is set, no message is posted to the target ring. The issuer is expected to notify the receiver side separately. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 17 ++++++ io_uring/msg_ring.c | 130 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 140 insertions(+), 7 deletions(-) (limited to 'io_uring/msg_ring.c') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8fe0275cdaf3..f378eabbff21 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -51,6 +51,7 @@ struct io_uring_sqe { __u32 unlink_flags; __u32 hardlink_flags; __u32 xattr_flags; + __u32 msg_ring_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -270,6 +271,22 @@ enum io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) +/* + * IORING_OP_MSG_RING command types, stored in sqe->addr + */ +enum { + IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */ + IORING_MSG_SEND_FD, /* send a registered fd to another ring */ +}; + +/* + * IORING_OP_MSG_RING flags (sqe->msg_ring_flags) + * + * IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. Not + * applicable for IORING_MSG_DATA, obviously. 
+ */ +#define IORING_MSG_RING_CQE_SKIP (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index b02be2349652..939205b30c8b 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -3,46 +3,162 @@ #include #include #include +#include #include #include #include "io_uring.h" +#include "rsrc.h" +#include "filetable.h" #include "msg_ring.h" struct io_msg { struct file *file; u64 user_data; u32 len; + u32 cmd; + u32 src_fd; + u32 dst_fd; + u32 flags; }; +static int io_msg_ring_data(struct io_kiocb *req) +{ + struct io_ring_ctx *target_ctx = req->file->private_data; + struct io_msg *msg = io_kiocb_to_cmd(req); + + if (msg->src_fd || msg->dst_fd || msg->flags) + return -EINVAL; + + if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) + return 0; + + return -EOVERFLOW; +} + +static void io_double_unlock_ctx(struct io_ring_ctx *ctx, + struct io_ring_ctx *octx, + unsigned int issue_flags) +{ + if (issue_flags & IO_URING_F_UNLOCKED) + mutex_unlock(&ctx->uring_lock); + mutex_unlock(&octx->uring_lock); +} + +static int io_double_lock_ctx(struct io_ring_ctx *ctx, + struct io_ring_ctx *octx, + unsigned int issue_flags) +{ + /* + * To ensure proper ordering between the two ctxs, we can only + * attempt a trylock on the target. If that fails and we already have + * the source ctx lock, punt to io-wq. + */ + if (!(issue_flags & IO_URING_F_UNLOCKED)) { + if (!mutex_trylock(&octx->uring_lock)) + return -EAGAIN; + return 0; + } + + /* Always grab smallest value ctx first. We know ctx != octx. */ + if (ctx < octx) { + mutex_lock(&ctx->uring_lock); + mutex_lock(&octx->uring_lock); + } else { + mutex_lock(&octx->uring_lock); + mutex_lock(&ctx->uring_lock); + } + + return 0; +} + +static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *target_ctx = req->file->private_data; + struct io_msg *msg = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + unsigned long file_ptr; + struct file *src_file; + int ret; + + if (target_ctx == ctx) + return -EINVAL; + + ret = io_double_lock_ctx(ctx, target_ctx, issue_flags); + if (unlikely(ret)) + return ret; + + ret = -EBADF; + if (unlikely(msg->src_fd >= ctx->nr_user_files)) + goto out_unlock; + + msg->src_fd = array_index_nospec(msg->src_fd, ctx->nr_user_files); + file_ptr = io_fixed_file_slot(&ctx->file_table, msg->src_fd)->file_ptr; + src_file = (struct file *) (file_ptr & FFS_MASK); + get_file(src_file); + + ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd); + if (ret < 0) { + fput(src_file); + goto out_unlock; + } + + if (msg->flags & IORING_MSG_RING_CQE_SKIP) + goto out_unlock; + + /* + * If this fails, the target still received the file descriptor but + * wasn't notified of the fact. This means that if this request + * completes with -EOVERFLOW, then the sender must ensure that a + * later IORING_OP_MSG_RING delivers the message. 
+ */ + if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) + ret = -EOVERFLOW; +out_unlock: + io_double_unlock_ctx(ctx, target_ctx, issue_flags); + return ret; +} + int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_msg *msg = io_kiocb_to_cmd(req); - if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in || - sqe->buf_index || sqe->personality)) + if (unlikely(sqe->buf_index || sqe->personality)) return -EINVAL; msg->user_data = READ_ONCE(sqe->off); msg->len = READ_ONCE(sqe->len); + msg->cmd = READ_ONCE(sqe->addr); + msg->src_fd = READ_ONCE(sqe->addr3); + msg->dst_fd = READ_ONCE(sqe->file_index); + msg->flags = READ_ONCE(sqe->msg_ring_flags); + if (msg->flags & ~IORING_MSG_RING_CQE_SKIP) + return -EINVAL; + return 0; } int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) { struct io_msg *msg = io_kiocb_to_cmd(req); - struct io_ring_ctx *target_ctx; int ret; ret = -EBADFD; if (!io_is_uring_fops(req->file)) goto done; - ret = -EOVERFLOW; - target_ctx = req->file->private_data; - if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) - ret = 0; + switch (msg->cmd) { + case IORING_MSG_DATA: + ret = io_msg_ring_data(req); + break; + case IORING_MSG_SEND_FD: + ret = io_msg_send_fd(req, issue_flags); + break; + default: + ret = -EINVAL; + break; + } done: if (ret < 0) -- cgit v1.2.3 From 52120f0fadcbdaaa981c19327f1865a714e85268 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Thu, 30 Jun 2022 02:12:26 -0700 Subject: io_uring: add allow_overflow to io_post_aux_cqe Some use cases of io_post_aux_cqe would not want to overflow as is, but might want to change the flags/result. For example multishot receive requires in order CQE, and so if there is an overflow it would need to stop receiving until the overflow is taken care of. 
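A sketch of the intended call pattern (not code from this patch; the multishot receive conversion itself lands later in the series):

    /* Existing callers keep the current behaviour and still allow overflow */
    io_post_aux_cqe(ctx, user_data, res, cflags, true);

    /*
     * A strictly-ordered multishot user would pass false instead and stop
     * posting further CQEs as soon as one cannot be delivered:
     */
    if (!io_post_aux_cqe(ctx, req->cqe.user_data, ret,
                         IORING_CQE_F_MORE, false))
            return -ECANCELED;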
Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220630091231.1456789-8-dylany@fb.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 14 ++++++++++---- io_uring/io_uring.h | 3 ++- io_uring/msg_ring.c | 4 ++-- io_uring/net.c | 2 +- io_uring/poll.c | 2 +- io_uring/rsrc.c | 4 ++-- 6 files changed, 18 insertions(+), 11 deletions(-) (limited to 'io_uring/msg_ring.c') diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 745264938a48..523b6ebad15a 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -736,7 +736,8 @@ struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx) } static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, - u64 user_data, s32 res, u32 cflags) + u64 user_data, s32 res, u32 cflags, + bool allow_overflow) { struct io_uring_cqe *cqe; @@ -760,16 +761,21 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, } return true; } - return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); + + if (allow_overflow) + return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); + + return false; } bool io_post_aux_cqe(struct io_ring_ctx *ctx, - u64 user_data, s32 res, u32 cflags) + u64 user_data, s32 res, u32 cflags, + bool allow_overflow) { bool filled; io_cq_lock(ctx); - filled = io_fill_cqe_aux(ctx, user_data, res, cflags); + filled = io_fill_cqe_aux(ctx, user_data, res, cflags, allow_overflow); io_cq_unlock_post(ctx); return filled; } diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index e8da70781fa3..e022d71c177a 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -31,7 +31,8 @@ void io_req_complete_failed(struct io_kiocb *req, s32 res); void __io_req_complete(struct io_kiocb *req, unsigned issue_flags); void io_req_complete_post(struct io_kiocb *req); void __io_req_complete_post(struct io_kiocb *req); -bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags); +bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags, + bool allow_overflow); void __io_commit_cqring_flush(struct io_ring_ctx *ctx); struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages); diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 939205b30c8b..753d16734319 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -31,7 +31,7 @@ static int io_msg_ring_data(struct io_kiocb *req) if (msg->src_fd || msg->dst_fd || msg->flags) return -EINVAL; - if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) + if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0, true)) return 0; return -EOVERFLOW; @@ -113,7 +113,7 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags) * completes with -EOVERFLOW, then the sender must ensure that a * later IORING_OP_MSG_RING delivers the message. 
*/ - if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) + if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0, true)) ret = -EOVERFLOW; out_unlock: io_double_unlock_ctx(ctx, target_ctx, issue_flags); diff --git a/io_uring/net.c b/io_uring/net.c index e4422dff0704..601955fdb124 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -658,7 +658,7 @@ retry: if (ret < 0) return ret; - if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE)) + if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, true)) goto retry; return -ECANCELED; } diff --git a/io_uring/poll.c b/io_uring/poll.c index 64d426d696ab..e8f922a4f6d7 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -243,7 +243,7 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) req->apoll_events); if (!io_post_aux_cqe(ctx, req->cqe.user_data, - mask, IORING_CQE_F_MORE)) + mask, IORING_CQE_F_MORE, true)) return -ECANCELED; } else { ret = io_poll_issue(req, locked); diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index d2e589c703d0..0250c13ae1cd 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -175,10 +175,10 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node) if (prsrc->tag) { if (ctx->flags & IORING_SETUP_IOPOLL) { mutex_lock(&ctx->uring_lock); - io_post_aux_cqe(ctx, prsrc->tag, 0, 0); + io_post_aux_cqe(ctx, prsrc->tag, 0, 0, true); mutex_unlock(&ctx->uring_lock); } else { - io_post_aux_cqe(ctx, prsrc->tag, 0, 0); + io_post_aux_cqe(ctx, prsrc->tag, 0, 0, true); } } -- cgit v1.2.3
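Putting the series together, a userspace sketch of sending a fixed descriptor between rings might look like the following. This is illustrative only and not part of the series: it assumes liburing for setup and submission, requires uapi headers that carry the IORING_MSG_* additions above, and the exact convention for addressing the destination slot should be checked against the final uapi and man pages.

    #include <errno.h>
    #include <string.h>
    #include <liburing.h>

    /*
     * Send the fixed file in slot src_slot of 'ring' to slot dst_slot of the
     * ring behind target_ring_fd, tagging the CQE posted there with 'tag'.
     */
    static int send_fixed_fd(struct io_uring *ring, int target_ring_fd,
                             unsigned int src_slot, unsigned int dst_slot,
                             __u64 tag)
    {
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

            if (!sqe)
                    return -EBUSY;
            memset(sqe, 0, sizeof(*sqe));
            sqe->opcode = IORING_OP_MSG_RING;
            sqe->fd = target_ring_fd;       /* fd of the receiving ring */
            sqe->addr = IORING_MSG_SEND_FD; /* command, stored in sqe->addr */
            sqe->addr3 = src_slot;          /* fixed file slot in source ring */
            sqe->file_index = dst_slot;     /* fixed file slot in destination ring */
            sqe->off = tag;                 /* user_data of the CQE in the target */
            /* sqe->msg_ring_flags = IORING_MSG_RING_CQE_SKIP; to suppress that CQE */
            return io_uring_submit(ring);
    }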