Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--  fs/io_uring.c | 690
1 file changed, 355 insertions(+), 335 deletions(-)
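
The diff below removes the cached req->sqe pointer and the REQ_F_PREPPED flag: each opcode's prep handler now receives the SQE as an explicit argument, copies the fields it needs into new per-opcode members of io_kiocb (io_rw, io_connect, io_sr_msg, ...), and the issue path only ever reads the request itself. The stand-alone C sketch here is a toy model of that copy-at-prep pattern, not kernel code; all toy_* names are invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins for the pattern the patch adopts: prep copies
 * everything it needs out of the shared submission entry, so the entry
 * slot can be reused immediately and issue never re-reads it. */
struct toy_sqe { uint8_t opcode; uint64_t addr; uint32_t len; };
struct toy_rw  { uint64_t addr; uint64_t len; };   /* plays the role of io_rw    */
struct toy_req { struct toy_rw rw; };              /* plays the role of io_kiocb */

static int toy_read_prep(struct toy_req *req, const struct toy_sqe *sqe)
{
	/* one-shot copy, mirroring READ_ONCE(sqe->addr) / READ_ONCE(sqe->len) */
	req->rw.addr = sqe->addr;
	req->rw.len  = sqe->len;
	return 0;
}

static void toy_issue(const struct toy_req *req)
{
	/* issue path only looks at the request, never at the sqe */
	printf("read %llu bytes at 0x%llx\n",
	       (unsigned long long)req->rw.len,
	       (unsigned long long)req->rw.addr);
}

int main(void)
{
	struct toy_sqe sqe = { .opcode = 1, .addr = 0x1000, .len = 4096 };
	struct toy_req req;

	toy_read_prep(&req, &sqe);
	memset(&sqe, 0, sizeof(sqe));	/* slot reused: prep already copied out */
	toy_issue(&req);
	return 0;
}
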
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f084e3cf835..562e3a1a1bf9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -330,6 +330,26 @@ struct io_timeout {
struct file *file;
u64 addr;
int flags;
+ unsigned count;
+};
+
+struct io_rw {
+ /* NOTE: kiocb has the file as the first member, so don't do it here */
+ struct kiocb kiocb;
+ u64 addr;
+ u64 len;
+};
+
+struct io_connect {
+ struct file *file;
+ struct sockaddr __user *addr;
+ int addr_len;
+};
+
+struct io_sr_msg {
+ struct file *file;
+ struct user_msghdr __user *msg;
+ int msg_flags;
};
struct io_async_connect {
@@ -351,7 +371,6 @@ struct io_async_rw {
};
struct io_async_ctx {
- struct io_uring_sqe sqe;
union {
struct io_async_rw rw;
struct io_async_msghdr msg;
@@ -369,15 +388,16 @@ struct io_async_ctx {
struct io_kiocb {
union {
struct file *file;
- struct kiocb rw;
+ struct io_rw rw;
struct io_poll_iocb poll;
struct io_accept accept;
struct io_sync sync;
struct io_cancel cancel;
struct io_timeout timeout;
+ struct io_connect connect;
+ struct io_sr_msg sr_msg;
};
- const struct io_uring_sqe *sqe;
struct io_async_ctx *io;
struct file *ring_file;
int ring_fd;
@@ -411,7 +431,6 @@ struct io_kiocb {
#define REQ_F_INFLIGHT 16384 /* on inflight list */
#define REQ_F_COMP_LOCKED 32768 /* completion under lock */
#define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */
-#define REQ_F_PREPPED 131072 /* request already opcode prepared */
u64 user_data;
u32 result;
u32 sequence;
@@ -609,33 +628,31 @@ static inline bool io_prep_async_work(struct io_kiocb *req,
{
bool do_hashed = false;
- if (req->sqe) {
- switch (req->opcode) {
- case IORING_OP_WRITEV:
- case IORING_OP_WRITE_FIXED:
- /* only regular files should be hashed for writes */
- if (req->flags & REQ_F_ISREG)
- do_hashed = true;
- /* fall-through */
- case IORING_OP_READV:
- case IORING_OP_READ_FIXED:
- case IORING_OP_SENDMSG:
- case IORING_OP_RECVMSG:
- case IORING_OP_ACCEPT:
- case IORING_OP_POLL_ADD:
- case IORING_OP_CONNECT:
- /*
- * We know REQ_F_ISREG is not set on some of these
- * opcodes, but this enables us to keep the check in
- * just one place.
- */
- if (!(req->flags & REQ_F_ISREG))
- req->work.flags |= IO_WQ_WORK_UNBOUND;
- break;
- }
- if (io_req_needs_user(req))
- req->work.flags |= IO_WQ_WORK_NEEDS_USER;
+ switch (req->opcode) {
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ /* only regular files should be hashed for writes */
+ if (req->flags & REQ_F_ISREG)
+ do_hashed = true;
+ /* fall-through */
+ case IORING_OP_READV:
+ case IORING_OP_READ_FIXED:
+ case IORING_OP_SENDMSG:
+ case IORING_OP_RECVMSG:
+ case IORING_OP_ACCEPT:
+ case IORING_OP_POLL_ADD:
+ case IORING_OP_CONNECT:
+ /*
+ * We know REQ_F_ISREG is not set on some of these
+ * opcodes, but this enables us to keep the check in
+ * just one place.
+ */
+ if (!(req->flags & REQ_F_ISREG))
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
+ break;
}
+ if (io_req_needs_user(req))
+ req->work.flags |= IO_WQ_WORK_NEEDS_USER;
*link = io_prep_linked_timeout(req);
return do_hashed;
@@ -1180,7 +1197,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
ret = 0;
list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
/*
* Move completed entries to our local list. If we find a
@@ -1335,7 +1352,7 @@ static inline void req_set_fail_links(struct io_kiocb *req)
static void io_complete_rw_common(struct kiocb *kiocb, long res)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
@@ -1347,7 +1364,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
io_complete_rw_common(kiocb, res);
io_put_req(req);
@@ -1355,7 +1372,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
struct io_kiocb *nxt = NULL;
io_complete_rw_common(kiocb, res);
@@ -1366,7 +1383,7 @@ static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
@@ -1400,7 +1417,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
list);
- if (list_req->rw.ki_filp != req->rw.ki_filp)
+ if (list_req->file != req->file)
ctx->poll_multi_file = true;
}
@@ -1471,11 +1488,11 @@ static bool io_file_supports_async(struct file *file)
return false;
}
-static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
+static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
unsigned ioprio;
int ret;
@@ -1524,6 +1541,12 @@ static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
return -EINVAL;
kiocb->ki_complete = io_complete_rw;
}
+
+ req->rw.addr = READ_ONCE(sqe->addr);
+ req->rw.len = READ_ONCE(sqe->len);
+ /* we own ->private, reuse it for the buffer index */
+ req->rw.kiocb.private = (void *) (unsigned long)
+ READ_ONCE(sqe->buf_index);
return 0;
}
@@ -1557,11 +1580,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
io_rw_done(kiocb, ret);
}
-static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
- const struct io_uring_sqe *sqe,
+static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
struct iov_iter *iter)
{
- size_t len = READ_ONCE(sqe->len);
+ struct io_ring_ctx *ctx = req->ctx;
+ size_t len = req->rw.len;
struct io_mapped_ubuf *imu;
unsigned index, buf_index;
size_t offset;
@@ -1571,13 +1594,13 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
if (unlikely(!ctx->user_bufs))
return -EFAULT;
- buf_index = READ_ONCE(sqe->buf_index);
+ buf_index = (unsigned long) req->rw.kiocb.private;
if (unlikely(buf_index >= ctx->nr_user_bufs))
return -EFAULT;
index = array_index_nospec(buf_index, ctx->nr_user_bufs);
imu = &ctx->user_bufs[index];
- buf_addr = READ_ONCE(sqe->addr);
+ buf_addr = req->rw.addr;
/* overflow */
if (buf_addr + len < buf_addr)
@@ -1634,25 +1657,20 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
struct iovec **iovec, struct iov_iter *iter)
{
- const struct io_uring_sqe *sqe = req->sqe;
- void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
- size_t sqe_len = READ_ONCE(sqe->len);
+ void __user *buf = u64_to_user_ptr(req->rw.addr);
+ size_t sqe_len = req->rw.len;
u8 opcode;
- /*
- * We're reading ->opcode for the second time, but the first read
- * doesn't care whether it's _FIXED or not, so it doesn't matter
- * whether ->opcode changes concurrently. The first read does care
- * about whether it is a READ or a WRITE, so we don't trust this read
- * for that purpose and instead let the caller pass in the read/write
- * flag.
- */
opcode = req->opcode;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
*iovec = NULL;
- return io_import_fixed(req->ctx, rw, sqe, iter);
+ return io_import_fixed(req, rw, iter);
}
+ /* buffer index only valid with fixed read/write */
+ if (req->rw.kiocb.private)
+ return -EINVAL;
+
if (req->io) {
struct io_async_rw *iorw = &req->io->rw;
@@ -1750,13 +1768,7 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
static int io_alloc_async_ctx(struct io_kiocb *req)
{
req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
- if (req->io) {
- memcpy(&req->io->sqe, req->sqe, sizeof(req->io->sqe));
- req->sqe = &req->io->sqe;
- return 0;
- }
-
- return 1;
+ return req->io == NULL;
}
static void io_rw_async(struct io_wq_work **workptr)
@@ -1782,46 +1794,52 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
return 0;
}
-static int io_read_prep(struct io_kiocb *req, struct iovec **iovec,
- struct iov_iter *iter, bool force_nonblock)
+static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
+ struct io_async_ctx *io;
+ struct iov_iter iter;
ssize_t ret;
- ret = io_prep_rw(req, force_nonblock);
+ ret = io_prep_rw(req, sqe, force_nonblock);
if (ret)
return ret;
if (unlikely(!(req->file->f_mode & FMODE_READ)))
return -EBADF;
- return io_import_iovec(READ, req, iovec, iter);
+ if (!req->io)
+ return 0;
+
+ io = req->io;
+ io->rw.iov = io->rw.fast_iov;
+ req->io = NULL;
+ ret = io_import_iovec(READ, req, &io->rw.iov, &iter);
+ req->io = io;
+ if (ret < 0)
+ return ret;
+
+ io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+ return 0;
}
static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter;
- struct file *file;
size_t iov_count;
ssize_t io_size, ret;
- if (!req->io) {
- ret = io_read_prep(req, &iovec, &iter, force_nonblock);
- if (ret < 0)
- return ret;
- } else {
- ret = io_import_iovec(READ, req, &iovec, &iter);
- if (ret < 0)
- return ret;
- }
+ ret = io_import_iovec(READ, req, &iovec, &iter);
+ if (ret < 0)
+ return ret;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
- req->rw.ki_flags &= ~IOCB_NOWAIT;
+ req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
- file = req->file;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -1830,20 +1848,20 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
* we know to async punt it even if it was opened O_NONBLOCK
*/
- if (force_nonblock && !io_file_supports_async(file)) {
+ if (force_nonblock && !io_file_supports_async(req->file)) {
req->flags |= REQ_F_MUST_PUNT;
goto copy_iov;
}
iov_count = iov_iter_count(&iter);
- ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
- if (file->f_op->read_iter)
- ret2 = call_read_iter(file, kiocb, &iter);
+ if (req->file->f_op->read_iter)
+ ret2 = call_read_iter(req->file, kiocb, &iter);
else
- ret2 = loop_rw_iter(READ, file, kiocb, &iter);
+ ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
/*
* In case of a short read, punt to async. This can happen
@@ -1875,46 +1893,52 @@ out_free:
return ret;
}
-static int io_write_prep(struct io_kiocb *req, struct iovec **iovec,
- struct iov_iter *iter, bool force_nonblock)
+static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
+ struct io_async_ctx *io;
+ struct iov_iter iter;
ssize_t ret;
- ret = io_prep_rw(req, force_nonblock);
+ ret = io_prep_rw(req, sqe, force_nonblock);
if (ret)
return ret;
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
return -EBADF;
- return io_import_iovec(WRITE, req, iovec, iter);
+ if (!req->io)
+ return 0;
+
+ io = req->io;
+ io->rw.iov = io->rw.fast_iov;
+ req->io = NULL;
+ ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter);
+ req->io = io;
+ if (ret < 0)
+ return ret;
+
+ io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+ return 0;
}
static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter;
- struct file *file;
size_t iov_count;
ssize_t ret, io_size;
- if (!req->io) {
- ret = io_write_prep(req, &iovec, &iter, force_nonblock);
- if (ret < 0)
- return ret;
- } else {
- ret = io_import_iovec(WRITE, req, &iovec, &iter);
- if (ret < 0)
- return ret;
- }
+ ret = io_import_iovec(WRITE, req, &iovec, &iter);
+ if (ret < 0)
+ return ret;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
- req->rw.ki_flags &= ~IOCB_NOWAIT;
+ req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
- file = kiocb->ki_filp;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -1934,7 +1958,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
goto copy_iov;
iov_count = iov_iter_count(&iter);
- ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
@@ -1946,17 +1970,17 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
* we return to userspace.
*/
if (req->flags & REQ_F_ISREG) {
- __sb_start_write(file_inode(file)->i_sb,
+ __sb_start_write(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE, true);
- __sb_writers_release(file_inode(file)->i_sb,
+ __sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE);
}
kiocb->ki_flags |= IOCB_WRITE;
- if (file->f_op->write_iter)
- ret2 = call_write_iter(file, kiocb, &iter);
+ if (req->file->f_op->write_iter)
+ ret2 = call_write_iter(req->file, kiocb, &iter);
else
- ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
+ ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
if (!force_nonblock || ret2 != -EAGAIN) {
kiocb_done(kiocb, ret2, nxt, req->in_async);
} else {
@@ -1989,13 +2013,10 @@ static int io_nop(struct io_kiocb *req)
return 0;
}
-static int io_prep_fsync(struct io_kiocb *req)
+static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (!req->file)
return -EBADF;
@@ -2010,7 +2031,6 @@ static int io_prep_fsync(struct io_kiocb *req)
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->len);
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2036,7 +2056,7 @@ static void io_fsync_finish(struct io_wq_work **workptr)
if (io_req_cancelled(req))
return;
- ret = vfs_fsync_range(req->rw.ki_filp, req->sync.off,
+ ret = vfs_fsync_range(req->file, req->sync.off,
end > 0 ? end : LLONG_MAX,
req->sync.flags & IORING_FSYNC_DATASYNC);
if (ret < 0)
@@ -2051,11 +2071,6 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct io_wq_work *work, *old_work;
- int ret;
-
- ret = io_prep_fsync(req);
- if (ret)
- return ret;
/* fsync always requires a blocking context */
if (force_nonblock) {
@@ -2071,13 +2086,10 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
return 0;
}
-static int io_prep_sfr(struct io_kiocb *req)
+static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (!req->file)
return -EBADF;
@@ -2089,7 +2101,6 @@ static int io_prep_sfr(struct io_kiocb *req)
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->len);
req->sync.flags = READ_ONCE(sqe->sync_range_flags);
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2102,7 +2113,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr)
if (io_req_cancelled(req))
return;
- ret = sync_file_range(req->rw.ki_filp, req->sync.off, req->sync.len,
+ ret = sync_file_range(req->file, req->sync.off, req->sync.len,
req->sync.flags);
if (ret < 0)
req_set_fail_links(req);
@@ -2116,11 +2127,6 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct io_wq_work *work, *old_work;
- int ret;
-
- ret = io_prep_sfr(req);
- if (ret)
- return ret;
/* sync_file_range always requires a blocking context */
if (force_nonblock) {
@@ -2149,19 +2155,23 @@ static void io_sendrecv_async(struct io_wq_work **workptr)
}
#endif
-static int io_sendmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct user_msghdr __user *msg;
- unsigned flags;
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct io_async_ctx *io = req->io;
+
+ sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ if (!io)
+ return 0;
- flags = READ_ONCE(sqe->msg_flags);
- msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
io->msg.iov = io->msg.fast_iov;
- return sendmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.iov);
+ return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+ &io->msg.iov);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
@@ -2169,7 +2179,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -2183,12 +2192,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
struct sockaddr_storage addr;
unsigned flags;
- flags = READ_ONCE(sqe->msg_flags);
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
- flags |= MSG_DONTWAIT;
-
if (req->io) {
kmsg = &req->io->msg;
kmsg->msg.msg_name = &addr;
@@ -2197,13 +2200,24 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
+ struct io_sr_msg *sr = &req->sr_msg;
+
kmsg = &io.msg;
kmsg->msg.msg_name = &addr;
- ret = io_sendmsg_prep(req, &io);
+
+ io.msg.iov = io.msg.fast_iov;
+ ret = sendmsg_copy_msghdr(&io.msg.msg, sr->msg,
+ sr->msg_flags, &io.msg.iov);
if (ret)
- goto out;
+ return ret;
}
+ flags = req->sr_msg.msg_flags;
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (force_nonblock && ret == -EAGAIN) {
if (req->io)
@@ -2218,7 +2232,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR;
}
-out:
if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
io_cqring_add_event(req, ret);
@@ -2231,20 +2244,24 @@ out:
#endif
}
-static int io_recvmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_recvmsg_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct user_msghdr __user *msg;
- unsigned flags;
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct io_async_ctx *io = req->io;
+
+ sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ if (!io)
+ return 0;
- flags = READ_ONCE(sqe->msg_flags);
- msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
io->msg.iov = io->msg.fast_iov;
- return recvmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.uaddr,
- &io->msg.iov);
+ return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+ &io->msg.uaddr, &io->msg.iov);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
@@ -2252,7 +2269,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -2262,19 +2278,10 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
sock = sock_from_file(req->file, &ret);
if (sock) {
- struct user_msghdr __user *msg;
struct io_async_ctx io;
struct sockaddr_storage addr;
unsigned flags;
- flags = READ_ONCE(sqe->msg_flags);
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
- flags |= MSG_DONTWAIT;
-
- msg = (struct user_msghdr __user *) (unsigned long)
- READ_ONCE(sqe->addr);
if (req->io) {
kmsg = &req->io->msg;
kmsg->msg.msg_name = &addr;
@@ -2283,14 +2290,27 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
+ struct io_sr_msg *sr = &req->sr_msg;
+
kmsg = &io.msg;
kmsg->msg.msg_name = &addr;
- ret = io_recvmsg_prep(req, &io);
+
+ io.msg.iov = io.msg.fast_iov;
+ ret = recvmsg_copy_msghdr(&io.msg.msg, sr->msg,
+ sr->msg_flags, &io.msg.uaddr,
+ &io.msg.iov);
if (ret)
- goto out;
+ return ret;
}
- ret = __sys_recvmsg_sock(sock, &kmsg->msg, msg, kmsg->uaddr, flags);
+ flags = req->sr_msg.msg_flags;
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
+ ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
+ kmsg->uaddr, flags);
if (force_nonblock && ret == -EAGAIN) {
if (req->io)
return -EAGAIN;
@@ -2304,7 +2324,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR;
}
-out:
if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
io_cqring_add_event(req, ret);
@@ -2317,25 +2336,19 @@ out:
#endif
}
-static int io_accept_prep(struct io_kiocb *req)
+static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_accept *accept = &req->accept;
- if (req->flags & REQ_F_PREPPED)
- return 0;
-
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->len || sqe->buf_index)
return -EINVAL;
- accept->addr = (struct sockaddr __user *)
- (unsigned long) READ_ONCE(sqe->addr);
- accept->addr_len = (int __user *) (unsigned long) READ_ONCE(sqe->addr2);
+ accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
- req->flags |= REQ_F_PREPPED;
return 0;
#else
return -EOPNOTSUPP;
@@ -2383,10 +2396,6 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
#if defined(CONFIG_NET)
int ret;
- ret = io_accept_prep(req);
- if (ret)
- return ret;
-
ret = __io_accept(req, nxt, force_nonblock);
if (ret == -EAGAIN && force_nonblock) {
req->work.func = io_accept_finish;
@@ -2400,18 +2409,27 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
#endif
}
-static int io_connect_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct sockaddr __user *addr;
- int addr_len;
+ struct io_connect *conn = &req->connect;
+ struct io_async_ctx *io = req->io;
+
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
+ return -EINVAL;
- addr = (struct sockaddr __user *) (unsigned long) READ_ONCE(sqe->addr);
- addr_len = READ_ONCE(sqe->addr2);
- return move_addr_to_kernel(addr, addr_len, &io->connect.address);
+ conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ conn->addr_len = READ_ONCE(sqe->addr2);
+
+ if (!io)
+ return 0;
+
+ return move_addr_to_kernel(conn->addr, conn->addr_len,
+ &io->connect.address);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
@@ -2419,30 +2437,25 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_async_ctx __io, *io;
unsigned file_flags;
- int addr_len, ret;
-
- if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
- return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
- return -EINVAL;
-
- addr_len = READ_ONCE(sqe->addr2);
- file_flags = force_nonblock ? O_NONBLOCK : 0;
+ int ret;
if (req->io) {
io = req->io;
} else {
- ret = io_connect_prep(req, &__io);
+ ret = move_addr_to_kernel(req->connect.addr,
+ req->connect.addr_len,
+ &__io.connect.address);
if (ret)
goto out;
io = &__io;
}
- ret = __sys_connect_file(req->file, &io->connect.address, addr_len,
- file_flags);
+ file_flags = force_nonblock ? O_NONBLOCK : 0;
+
+ ret = __sys_connect_file(req->file, &io->connect.address,
+ req->connect.addr_len, file_flags);
if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
if (req->io)
return -EAGAIN;
@@ -2513,12 +2526,9 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
return -ENOENT;
}
-static int io_poll_remove_prep(struct io_kiocb *req)
+static int io_poll_remove_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
-
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
@@ -2526,7 +2536,6 @@ static int io_poll_remove_prep(struct io_kiocb *req)
return -EINVAL;
req->poll.addr = READ_ONCE(sqe->addr);
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2540,10 +2549,6 @@ static int io_poll_remove(struct io_kiocb *req)
u64 addr;
int ret;
- ret = io_poll_remove_prep(req);
- if (ret)
- return ret;
-
addr = req->poll.addr;
spin_lock_irq(&ctx->completion_lock);
ret = io_poll_cancel(ctx, addr);
@@ -2681,14 +2686,11 @@ static void io_poll_req_insert(struct io_kiocb *req)
hlist_add_head(&req->hash_node, list);
}
-static int io_poll_add_prep(struct io_kiocb *req)
+static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_poll_iocb *poll = &req->poll;
u16 events;
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
@@ -2696,7 +2698,6 @@ static int io_poll_add_prep(struct io_kiocb *req)
if (!poll->file)
return -EBADF;
- req->flags |= REQ_F_PREPPED;
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
return 0;
@@ -2709,11 +2710,6 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
struct io_poll_table ipt;
bool cancel = false;
__poll_t mask;
- int ret;
-
- ret = io_poll_add_prep(req);
- if (ret)
- return ret;
INIT_IO_WORK(&req->work, io_poll_complete_work);
INIT_HLIST_NODE(&req->hash_node);
@@ -2832,12 +2828,9 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
return 0;
}
-static int io_timeout_remove_prep(struct io_kiocb *req)
+static int io_timeout_remove_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
-
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len)
@@ -2848,7 +2841,6 @@ static int io_timeout_remove_prep(struct io_kiocb *req)
if (req->timeout.flags)
return -EINVAL;
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2860,10 +2852,6 @@ static int io_timeout_remove(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- ret = io_timeout_remove_prep(req);
- if (ret)
- return ret;
-
spin_lock_irq(&ctx->completion_lock);
ret = io_timeout_cancel(ctx, req->timeout.addr);
@@ -2877,10 +2865,9 @@ static int io_timeout_remove(struct io_kiocb *req)
return 0;
}
-static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
+static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool is_timeout_link)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_timeout_data *data;
unsigned flags;
@@ -2894,7 +2881,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
if (flags & ~IORING_TIMEOUT_ABS)
return -EINVAL;
- data = &io->timeout;
+ req->timeout.count = READ_ONCE(sqe->off);
+
+ if (!req->io && io_alloc_async_ctx(req))
+ return -ENOMEM;
+
+ data = &req->io->timeout;
data->req = req;
req->flags |= REQ_F_TIMEOUT;
@@ -2912,21 +2904,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
static int io_timeout(struct io_kiocb *req)
{
- const struct io_uring_sqe *sqe = req->sqe;
unsigned count;
struct io_ring_ctx *ctx = req->ctx;
struct io_timeout_data *data;
struct list_head *entry;
unsigned span = 0;
- int ret;
- if (!req->io) {
- if (io_alloc_async_ctx(req))
- return -ENOMEM;
- ret = io_timeout_prep(req, req->io, false);
- if (ret)
- return ret;
- }
data = &req->io->timeout;
/*
@@ -2934,7 +2917,7 @@ static int io_timeout(struct io_kiocb *req)
* timeout event to be satisfied. If it isn't set, then this is
* a pure timeout request, sequence isn't used.
*/
- count = READ_ONCE(sqe->off);
+ count = req->timeout.count;
if (!count) {
req->flags |= REQ_F_TIMEOUT_NOSEQ;
spin_lock_irq(&ctx->completion_lock);
@@ -3052,19 +3035,15 @@ done:
io_put_req_find_next(req, nxt);
}
-static int io_async_cancel_prep(struct io_kiocb *req)
+static int io_async_cancel_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
-
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->flags || sqe->ioprio || sqe->off || sqe->len ||
sqe->cancel_flags)
return -EINVAL;
- req->flags |= REQ_F_PREPPED;
req->cancel.addr = READ_ONCE(sqe->addr);
return 0;
}
@@ -3072,21 +3051,14 @@ static int io_async_cancel_prep(struct io_kiocb *req)
static int io_async_cancel(struct io_kiocb *req, struct io_kiocb **nxt)
{
struct io_ring_ctx *ctx = req->ctx;
- int ret;
-
- ret = io_async_cancel_prep(req);
- if (ret)
- return ret;
io_async_find_and_cancel(ctx, req, req->cancel.addr, nxt, 0);
return 0;
}
-static int io_req_defer_prep(struct io_kiocb *req)
+static int io_req_defer_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct io_async_ctx *io = req->io;
- struct iov_iter iter;
ssize_t ret = 0;
switch (req->opcode) {
@@ -3094,61 +3066,47 @@ static int io_req_defer_prep(struct io_kiocb *req)
break;
case IORING_OP_READV:
case IORING_OP_READ_FIXED:
- /* ensure prep does right import */
- req->io = NULL;
- ret = io_read_prep(req, &iovec, &iter, true);
- req->io = io;
- if (ret < 0)
- break;
- io_req_map_rw(req, ret, iovec, inline_vecs, &iter);
- ret = 0;
+ ret = io_read_prep(req, sqe, true);
break;
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
- /* ensure prep does right import */
- req->io = NULL;
- ret = io_write_prep(req, &iovec, &iter, true);
- req->io = io;
- if (ret < 0)
- break;
- io_req_map_rw(req, ret, iovec, inline_vecs, &iter);
- ret = 0;
+ ret = io_write_prep(req, sqe, true);
break;
case IORING_OP_POLL_ADD:
- ret = io_poll_add_prep(req);
+ ret = io_poll_add_prep(req, sqe);
break;
case IORING_OP_POLL_REMOVE:
- ret = io_poll_remove_prep(req);
+ ret = io_poll_remove_prep(req, sqe);
break;
case IORING_OP_FSYNC:
- ret = io_prep_fsync(req);
+ ret = io_prep_fsync(req, sqe);
break;
case IORING_OP_SYNC_FILE_RANGE:
- ret = io_prep_sfr(req);
+ ret = io_prep_sfr(req, sqe);
break;
case IORING_OP_SENDMSG:
- ret = io_sendmsg_prep(req, io);
+ ret = io_sendmsg_prep(req, sqe);
break;
case IORING_OP_RECVMSG:
- ret =