summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/io_uring.c40
1 files changed, 27 insertions, 13 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 244aaccbc82b..dfa99da8b2d1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -224,13 +224,14 @@ struct io_ring_ctx {
unsigned sq_thread_idle;
unsigned cached_sq_dropped;
atomic_t cached_cq_overflow;
- struct io_uring_sqe *sq_sqes;
+ unsigned long sq_check_overflow;
struct list_head defer_list;
struct list_head timeout_list;
struct list_head cq_overflow_list;
wait_queue_head_t inflight_wait;
+ struct io_uring_sqe *sq_sqes;
} ____cacheline_aligned_in_smp;
struct io_rings *rings;
@@ -272,6 +273,7 @@ struct io_ring_ctx {
unsigned cq_entries;
unsigned cq_mask;
atomic_t cq_timeouts;
+ unsigned long cq_check_overflow;
struct wait_queue_head cq_wait;
struct fasync_struct *cq_fasync;
struct eventfd_ctx *cq_ev_fd;
@@ -950,6 +952,10 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
}
io_commit_cqring(ctx);
+ if (cqe) {
+ clear_bit(0, &ctx->sq_check_overflow);
+ clear_bit(0, &ctx->cq_check_overflow);
+ }
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
@@ -983,6 +989,10 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
} else {
+ if (list_empty(&ctx->cq_overflow_list)) {
+ set_bit(0, &ctx->sq_check_overflow);
+ set_bit(0, &ctx->cq_check_overflow);
+ }
refcount_inc(&req->refs);
req->result = res;
list_add_tail(&req->list, &ctx->cq_overflow_list);
@@ -1285,19 +1295,21 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
{
struct io_rings *rings = ctx->rings;
- /*
- * noflush == true is from the waitqueue handler, just ensure we wake
- * up the task, and the next invocation will flush the entries. We
- * cannot safely to it from here.
- */
- if (noflush && !list_empty(&ctx->cq_overflow_list))
- return -1U;
+ if (test_bit(0, &ctx->cq_check_overflow)) {
+ /*
+ * noflush == true is from the waitqueue handler, just ensure
+ * we wake up the task, and the next invocation will flush the
+ * entries. We cannot safely to it from here.
+ */
+ if (noflush && !list_empty(&ctx->cq_overflow_list))
+ return -1U;
- io_cqring_overflow_flush(ctx, false);
+ io_cqring_overflow_flush(ctx, false);
+ }
/* See comment at the top of this file */
smp_rmb();
- return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
+ return ctx->cached_cq_tail - READ_ONCE(rings->cq.head);
}
static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
@@ -4309,9 +4321,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
bool mm_fault = false;
/* if we have a backlog and couldn't flush it all, return BUSY */
- if (!list_empty(&ctx->cq_overflow_list) &&
- !io_cqring_overflow_flush(ctx, false))
- return -EBUSY;
+ if (test_bit(0, &ctx->sq_check_overflow)) {
+ if (!list_empty(&ctx->cq_overflow_list) &&
+ !io_cqring_overflow_flush(ctx, false))
+ return -EBUSY;
+ }
if (nr > IO_PLUG_THRESHOLD) {
io_submit_state_start(&state, nr);