From 880641bb9da2473e9ecf6c708d993b29928c1b3c Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 5 Mar 2012 14:59:12 -0800 Subject: aio: wake up waiters when freeing unused kiocbs Bart Van Assche reported a hung fio process either when hot-removing storage or when interrupting the fio process itself. The (pruned) call trace for the latter looks like so: fio D 0000000000000001 0 6849 6848 0x00000004 ffff880092541b88 0000000000000046 ffff880000000000 ffff88012fa11dc0 ffff88012404be70 ffff880092541fd8 ffff880092541fd8 ffff880092541fd8 ffff880128b894d0 ffff88012404be70 ffff880092541b88 000000018106f24d Call Trace: schedule+0x3f/0x60 io_schedule+0x8f/0xd0 wait_for_all_aios+0xc0/0x100 exit_aio+0x55/0xc0 mmput+0x2d/0x110 exit_mm+0x10d/0x130 do_exit+0x671/0x860 do_group_exit+0x44/0xb0 get_signal_to_deliver+0x218/0x5a0 do_signal+0x65/0x700 do_notify_resume+0x65/0x80 int_signal+0x12/0x17 The problem lies with the allocation batching code. It will opportunistically allocate kiocbs, and then trim back the list of iocbs when there is not enough room in the completion ring to hold all of the events. In the case above, pruning back the events ends up freeing the last active request while the context is marked as dead, so the pruning code is responsible for waking up waiters. Unfortunately, the code does not check for this condition, so we end up with a hung task. 
Signed-off-by: Jeff Moyer Reported-by: Bart Van Assche Tested-by: Bart Van Assche Cc: [3.2.x only] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 969beb0e2231..67e4b9047cc9 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -490,6 +490,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) kmem_cache_free(kiocb_cachep, req); ctx->reqs_active--; } + if (unlikely(!ctx->reqs_active && ctx->dead)) + wake_up_all(&ctx->wait); spin_unlock_irq(&ctx->ctx_lock); } -- cgit v1.2.3 From c415c3b47ea2754659d915cca387a20999044163 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Mar 2012 14:59:13 -0800 Subject: vfork: introduce complete_vfork_done() No functional changes. Move the clear-and-complete-vfork_done code into the new trivial helper, complete_vfork_done(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 92ce83a11e90..dccdcec913e9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1915,7 +1915,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - struct completion *vfork_done; int core_waiters = -EBUSY; init_completion(&core_state->startup); @@ -1934,11 +1933,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state) * Make sure nobody is waiting for us to release the VM, * otherwise we can deadlock when we wait on each other */ - vfork_done = tsk->vfork_done; - if (vfork_done) { - tsk->vfork_done = NULL; - complete(vfork_done); - } + if (tsk->vfork_done) + complete_vfork_done(tsk); if (core_waiters) wait_for_completion(&core_state->startup); -- cgit v1.2.3 From 57b59c4a1400fa6c34764eab2e35a8762dc05a09 Mon Sep 17 00:00:00 2001 From: Oleg 
Nesterov Date: Mon, 5 Mar 2012 14:59:13 -0800 Subject: coredump_wait: don't call complete_vfork_done() Now that CLONE_VFORK is killable, coredump_wait() no longer needs complete_vfork_done(). zap_threads() should find and kill all tasks with the same ->mm; this includes our parent if ->vfork_done is set. mm_release() becomes the only caller, so unexport complete_vfork_done(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index dccdcec913e9..153dee14fe55 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1926,19 +1926,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state) core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); - if (unlikely(core_waiters < 0)) - goto fail; - - /* - * Make sure nobody is waiting for us to release the VM, - * otherwise we can deadlock when we wait on each other - */ - if (tsk->vfork_done) - complete_vfork_done(tsk); - - if (core_waiters) + if (core_waiters > 0) wait_for_completion(&core_state->startup); -fail: + return core_waiters; } -- cgit v1.2.3