From 4d671d922d51907bc41f1f7f2dc737c928ae78fd Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 28 Aug 2020 21:56:13 -0400 Subject: seccomp: kill process instead of thread for unknown actions Asynchronous termination of a thread outside of the userspace thread library's knowledge is an unsafe operation that leaves the process in an inconsistent, corrupt, and possibly unrecoverable state. In order to make new actions that may be added in the future safe on kernels not aware of them, change the default action from SECCOMP_RET_KILL_THREAD to SECCOMP_RET_KILL_PROCESS. Signed-off-by: Rich Felker Link: https://lore.kernel.org/r/20200829015609.GA32566@brightrain.aerifal.cx [kees: Fixed up coredump selection logic to match] Signed-off-by: Kees Cook --- kernel/seccomp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 676d4af62103..f754c1087e41 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1020,7 +1020,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, default: seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ - if (action == SECCOMP_RET_KILL_PROCESS || + if (action != SECCOMP_RET_KILL_THREAD || get_nr_threads(current) == 1) { kernel_siginfo_t info; @@ -1030,10 +1030,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, seccomp_init_siginfo(&info, this_syscall, data); do_coredump(&info); } - if (action == SECCOMP_RET_KILL_PROCESS) - do_group_exit(SIGSYS); - else + if (action == SECCOMP_RET_KILL_THREAD) do_exit(SIGSYS); + else + do_group_exit(SIGSYS); } unreachable(); -- cgit v1.2.3 From 2d9ca267a944c2b6ed5b4d750b1cf0407b6262b4 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Mon, 24 Aug 2020 15:59:21 +0300 Subject: seccomp: Use current_pt_regs() instead of task_pt_regs(current) As described in commit a3460a59747c ("new helper: current_pt_regs()"): - arch versions are "optimized versions". - some architectures have task_pt_regs() working only for traced tasks blocked on signal delivery. current_pt_regs() needs to work for *all* processes. In preparation for adding a coccinelle rule for using current_*(), instead of raw accesses to current members, modify seccomp_do_user_notification(), __seccomp_filter(), __secure_computing() to use current_pt_regs(). Signed-off-by: Denis Efremov Link: https://lore.kernel.org/r/20200824125921.488311-1-efremov@linux.com [kees: Reworded commit log, add comment to populate_seccomp_data()] Signed-off-by: Kees Cook --- kernel/seccomp.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f754c1087e41..ae6b40cc39f4 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -196,6 +196,10 @@ struct seccomp_filter { */ static void populate_seccomp_data(struct seccomp_data *sd) { + /* + * Instead of using current_pt_reg(), we're already doing the work + * to safely fetch "current", so just use "task" everywhere below. + */ struct task_struct *task = current; struct pt_regs *regs = task_pt_regs(task); unsigned long args[6]; @@ -910,7 +914,7 @@ out: if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) return 0; - syscall_set_return_value(current, task_pt_regs(current), + syscall_set_return_value(current, current_pt_regs(), err, ret); return -1; } @@ -943,13 +947,13 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, /* Set low-order bits as an errno, capped at MAX_ERRNO. */ if (data > MAX_ERRNO) data = MAX_ERRNO; - syscall_set_return_value(current, task_pt_regs(current), + syscall_set_return_value(current, current_pt_regs(), -data, 0); goto skip; case SECCOMP_RET_TRAP: /* Show the handler the original registers. */ - syscall_rollback(current, task_pt_regs(current)); + syscall_rollback(current, current_pt_regs()); /* Let the filter pass back 16 bits of data. */ seccomp_send_sigsys(this_syscall, data); goto skip; @@ -962,7 +966,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, /* ENOSYS these calls if there is no tracer attached. */ if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { syscall_set_return_value(current, - task_pt_regs(current), + current_pt_regs(), -ENOSYS, 0); goto skip; } @@ -982,7 +986,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, if (fatal_signal_pending(current)) goto skip; /* Check if the tracer forced the syscall to be skipped. */ - this_syscall = syscall_get_nr(current, task_pt_regs(current)); + this_syscall = syscall_get_nr(current, current_pt_regs()); if (this_syscall < 0) goto skip; @@ -1025,7 +1029,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, kernel_siginfo_t info; /* Show the original registers in the dump. */ - syscall_rollback(current, task_pt_regs(current)); + syscall_rollback(current, current_pt_regs()); /* Trigger a manual coredump since do_exit skips it. */ seccomp_init_siginfo(&info, this_syscall, data); do_coredump(&info); @@ -1060,7 +1064,7 @@ int __secure_computing(const struct seccomp_data *sd) return 0; this_syscall = sd ? sd->nr : - syscall_get_nr(current, task_pt_regs(current)); + syscall_get_nr(current, current_pt_regs()); switch (mode) { case SECCOMP_MODE_STRICT: -- cgit v1.2.3 From dfe719fef03d752f1682fa8aeddf30ba501c8555 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 5 Oct 2020 03:44:01 +0200 Subject: seccomp: Make duplicate listener detection non-racy Currently, init_listener() tries to prevent adding a filter with SECCOMP_FILTER_FLAG_NEW_LISTENER if one of the existing filters already has a listener. However, this check happens without holding any lock that would prevent another thread from concurrently installing a new filter (potentially with a listener) on top of the ones we already have. Theoretically, this is also a data race: The plain load from current->seccomp.filter can race with concurrent writes to the same location. Fix it by moving the check into the region that holds the siglock to guard against concurrent TSYNC. (The "Fixes" tag points to the commit that introduced the theoretical data race; concurrent installation of another filter with TSYNC only became possible later, in commit 51891498f2da ("seccomp: allow TSYNC and USER_NOTIF together").) Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Reviewed-by: Tycho Andersen Signed-off-by: Jann Horn Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201005014401.490175-1-jannh@google.com --- kernel/seccomp.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ae6b40cc39f4..8ad7a293255a 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1476,13 +1476,7 @@ static const struct file_operations seccomp_notify_ops = { static struct file *init_listener(struct seccomp_filter *filter) { - struct file *ret = ERR_PTR(-EBUSY); - struct seccomp_filter *cur; - - for (cur = current->seccomp.filter; cur; cur = cur->prev) { - if (cur->notif) - goto out; - } + struct file *ret; ret = ERR_PTR(-ENOMEM); filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL); @@ -1508,6 +1502,31 @@ out: return ret; } +/* + * Does @new_child have a listener while an ancestor also has a listener? + * If so, we'll want to reject this filter. + * This only has to be tested for the current process, even in the TSYNC case, + * because TSYNC installs @child with the same parent on all threads. + * Note that @new_child is not hooked up to its parent at this point yet, so + * we use current->seccomp.filter. + */ +static bool has_duplicate_listener(struct seccomp_filter *new_child) +{ + struct seccomp_filter *cur; + + /* must be protected against concurrent TSYNC */ + lockdep_assert_held(¤t->sighand->siglock); + + if (!new_child->notif) + return false; + for (cur = current->seccomp.filter; cur; cur = cur->prev) { + if (cur->notif) + return true; + } + + return false; +} + /** * seccomp_set_mode_filter: internal function for setting seccomp filter * @flags: flags to change filter behavior @@ -1579,6 +1598,11 @@ static long seccomp_set_mode_filter(unsigned int flags, if (!seccomp_may_assign_mode(seccomp_mode)) goto out; + if (has_duplicate_listener(prepared)) { + ret = -EBUSY; + goto out; + } + ret = seccomp_attach_filter(flags, prepared); if (ret) goto out; -- cgit v1.2.3