summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-04 14:27:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-04 14:27:25 -0700
commit3950e975431bc914f7e81b8f2a2dbdf2064acb0f (patch)
tree32adad006224780b83a23201f97368fb45bd4354
parentfd76a74d940ae3d6b8b2395cd12914630c7e1739 (diff)
parent7fce69dff8db30cb93aace0bbebda09972027af7 (diff)
Merge branch 'exec-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull execve updates from Eric Biederman: "During the development of v5.7 I ran into bugs and quality of implementation issues related to exec that could not be easily fixed because of the way exec is implemented. So I have been diggin into exec and cleaning up what I can. This cycle I have been looking at different ideas and different implementations to see what is possible to improve exec, and cleaning the way exec interfaces with in kernel users. Only cleaning up the interfaces of exec with rest of the kernel has managed to stabalize and make it through review in time for v5.9-rc1 resulting in 2 sets of changes this cycle. - Implement kernel_execve - Make the user mode driver code a better citizen With kernel_execve the code size got a little larger as the copying of parameters from userspace and copying of parameters from userspace is now separate. The good news is kernel threads no longer need to play games with set_fs to use exec. Which when combined with the rest of Christophs set_fs changes should security bugs with set_fs much more difficult" * 'exec-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (23 commits) exec: Implement kernel_execve exec: Factor bprm_stack_limits out of prepare_arg_pages exec: Factor bprm_execve out of do_execve_common exec: Move bprm_mm_init into alloc_bprm exec: Move initialization of bprm->filename into alloc_bprm exec: Factor out alloc_bprm exec: Remove unnecessary spaces from binfmts.h umd: Stop using split_argv umd: Remove exit_umh bpfilter: Take advantage of the facilities of struct pid exit: Factor thread_group_exited out of pidfd_poll umd: Track user space drivers with struct pid bpfilter: Move bpfilter_umh back into init data exec: Remove do_execve_file umh: Stop calling do_execve_file umd: Transform fork_usermode_blob into fork_usermode_driver umd: Rename umd_info.cmdline umd_info.driver_name umd: For clarity rename umh_info umd_info umh: Separate the user mode driver and the user mode helper support umh: Remove call_usermodehelper_setup_file. ...
-rw-r--r--arch/x86/entry/entry_32.S2
-rw-r--r--arch/x86/entry/entry_64.S2
-rw-r--r--arch/x86/kernel/unwind_frame.c2
-rw-r--r--fs/exec.c307
-rw-r--r--include/linux/binfmts.h21
-rw-r--r--include/linux/bpfilter.h7
-rw-r--r--include/linux/sched.h9
-rw-r--r--include/linux/sched/signal.h2
-rw-r--r--include/linux/umh.h15
-rw-r--r--include/linux/usermode_driver.h18
-rw-r--r--init/main.c4
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/exit.c25
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/umh.c171
-rw-r--r--kernel/usermode_driver.c182
-rw-r--r--net/bpfilter/bpfilter_kern.c38
-rw-r--r--net/bpfilter/bpfilter_umh_blob.S2
-rw-r--r--net/ipv4/bpfilter/sockopt.c20
-rw-r--r--security/tomoyo/common.h2
-rw-r--r--security/tomoyo/domain.c4
-rw-r--r--security/tomoyo/tomoyo.c4
22 files changed, 480 insertions, 364 deletions
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2d0bd5d5f032..4430ee1fae71 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -854,7 +854,7 @@ SYM_CODE_START(ret_from_fork)
CALL_NOSPEC ebx
/*
* A kernel thread is allowed to return here after successfully
- * calling do_execve(). Exit to userspace to complete the execve()
+ * calling kernel_execve(). Exit to userspace to complete the execve()
* syscall.
*/
movl $0, PT_EAX(%esp)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d2a00c97e53f..73c7e255256b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -293,7 +293,7 @@ SYM_CODE_START(ret_from_fork)
CALL_NOSPEC rbx
/*
* A kernel thread is allowed to return here after successfully
- * calling do_execve(). Exit to userspace to complete the execve()
+ * calling kernel_execve(). Exit to userspace to complete the execve()
* syscall.
*/
movq $0, RAX(%rsp)
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 722a85f3b2dd..e40b4942157f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -275,7 +275,7 @@ bool unwind_next_frame(struct unwind_state *state)
* This user_mode() check is slightly broader than a PF_KTHREAD
* check because it also catches the awkward situation where a
* newly forked kthread transitions into a user task by calling
- * do_execve(), which eventually clears PF_KTHREAD.
+ * kernel_execve(), which eventually clears PF_KTHREAD.
*/
if (!user_mode(regs))
goto the_end;
diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a70327..3698252719a3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -448,18 +448,26 @@ static int count(struct user_arg_ptr argv, int max)
return i;
}
-static int prepare_arg_pages(struct linux_binprm *bprm,
- struct user_arg_ptr argv, struct user_arg_ptr envp)
+static int count_strings_kernel(const char *const *argv)
{
- unsigned long limit, ptr_size;
+ int i;
- bprm->argc = count(argv, MAX_ARG_STRINGS);
- if (bprm->argc < 0)
- return bprm->argc;
+ if (!argv)
+ return 0;
- bprm->envc = count(envp, MAX_ARG_STRINGS);
- if (bprm->envc < 0)
- return bprm->envc;
+ for (i = 0; argv[i]; ++i) {
+ if (i >= MAX_ARG_STRINGS)
+ return -E2BIG;
+ if (fatal_signal_pending(current))
+ return -ERESTARTNOHAND;
+ cond_resched();
+ }
+ return i;
+}
+
+static int bprm_stack_limits(struct linux_binprm *bprm)
+{
+ unsigned long limit, ptr_size;
/*
* Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
@@ -633,6 +641,20 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
}
EXPORT_SYMBOL(copy_string_kernel);
+static int copy_strings_kernel(int argc, const char *const *argv,
+ struct linux_binprm *bprm)
+{
+ while (argc-- > 0) {
+ int ret = copy_string_kernel(argv[argc], bprm);
+ if (ret < 0)
+ return ret;
+ if (fatal_signal_pending(current))
+ return -ERESTARTNOHAND;
+ cond_resched();
+ }
+ return 0;
+}
+
#ifdef CONFIG_MMU
/*
@@ -1543,6 +1565,10 @@ static int prepare_bprm_creds(struct linux_binprm *bprm)
static void free_bprm(struct linux_binprm *bprm)
{
+ if (bprm->mm) {
+ acct_arg_size(bprm, 0);
+ mmput(bprm->mm);
+ }
free_arg_pages(bprm);
if (bprm->cred) {
mutex_unlock(&current->signal->cred_guard_mutex);
@@ -1557,9 +1583,43 @@ static void free_bprm(struct linux_binprm *bprm)
/* If a binfmt changed the interp, free it. */
if (bprm->interp != bprm->filename)
kfree(bprm->interp);
+ kfree(bprm->fdpath);
kfree(bprm);
}
+static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
+{
+ struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+ int retval = -ENOMEM;
+ if (!bprm)
+ goto out;
+
+ if (fd == AT_FDCWD || filename->name[0] == '/') {
+ bprm->filename = filename->name;
+ } else {
+ if (filename->name[0] == '\0')
+ bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
+ else
+ bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
+ fd, filename->name);
+ if (!bprm->fdpath)
+ goto out_free;
+
+ bprm->filename = bprm->fdpath;
+ }
+ bprm->interp = bprm->filename;
+
+ retval = bprm_mm_init(bprm);
+ if (retval)
+ goto out_free;
+ return bprm;
+
+out_free:
+ free_bprm(bprm);
+out:
+ return ERR_PTR(retval);
+}
+
int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
{
/* If a binfmt changed the interp, free it first. */
@@ -1818,53 +1878,25 @@ static int exec_binprm(struct linux_binprm *bprm)
/*
* sys_execve() executes a new program.
*/
-static int __do_execve_file(int fd, struct filename *filename,
- struct user_arg_ptr argv,
- struct user_arg_ptr envp,
- int flags, struct file *file)
+static int bprm_execve(struct linux_binprm *bprm,
+ int fd, struct filename *filename, int flags)
{
- char *pathbuf = NULL;
- struct linux_binprm *bprm;
+ struct file *file;
struct files_struct *displaced;
int retval;
- if (IS_ERR(filename))
- return PTR_ERR(filename);
-
- /*
- * We move the actual failure in case of RLIMIT_NPROC excess from
- * set*uid() to execve() because too many poorly written programs
- * don't check setuid() return code. Here we additionally recheck
- * whether NPROC limit is still exceeded.
- */
- if ((current->flags & PF_NPROC_EXCEEDED) &&
- atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
- retval = -EAGAIN;
- goto out_ret;
- }
-
- /* We're below the limit (still or again), so we don't want to make
- * further execve() calls fail. */
- current->flags &= ~PF_NPROC_EXCEEDED;
-
retval = unshare_files(&displaced);
if (retval)
- goto out_ret;
-
- retval = -ENOMEM;
- bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
- if (!bprm)
- goto out_files;
+ return retval;
retval = prepare_bprm_creds(bprm);
if (retval)
- goto out_free;
+ goto out_files;
check_unsafe_exec(bprm);
current->in_execve = 1;
- if (!file)
- file = do_open_execat(fd, filename, flags);
+ file = do_open_execat(fd, filename, flags);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_unmark;
@@ -1872,57 +1904,20 @@ static int __do_execve_file(int fd, struct filename *filename,
sched_exec();
bprm->file = file;
- if (!filename) {
- bprm->filename = "none";
- } else if (fd == AT_FDCWD || filename->name[0] == '/') {
- bprm->filename = filename->name;
- } else {
- if (filename->name[0] == '\0')
- pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
- else
- pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
- fd, filename->name);
- if (!pathbuf) {
- retval = -ENOMEM;
- goto out_unmark;
- }
- /*
- * Record that a name derived from an O_CLOEXEC fd will be
- * inaccessible after exec. Relies on having exclusive access to
- * current->files (due to unshare_files above).
- */
- if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
- bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
- bprm->filename = pathbuf;
- }
- bprm->interp = bprm->filename;
-
- retval = bprm_mm_init(bprm);
- if (retval)
- goto out_unmark;
-
- retval = prepare_arg_pages(bprm, argv, envp);
- if (retval < 0)
- goto out;
+ /*
+ * Record that a name derived from an O_CLOEXEC fd will be
+ * inaccessible after exec. Relies on having exclusive access to
+ * current->files (due to unshare_files above).
+ */
+ if (bprm->fdpath &&
+ close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
+ bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
/* Set the unchanging part of bprm->cred */
retval = security_bprm_creds_for_exec(bprm);
if (retval)
goto out;
- retval = copy_string_kernel(bprm->filename, bprm);
- if (retval < 0)
- goto out;
-
- bprm->exec = bprm->p;
- retval = copy_strings(bprm->envc, envp, bprm);
- if (retval < 0)
- goto out;
-
- retval = copy_strings(bprm->argc, argv, bprm);
- if (retval < 0)
- goto out;
-
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
@@ -1933,10 +1928,6 @@ static int __do_execve_file(int fd, struct filename *filename,
rseq_execve(current);
acct_update_integrals(current);
task_numa_free(current, false);
- free_bprm(bprm);
- kfree(pathbuf);
- if (filename)
- putname(filename);
if (displaced)
put_files_struct(displaced);
return retval;
@@ -1950,25 +1941,15 @@ out:
*/
if (bprm->point_of_no_return && !fatal_signal_pending(current))
force_sigsegv(SIGSEGV);
- if (bprm->mm) {
- acct_arg_size(bprm, 0);
- mmput(bprm->mm);
- }
out_unmark:
current->fs->in_exec = 0;
current->in_execve = 0;
-out_free:
- free_bprm(bprm);
- kfree(pathbuf);
-
out_files:
if (displaced)
reset_files_struct(displaced);
-out_ret:
- if (filename)
- putname(filename);
+
return retval;
}
@@ -1977,18 +1958,124 @@ static int do_execveat_common(int fd, struct filename *filename,
struct user_arg_ptr envp,
int flags)
{
- return __do_execve_file(fd, filename, argv, envp, flags, NULL);
+ struct linux_binprm *bprm;
+ int retval;
+
+ if (IS_ERR(filename))
+ return PTR_ERR(filename);
+
+ /*
+ * We move the actual failure in case of RLIMIT_NPROC excess from
+ * set*uid() to execve() because too many poorly written programs
+ * don't check setuid() return code. Here we additionally recheck
+ * whether NPROC limit is still exceeded.
+ */
+ if ((current->flags & PF_NPROC_EXCEEDED) &&
+ atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
+ retval = -EAGAIN;
+ goto out_ret;
+ }
+
+ /* We're below the limit (still or again), so we don't want to make
+ * further execve() calls fail. */
+ current->flags &= ~PF_NPROC_EXCEEDED;
+
+ bprm = alloc_bprm(fd, filename);
+ if (IS_ERR(bprm)) {
+ retval = PTR_ERR(bprm);
+ goto out_ret;
+ }
+
+ retval = count(argv, MAX_ARG_STRINGS);
+ if (retval < 0)
+ goto out_free;
+ bprm->argc = retval;
+
+ retval = count(envp, MAX_ARG_STRINGS);
+ if (retval < 0)
+ goto out_free;
+ bprm->envc = retval;
+
+ retval = bprm_stack_limits(bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = copy_string_kernel(bprm->filename, bprm);
+ if (retval < 0)
+ goto out_free;
+ bprm->exec = bprm->p;
+
+ retval = copy_strings(bprm->envc, envp, bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = copy_strings(bprm->argc, argv, bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = bprm_execve(bprm, fd, filename, flags);
+out_free:
+ free_bprm(bprm);
+
+out_ret:
+ putname(filename);
+ return retval;
}
-int do_execve_file(struct file *file, void *__argv, void *__envp)
+int kernel_execve(const char *kernel_filename,
+ const char *const *argv, const char *const *envp)
{
- struct user_arg_ptr argv = { .ptr.native = __argv };
- struct user_arg_ptr envp = { .ptr.native = __envp };
+ struct filename *filename;
+ struct linux_binprm *bprm;
+ int fd = AT_FDCWD;
+ int retval;
+
+ filename = getname_kernel(kernel_filename);
+ if (IS_ERR(filename))
+ return PTR_ERR(filename);
+
+ bprm = alloc_bprm(fd, filename);
+ if (IS_ERR(bprm)) {
+ retval = PTR_ERR(bprm);
+ goto out_ret;
+ }
+
+ retval = count_strings_kernel(argv);
+ if (retval < 0)
+ goto out_free;
+ bprm->argc = retval;
+
+ retval = count_strings_kernel(envp);
+ if (retval < 0)
+ goto out_free;
+ bprm->envc = retval;
- return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
+ retval = bprm_stack_limits(bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = copy_string_kernel(bprm->filename, bprm);
+ if (retval < 0)
+ goto out_free;
+ bprm->exec = bprm->p;
+
+ retval = copy_strings_kernel(bprm->envc, envp, bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = copy_strings_kernel(bprm->argc, argv, bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = bprm_execve(bprm, fd, filename, 0);
+out_free:
+ free_bprm(bprm);
+out_ret:
+ putname(filename);
+ return retval;
}
-int do_execve(struct filename *filename,
+static int do_execve(struct filename *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp)
{
@@ -1997,7 +2084,7 @@ int do_execve(struct filename *filename,
return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
}
-int do_execveat(int fd, struct filename *filename,
+static int do_execveat(int fd, struct filename *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp,
int flags)
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 4a20b7517dd0..0571701ab1c5 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -45,17 +45,18 @@ struct linux_binprm {
#ifdef __alpha__
unsigned int taso:1;
#endif
- struct file * executable; /* Executable to pass to the interpreter */
- struct file * interpreter;
- struct file * file;
+ struct file *executable; /* Executable to pass to the interpreter */
+ struct file *interpreter;
+ struct file *file;
struct cred *cred; /* new credentials */
int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */
unsigned int per_clear; /* bits to clear in current->personality */
int argc, envc;
- const char * filename; /* Name of binary as seen by procps */
- const char * interp; /* Name of the binary really executed. Most
+ const char *filename; /* Name of binary as seen by procps */
+ const char *interp; /* Name of the binary really executed. Most
of the time same as filename, but could be
different for binfmt_{misc,script} */
+ const char *fdpath; /* generated filename for execveat */
unsigned interp_flags;
int execfd; /* File descriptor of the executable */
unsigned long loader, exec;
@@ -134,13 +135,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm);
extern void set_binfmt(struct linux_binfmt *new);
extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
-extern int do_execve(struct filename *,
- const char __user * const __user *,
- const char __user * const __user *);
-extern int do_execveat(int, struct filename *,
- const char __user * const __user *,
- const char __user * const __user *,
- int);
-int do_execve_file(struct file *file, void *__argv, void *__envp);
+int kernel_execve(const char *filename,
+ const char *const *argv, const char *const *envp);
#endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
index d815622cd31e..9b114c718a76 100644
--- a/include/linux/bpfilter.h
+++ b/include/linux/bpfilter.h
@@ -3,22 +3,23 @@
#define _LINUX_BPFILTER_H
#include <uapi/linux/bpfilter.h>
-#include <linux/umh.h>
+#include <linux/usermode_driver.h>
struct sock;
int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
unsigned int optlen);
int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
int __user *optlen);
+void bpfilter_umh_cleanup(struct umd_info *info);
+
struct bpfilter_umh_ops {
- struct umh_info info;
+ struct umd_info info;
/* since ip_getsockopt() can run in parallel, serialize access to umh */
struct mutex lock;
int (*sockopt)(struct sock *sk, int optname,
char __user *optval,
unsigned int optlen, bool is_set);
int (*start)(void);
- bool stop;
};
extern struct bpfilter_umh_ops bpfilter_ops;
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d6683b48c2a..06ec60462af0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1507,7 +1507,6 @@ extern struct pid *cad_pid;
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
-#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
@@ -2016,14 +2015,6 @@ static inline void rseq_execve(struct task_struct *t)
#endif
-void __exit_umh(struct task_struct *tsk);
-
-static inline void exit_umh(struct task_struct *tsk)
-{
- if (unlikely(tsk->flags & PF_UMH))
- __exit_umh(tsk);
-}
-
#ifdef CONFIG_DEBUG_RSEQ
void rseq_syscall(struct pt_regs *regs);
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 0ee5e696c5d8..1bad18a1d8ba 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -674,6 +674,8 @@ static inline int thread_group_empty(struct task_struct *p)
#define delay_group_leader(p) \
(thread_group_leader(p) && !thread_group_empty(p))
+extern bool thread_group_exited(struct pid *pid);
+
extern struct sighand_struct *__lock_task_sighand(struct task_struct *task,
unsigned long *flags);
diff --git a/include/linux/umh.h b/include/linux/umh.h
index 0c08de356d0d..244aff638220 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -22,10 +22,8 @@ struct subprocess_info {
const char *path;
char **argv;
char **envp;
- struct file *file;
int wait;
int retval;
- pid_t pid;
int (*init)(struct subprocess_info *info, struct cred *new);
void (*cleanup)(struct subprocess_info *info);
void *data;
@@ -40,19 +38,6 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp,
int (*init)(struct subprocess_info *info, struct cred *new),
void (*cleanup)(struct subprocess_info *), void *data);
-struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
- int (*init)(struct subprocess_info *info, struct cred *new),
- void (*cleanup)(struct subprocess_info *), void *data);
-struct umh_info {
- const char *cmdline;
- struct file *pipe_to_umh;
- struct file *pipe_from_umh;
- struct list_head list;
- void (*cleanup)(struct umh_info *info);
- pid_t pid;
-};
-int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
-
extern int
call_usermodehelper_exec(struct subprocess_info *info, int wait);
diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h
new file mode 100644
index 000000000000..073a9e0ec07d
--- /dev/null
+++ b/include/linux/usermode_driver.h
@@ -0,0 +1,18 @@
+#ifndef __LINUX_USERMODE_DRIVER_H__
+#define __LINUX_USERMODE_DRIVER_H__
+
+#include <linux/umh.h>
+#include <linux/path.h>
+
+struct umd_info {
+ const char *driver_name;
+ struct file *pipe_to_umh;
+ struct file *pipe_from_umh;
+ struct path wd;
+ struct pid *tgid;
+};
+int umd_load_blob(struct umd_info *info, const void *data, size_t len);
+int umd_unload_blob(struct umd_info *info);
+int fork_usermode_driver(struct umd_info *info);
+
+#endif /* __LINUX_USERMODE_DRIVER_H__ */
diff --git a/init/main.c b/init/main.c
index 9127b240fd26..15bd0efff3df 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1331,9 +1331,7 @@ static int run_init_process(const char *init_filename)
pr_debug(" with environment:\n");
for (p = envp_init; *p; p++)
pr_debug(" %s\n", *p);
- return do_execve(getname_kernel(init_filename),
- (const char __user *const __user *)argv_init,
- (const char __user *const __user *)envp_init);
+ return kernel_execve(init_filename, argv_init, envp_init);
}
static int try_to_run_init_process(const char *init_filename)
diff --git a/kernel/Makefile b/kernel/Makefile
index 155b5380500a..23f189743c73 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -12,6 +12,7 @@ obj-y = fork.o exec_domain.o panic.o \
notifier.o ksysfs.o cred.o reboot.o \
async.o range.o smpboot.o ucount.o
+obj-$(CONFIG_BPFILTER) += usermode_driver.o
obj-$(CONFIG_MODULES) += kmod.o
obj-$(CONFIG_MULTIUSER) += groups.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 08b7ffdd0f18..e731c414e024 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -805,7 +805,6 @@ void __noreturn do_exit(long code)
exit_task_namespaces(tsk);
exit_task_work(tsk);
exit_thread(tsk);
- exit_umh(tsk);
/*
* Flush inherited counters to the parent - before the parent
@@ -1712,6 +1711,30 @@ Efault:
}
#endif
+/**
+ * thread_group_exited - check that a thread group has exited
+ * @pid: tgid of thread group to be checked.
+ *
+ * Test if the thread group represented by tgid has exited (all
+ * threads are zombies, dead or completely gone).
+ *
+ * Return: true if the thread group has exited. false otherwise.
+ */
+bool thread_group_exited(struct pid *pid)
+{
+ struct task_struct *task;
+ bool exited;
+
+ rcu_read_lock();
+ task = pid_task(pid, PIDTYPE_PID);
+ exited = !task ||
+ (READ_ONCE(task->exit_state) && thread_group_empty(task));
+ rcu_read_unlock();
+
+ return exited;
+}
+EXPORT_SYMBOL(thread_group_exited);
+
__weak void abort(void)
{
BUG();
diff --git a/kernel/fork.c b/kernel/fork.c
index c1022532a94d..40996d2a56b9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1792,22 +1792,18 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
*/
static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
{
- struct task_struct *task;
struct pid *pid = file->private_data;
__poll_t poll_flags = 0;
poll_wait(file, &pid->wait_pidfd, pts);
- rcu_read_lock();
- task = pid_task(pid, PIDTYPE_PID);
/*
* Inform pollers only when the whole thread group exits.
* If the thread group leader exits before all other threads in the
* group, then poll(2) should block, similar to the wait(2) family.
*/
- if (!task || (task->exit_state && thread_group_empty(task)))
+ if (thread_group_exited(pid))
poll_flags = EPOLLIN | EPOLLRDNORM;
- rcu_read_unlock();
return poll_flags;
}
diff --git a/kernel/umh.c b/kernel/umh.c
index 79f139a7ca03..a25433f9cd9a 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -26,8 +26,6 @@
#include <linux/ptrace.h>
#include <linux/async.h>
#include <linux/uaccess.h>
-#include <linux/shmem_fs.h>
-#include <linux/pipe_fs_i.h>
#include <trace/events/module.h>
@@ -38,8 +36,6 @@ static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
static DEFINE_SPINLOCK(umh_sysctl_lock);
static DECLARE_RWSEM(umhelper_sem);
-static LIST_HEAD(umh_list);
-static DEFINE_MUTEX(umh_list_lock);
static void call_usermodehelper_freeinfo(struct subprocess_info *info)
{
@@ -102,16 +98,9 @@ static int call_usermodehelper_exec_async(void *data)
commit_creds(new);
- sub_info->pid = task_pid_nr(current);
- if (sub_info->file) {
- retval = do_execve_file(sub_info->file,
- sub_info->argv, sub_info->envp);
- if (!retval)
- current->flags |= PF_UMH;
- } else
- retval = do_execve(getname_kernel(sub_info->path),
- (const char __user *const __user *)sub_info->argv,
- (const char __user *const __user *)sub_info->envp);
+ retval = kernel_execve(sub_info->path,
+ (const char *const *)sub_info->argv,
+ (const char *const *)sub_info->envp);
out:
sub_info->retval = retval;
/*
@@ -405,140 +394,6 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
}
EXPORT_SYMBOL(call_usermodehelper_setup);
-struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
- int (*init)(struct subprocess_info *info, struct cred *new),
- void (*cleanup)(struct subprocess_info *info), void *data)
-{
- struct subprocess_info *sub_info;
- struct umh_info *info = data;
- const char *cmdline = (info->cmdline) ? info->cmdline : "usermodehelper";
-
- sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL);
- if (!sub_info)
- return NULL;
-
- sub_info->argv = argv_split(GFP_KERNEL, cmdline, NULL);
- if (!sub_info->argv) {
- kfree(sub_info);
- return NULL;
- }
-
- INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
- sub_info->path = "none";
- sub_info->file = file;
- sub_info->init = init;
- sub_info->cleanup = cleanup;
- sub_info->data = data;
- return sub_info;
-}
-
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
-{
- struct umh_info *umh_info = info->data;
- struct file *from_umh[2];
- struct file *to_umh[2];
- int err;
-
- /* create pipe to send data to umh */
- err = create_pipe_files(to_umh, 0);
- if (err)
- return err;
- err = replace_fd(0, to_umh[0], 0);
- fput(to_umh[0]);
- if (err < 0) {
- fput(to_umh[1]);
- return err;
- }
-
- /* create pipe to receive data from umh */
- err = create_pipe_files(from_umh, 0);
- if (err) {
- fput(to_umh[1]);
- replace_fd(0, NULL, 0);
- return err;
- }
- err = replace_fd(1, from_umh[1], 0);
- fput(from_umh[1]);
- if (err < 0) {
- fput(to_umh[1]);
- replace_fd(0, NULL, 0);
- fput(from_umh[0]);
- return err;
- }
-
- umh_info->pipe_to_umh = to_umh[1];
- umh_info->pipe_from_umh = from_umh[0];
- return 0;
-}
-
-static void umh_clean_and_save_pid(struct subprocess_info *info)
-{
- struct umh_info *umh_info = info->data;
-
- /* cleanup if umh_pipe_setup() was successful but exec failed */
- if (info->pid && info->retval) {
- fput(umh_info->pipe_to_umh);
- fput(umh_info->pipe_from_umh);
- }
-
- argv_free(info->argv);
- umh_info->pid = info->pid;
-}
-
-/**
- * fork_usermode_blob - fork a blob of bytes as a usermode process
- * @data: a blob of bytes that can be do_execv-ed as a file
- * @len: length of the blob
- * @info: information about usermode process (shouldn't be NULL)
- *
- * If info->cmdline is set it will be used as command line for the
- * user process, else "usermodehelper" is used.
- *
- * Returns either negative error or zero which indicates success
- * in executing a blob of bytes as a usermode process. In such
- * case 'struct umh_info *info' is populated with two pipes
- * and a pid of the process. The caller is responsible for health
- * check of the user process, killing it via pid, and closing the
- * pipes when user process is no longer needed.
- */
-int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
-{
- struct subprocess_info *sub_info;
- struct file *file;
- ssize_t written;
- loff_t pos = 0;
- int err;
-
- file = shmem_kernel_file_setup("", len, 0);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- written = kernel_write(file, data, len, &pos);
- if (written != len) {
- err = written;
- if (err >= 0)
- err = -ENOMEM;
- goto out;
- }
-
- err = -ENOMEM;
- sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup,
- umh_clean_and_save_pid, info);
- if (!sub_info)
- goto out;
-
- err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
- if (!err) {
- mutex_lock(&umh_list_lock);
- list_add(&info->list, &umh_list);