diff options
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | include/linux/pid.h | 7 | ||||
-rw-r--r-- | include/linux/pid_namespace.h | 2 | ||||
-rw-r--r-- | include/linux/sched/task.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/sched.h | 64 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 100 | ||||
-rw-r--r-- | kernel/pid.c | 86 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/Makefile | 1 | ||||
-rw-r--r-- | tools/testing/selftests/clone3/.gitignore | 3 | ||||
-rw-r--r-- | tools/testing/selftests/clone3/Makefile | 6 | ||||
-rw-r--r-- | tools/testing/selftests/clone3/clone3.c | 202 | ||||
-rw-r--r-- | tools/testing/selftests/clone3/clone3_clear_sighand.c | 129 | ||||
-rw-r--r-- | tools/testing/selftests/clone3/clone3_selftests.h | 63 | ||||
-rw-r--r-- | tools/testing/selftests/clone3/clone3_set_tid.c | 397 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_fdinfo_test.c | 296 |
18 files changed, 1308 insertions, 58 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index a5689e2c1aa5..3e833d20d0f5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12861,6 +12861,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git F: samples/pidfd/ F: tools/testing/selftests/pidfd/ +F: tools/testing/selftests/clone3/ K: (?i)pidfd K: (?i)clone3 K: \b(clone_args|kernel_clone_args)\b diff --git a/include/linux/pid.h b/include/linux/pid.h index 9645b1194c98..998ae7d24450 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -85,6 +85,10 @@ static inline struct pid *get_pid(struct pid *pid) extern void put_pid(struct pid *pid); extern struct task_struct *pid_task(struct pid *pid, enum pid_type); +static inline bool pid_has_task(struct pid *pid, enum pid_type type) +{ + return !hlist_empty(&pid->tasks[type]); +} extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type); extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); @@ -120,7 +124,8 @@ extern struct pid *find_vpid(int nr); extern struct pid *find_get_pid(int nr); extern struct pid *find_ge_pid(int nr, struct pid_namespace *); -extern struct pid *alloc_pid(struct pid_namespace *ns); +extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, + size_t set_tid_size); extern void free_pid(struct pid *pid); extern void disable_pid_allocation(struct pid_namespace *ns); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 49538b172483..2ed6af88794b 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -12,6 +12,8 @@ #include <linux/ns_common.h> #include <linux/idr.h> +/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ +#define MAX_PID_NS_LEVEL 32 struct fs_pin; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 4b1c3b664f51..f1879884238e 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -26,6 +26,9 @@ struct kernel_clone_args { unsigned long stack; 
unsigned long stack_size; unsigned long tls; + pid_t *set_tid; + /* Number of elements in *set_tid */ + size_t set_tid_size; }; /* diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 25b4fa00bad1..4a0217832464 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -33,31 +33,48 @@ #define CLONE_NEWNET 0x40000000 /* New network namespace */ #define CLONE_IO 0x80000000 /* Clone io context */ +/* Flags for the clone3() syscall. */ +#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */ + #ifndef __ASSEMBLY__ /** * struct clone_args - arguments for the clone3 syscall - * @flags: Flags for the new process as listed above. - * All flags are valid except for CSIGNAL and - * CLONE_DETACHED. - * @pidfd: If CLONE_PIDFD is set, a pidfd will be - * returned in this argument. - * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the - * child process will be returned in the child's - * memory. - * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of - * the child process will be returned in the - * parent's memory. - * @exit_signal: The exit_signal the parent process will be - * sent when the child exits. - * @stack: Specify the location of the stack for the - * child process. - * Note, @stack is expected to point to the - * lowest address. The stack direction will be - * determined by the kernel and set up - * appropriately based on @stack_size. - * @stack_size: The size of the stack for the child process. - * @tls: If CLONE_SETTLS is set, the tls descriptor - * is set to tls. + * @flags: Flags for the new process as listed above. + * All flags are valid except for CSIGNAL and + * CLONE_DETACHED. + * @pidfd: If CLONE_PIDFD is set, a pidfd will be + * returned in this argument. + * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the + * child process will be returned in the child's + * memory. 
+ * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of + * the child process will be returned in the + * parent's memory. + * @exit_signal: The exit_signal the parent process will be + * sent when the child exits. + * @stack: Specify the location of the stack for the + * child process. + * Note, @stack is expected to point to the + * lowest address. The stack direction will be + * determined by the kernel and set up + * appropriately based on @stack_size. + * @stack_size: The size of the stack for the child process. + * @tls: If CLONE_SETTLS is set, the tls descriptor + * is set to tls. + * @set_tid: Pointer to an array of type *pid_t. The size + * of the array is defined using @set_tid_size. + * This array is used to select PIDs/TIDs for + * newly created processes. The first element in + * this defines the PID in the most nested PID + * namespace. Each additional element in the array + * defines the PID in the parent PID namespace of + * the original PID namespace. If the array has + * less entries than the number of currently + * nested PID namespaces only the PIDs in the + * corresponding namespaces are set. + * @set_tid_size: This defines the size of the array referenced + * in @set_tid. This cannot be larger than the + * kernel's limit of nested PID namespaces. * * The structure is versioned by size and thus extensible. 
* New struct members must go at the end of the struct and @@ -72,10 +89,13 @@ struct clone_args { __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; }; #endif #define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ +#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ /* * Scheduling policies diff --git a/kernel/exit.c b/kernel/exit.c index a46a50d67002..f2d20ab74422 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1457,7 +1457,7 @@ repeat: */ wo->notask_error = -ECHILD; if ((wo->wo_type < PIDTYPE_MAX) && - (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type]))) + (!wo->wo_pid || !pid_has_task(wo->wo_pid, wo->wo_type))) goto notask; set_current_state(TASK_INTERRUPTIBLE); diff --git a/kernel/fork.c b/kernel/fork.c index 13b38794efb5..35f91ee91057 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1517,6 +1517,11 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) spin_lock_irq(&current->sighand->siglock); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); spin_unlock_irq(&current->sighand->siglock); + + /* Reset all signal handlers not set to SIG_IGN to SIG_DFL. */ + if (clone_flags & CLONE_CLEAR_SIGHAND) + flush_signal_handlers(tsk, 0); + return 0; } @@ -1695,12 +1700,68 @@ static int pidfd_release(struct inode *inode, struct file *file) } #ifdef CONFIG_PROC_FS +/** + * pidfd_show_fdinfo - print information about a pidfd + * @m: proc fdinfo file + * @f: file referencing a pidfd + * + * Pid: + * This function will print the pid that a given pidfd refers to in the + * pid namespace of the procfs instance. + * If the pid namespace of the process is not a descendant of the pid + * namespace of the procfs instance 0 will be shown as its pid. This is + * similar to calling getppid() on a process whose parent is outside of + * its pid namespace. 
+ * + * NSpid: + * If pid namespaces are supported then this function will also print + * the pid that a given pidfd refers to for all descendant pid namespaces + * starting from the current pid namespace of the instance, i.e. the + * Pid field and the first entry in the NSpid field will be identical. + * If the pid namespace of the process is not a descendant of the pid + * namespace of the procfs instance 0 will be shown as its first NSpid + * entry and no others will be shown. + * Note that this differs from the Pid and NSpid fields in + * /proc/<pid>/status where Pid and NSpid are always shown relative to + * the pid namespace of the procfs instance. The difference becomes + * obvious when sending around a pidfd between pid namespaces from a + * different branch of the tree, i.e. where no ancestral relation is + * present between the pid namespaces: + * - create two new pid namespaces ns1 and ns2 in the initial pid + * namespace (also take care to create new mount namespaces in the + * new pid namespace and mount procfs) + * - create a process with a pidfd in ns1 + * - send pidfd from ns1 to ns2 + * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid + * have exactly one entry, which is 0 + */ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) { - struct pid_namespace *ns = proc_pid_ns(file_inode(m->file)); struct pid *pid = f->private_data; + struct pid_namespace *ns; + pid_t nr = -1; + + if (likely(pid_has_task(pid, PIDTYPE_PID))) { + ns = proc_pid_ns(file_inode(m->file)); + nr = pid_nr_ns(pid, ns); + } + + seq_put_decimal_ll(m, "Pid:\t", nr); + +#ifdef CONFIG_PID_NS + seq_put_decimal_ll(m, "\nNSpid:\t", nr); + if (nr > 0) { + int i; - seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns)); + /* If nr is positive it means that 'pid' is valid and that + * ns, i.e. the pid namespace associated with the procfs + * instance, is in the pid namespace hierarchy of pid. + * Start at one below the already printed level. 
+ */ + for (i = ns->level + 1; i <= pid->level; i++) + seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); + } +#endif seq_putc(m, '\n'); } #endif @@ -2026,7 +2087,8 @@ static __latent_entropy struct task_struct *copy_process( stackleak_task_init(p); if (pid != &init_struct_pid) { - pid = alloc_pid(p->nsproxy->pid_ns_for_children); + pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid, + args->set_tid_size); if (IS_ERR(pid)) { retval = PTR_ERR(pid); goto bad_fork_cleanup_thread; @@ -2529,6 +2591,7 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, { int err; struct clone_args args; + pid_t *kset_tid = kargs->set_tid; if (unlikely(usize > PAGE_SIZE)) return -E2BIG; @@ -2539,6 +2602,15 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, if (err) return err; + if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL)) + return -EINVAL; + + if (unlikely(!args.set_tid && args.set_tid_size > 0)) + return -EINVAL; + + if (unlikely(args.set_tid && args.set_tid_size == 0)) + return -EINVAL; + /* * Verify that higher 32bits of exit_signal are unset and that * it is a valid signal @@ -2556,8 +2628,16 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, .stack = args.stack, .stack_size = args.stack_size, .tls = args.tls, + .set_tid_size = args.set_tid_size, }; + if (args.set_tid && + copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), + (kargs->set_tid_size * sizeof(pid_t)))) + return -EFAULT; + + kargs->set_tid = kset_tid; + return 0; } @@ -2591,11 +2671,8 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) static bool clone3_args_valid(struct kernel_clone_args *kargs) { - /* - * All lower bits of the flag word are taken. - * Verify that no other unknown flags are passed along. - */ - if (kargs->flags & ~CLONE_LEGACY_FLAGS) + /* Verify that no unknown flags are passed along. 
*/ + if (kargs->flags & ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND)) return false; /* @@ -2605,6 +2682,10 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs) if (kargs->flags & (CLONE_DETACHED | CSIGNAL)) return false; + if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) == + (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) + return false; + if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) && kargs->exit_signal) return false; @@ -2631,6 +2712,9 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) int err; struct kernel_clone_args kargs; + pid_t set_tid[MAX_PID_NS_LEVEL]; + + kargs.set_tid = set_tid; err = copy_clone_args_from_user(&kargs, uargs, size); if (err) diff --git a/kernel/pid.c b/kernel/pid.c index 0a9f2e437217..2278e249141d 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -157,7 +157,8 @@ void free_pid(struct pid *pid) call_rcu(&pid->rcu, delayed_put_pid); } -struct pid *alloc_pid(struct pid_namespace *ns) +struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, + size_t set_tid_size) { struct pid *pid; enum pid_type type; @@ -166,6 +167,17 @@ struct pid *alloc_pid(struct pid_namespace *ns) struct upid *upid; int retval = -ENOMEM; + /* + * set_tid_size contains the size of the set_tid array. Starting at + * the most nested currently active PID namespace it tells alloc_pid() + * which PID to set for a process in that most nested PID namespace + * up to set_tid_size PID namespaces. It does not have to set the PID + * for a process in all nested PID namespaces but set_tid_size must + * never be greater than the current ns->level + 1. 
+ */ + if (set_tid_size > ns->level + 1) + return ERR_PTR(-EINVAL); + pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); if (!pid) return ERR_PTR(retval); @@ -174,24 +186,54 @@ struct pid *alloc_pid(struct pid_namespace *ns) pid->level = ns->level; for (i = ns->level; i >= 0; i--) { - int pid_min = 1; + int tid = 0; + + if (set_tid_size) { + tid = set_tid[ns->level - i]; + + retval = -EINVAL; + if (tid < 1 || tid >= pid_max) + goto out_free; + /* + * Also fail if a PID != 1 is requested and + * no PID 1 exists. + */ + if (tid != 1 && !tmp->child_reaper) + goto out_free; + retval = -EPERM; + if (!ns_capable(tmp->user_ns, CAP_SYS_ADMIN)) + goto out_free; + set_tid_size--; + } idr_preload(GFP_KERNEL); spin_lock_irq(&pidmap_lock); - /* - * init really needs pid 1, but after reaching the maximum - * wrap back to RESERVED_PIDS - */ - if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS) - pid_min = RESERVED_PIDS; - - /* - * Store a null pointer so find_pid_ns does not find - * a partially initialized PID (see below). - */ - nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, - pid_max, GFP_ATOMIC); + if (tid) { + nr = idr_alloc(&tmp->idr, NULL, tid, + tid + 1, GFP_ATOMIC); + /* + * If ENOSPC is returned it means that the PID is + * already in use. Return EEXIST in that case. + */ + if (nr == -ENOSPC) + nr = -EEXIST; + } else { + int pid_min = 1; + /* + * init really needs pid 1, but after reaching the + * maximum wrap back to RESERVED_PIDS + */ + if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS) + pid_min = RESERVED_PIDS; + + /* + * Store a null pointer so find_pid_ns does not find + * a partially initialized PID (see below). 
+ */ + nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, + pid_max, GFP_ATOMIC); + } spin_unlock_irq(&pidmap_lock); idr_preload_end(); @@ -299,7 +341,7 @@ static void __change_pid(struct task_struct *task, enum pid_type type, *pid_ptr = new; for (tmp = PIDTYPE_MAX; --tmp >= 0; ) - if (!hlist_empty(&pid->tasks[tmp])) + if (pid_has_task(pid, tmp)) return; free_pid(pid); @@ -497,7 +539,7 @@ static int pidfd_create(struct pid *pid) */ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) { - int fd, ret; + int fd; struct pid *p; if (flags) @@ -510,13 +552,11 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) if (!p) return -ESRCH; - ret = 0; - rcu_read_lock(); - if (!pid_task(p, PIDTYPE_TGID)) - ret = -EINVAL; - rcu_read_unlock(); + if (pid_has_task(p, PIDTYPE_TGID)) + fd = pidfd_create(p); + else + fd = -EINVAL; - fd = ret ?: pidfd_create(p); put_pid(p); return fd; } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a6a79f85c81a..d40017e79ebe 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -26,8 +26,6 @@ static DEFINE_MUTEX(pid_caches_mutex); static struct kmem_cache *pid_ns_cachep; -/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ -#define MAX_PID_NS_LEVEL 32 /* Write once array, filled from the beginning. 
*/ static struct kmem_cache *pid_cache[MAX_PID_NS_LEVEL]; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 0b6b81ba4421..0fb95f25944d 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -5,6 +5,7 @@ TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities TARGETS += cgroup +TARGETS += clone3 TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += drivers/dma-buf diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore new file mode 100644 index 000000000000..0dc4f32c6cb8 --- /dev/null +++ b/tools/testing/selftests/clone3/.gitignore @@ -0,0 +1,3 @@ +clone3 +clone3_clear_sighand +clone3_set_tid diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile new file mode 100644 index 000000000000..cf976c732906 --- /dev/null +++ b/tools/testing/selftests/clone3/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -g -I../../../../usr/include/ + +TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid + +include ../lib.mk diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c new file mode 100644 index 000000000000..f14c269a5a18 --- /dev/null +++ b/tools/testing/selftests/clone3/clone3.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Based on Christian Brauner's clone3() example */ + +#define _GNU_SOURCE +#include <errno.h> +#include <inttypes.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sched.h> + +#include "../kselftest.h" +#include "clone3_selftests.h" + +/* + * Different sizes of struct clone_args + */ +#ifndef CLONE3_ARGS_SIZE_V0 +#define CLONE3_ARGS_SIZE_V0 64 +#endif + +enum test_mode { + CLONE3_ARGS_NO_TEST, + CLONE3_ARGS_ALL_0, + 
CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG, + CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG, + CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG, + CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG, +}; + +static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) +{ + struct clone_args args = { + .flags = flags, + .exit_signal = SIGCHLD, + }; + + struct clone_args_extended { + struct clone_args args; + __aligned_u64 excess_space[2]; + } args_ext; + + pid_t pid = -1; + int status; + + memset(&args_ext, 0, sizeof(args_ext)); + if (size > sizeof(struct clone_args)) + args_ext.excess_space[1] = 1; + + if (size == 0) + size = sizeof(struct clone_args); + + switch (test_mode) { + case CLONE3_ARGS_ALL_0: + args.flags = 0; + args.exit_signal = 0; + break; + case CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG: + args.exit_signal = 0xbadc0ded00000000ULL; + break; + case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG: + args.exit_signal = 0x0000000080000000ULL; + break; + case CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG: + args.exit_signal = 0x0000000000000100ULL; + break; + case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG: + args.exit_signal = 0x00000000000000f0ULL; + break; + } + + memcpy(&args_ext.args, &args, sizeof(struct clone_args)); + + pid = sys_clone3((struct clone_args *)&args_ext, size); + if (pid < 0) { + ksft_print_msg("%s - Failed to create new process\n", + strerror(errno)); + return -errno; + } + + if (pid == 0) { + ksft_print_msg("I am the child, my PID is %d\n", getpid()); + _exit(EXIT_SUCCESS); + } + + ksft_print_msg("I am the parent (%d). 
My child's pid is %d\n", + getpid(), pid); + + if (waitpid(-1, &status, __WALL) < 0) { + ksft_print_msg("Child returned %s\n", strerror(errno)); + return -errno; + } + if (WEXITSTATUS(status)) + return WEXITSTATUS(status); + + return 0; +} + +static void test_clone3(uint64_t flags, size_t size, int expected, + enum test_mode test_mode) +{ + int ret; + + ksft_print_msg( + "[%d] Trying clone3() with flags %#" PRIx64 " (size %zu)\n", + getpid(), flags, size); + ret = call_clone3(flags, size, test_mode); + ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n", + getpid(), ret, expected); + if (ret != expected) + ksft_test_result_fail( + "[%d] Result (%d) is different than expected (%d)\n", + getpid(), ret, expected); + else + ksft_test_result_pass( + "[%d] Result (%d) matches expectation (%d)\n", + getpid(), ret, expected); +} + +int main(int argc, char *argv[]) +{ + pid_t pid; + + uid_t uid = getuid(); + + test_clone3_supported(); + ksft_print_header(); + ksft_set_plan(17); + + /* Just a simple clone3() should return 0.*/ + test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() in a new PID NS.*/ + if (uid == 0) + test_clone3(CLONE_NEWPID, 0, 0, CLONE3_ARGS_NO_TEST); + else + ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); + + /* Do a clone3() with CLONE3_ARGS_SIZE_V0. 
*/ + test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */ + test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with sizeof(struct clone_args) + 8 */ + test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with exit_signal having highest 32 bits non-zero */ + test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG); + + /* Do a clone3() with negative 32-bit exit_signal */ + test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG); + + /* Do a clone3() with exit_signal not fitting into CSIGNAL mask */ + test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG); + + /* Do a clone3() with NSIG < exit_signal < CSIG */ + test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG); + + test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0); + + test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG, + CLONE3_ARGS_ALL_0); + + test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG, + CLONE3_ARGS_ALL_0); + + /* Do a clone3() with > page size */ + test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */ + if (uid == 0) + test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0, + CLONE3_ARGS_NO_TEST); + else + ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); + + /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */ + test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, + CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */ + if (uid == 0) + test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0, + CLONE3_ARGS_NO_TEST); + else + ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); + + /* Do a clone3() with > page size in a new PID NS */ + test_clone3(CLONE_NEWPID, getpagesize() + 8, -E2BIG, + CLONE3_ARGS_NO_TEST); + + return !ksft_get_fail_cnt() ? 
ksft_exit_pass() : ksft_exit_fail(); +} diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c new file mode 100644 index 000000000000..9e1af8aa7698 --- /dev/null +++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <sys/syscall.h> +#include <sys/wait.h> + +#include "../kselftest.h" +#include "clone3_selftests.h" + +#ifndef CLONE_CLEAR_SIGHAND +#define CLONE_CLEAR_SIGHAND 0x100000000ULL +#endif + +static void nop_handler(int signo) +{ +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static void test_clone3_clear_sighand(void) +{ + int ret; + pid_t pid; + struct clone_args args = {}; + struct sigaction act; + + /* + * Check that CLONE_CLEAR_SIGHAND and CLONE_SIGHAND are mutually + * exclusive. 
+ */ + args.flags |= CLONE_CLEAR_SIGHAND | CLONE_SIGHAND; + args.exit_signal = SIGCHLD; + pid = sys_clone3(&args, sizeof(args)); + if (pid > 0) + ksft_exit_fail_msg( + "clone3(CLONE_CLEAR_SIGHAND | CLONE_SIGHAND) succeeded\n"); + + act.sa_handler = nop_handler; + ret = sigemptyset(&act.sa_mask); + if (ret < 0) + ksft_exit_fail_msg("%s - sigemptyset() failed\n", + strerror(errno)); + + act.sa_flags = 0; + + /* Register signal handler for SIGUSR1 */ + ret = sigaction(SIGUSR1, &act, NULL); + if (ret < 0) + ksft_exit_fail_msg( + "%s - sigaction(SIGUSR1, &act, NULL) failed\n", + strerror(errno)); + + /* Register signal handler for SIGUSR2 */ + ret = sigaction(SIGUSR2, &act, NULL); + if (ret < 0) + ksft_exit_fail_msg( + "%s - sigaction(SIGUSR2, &act, NULL) failed\n", + strerror(errno)); + + /* Check that CLONE_CLEAR_SIGHAND works. */ + args.flags = CLONE_CLEAR_SIGHAND; + pid = sys_clone3(&args, sizeof(args)); + if (pid < 0) + ksft_exit_fail_msg("%s - clone3(CLONE_CLEAR_SIGHAND) failed\n", + strerror(errno)); + + if (pid == 0) { + ret = sigaction(SIGUSR1, NULL, &act); + if (ret < 0) + exit(EXIT_FAILURE); + + if (act.sa_handler != SIG_DFL) + exit(EXIT_FAILURE); + + ret = sigaction(SIGUSR2, NULL, &act); + if (ret < 0) + exit(EXIT_FAILURE); + + if (act.sa_handler != SIG_DFL) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); + } + + ret = wait_for_pid(pid); + if (ret) + ksft_exit_fail_msg( + "Failed to clear signal handler for child process\n"); + + ksft_test_result_pass("Cleared signal handlers for child process\n"); +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + test_clone3_supported(); + + ksft_set_plan(1); + + test_clone3_clear_sighand(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h new file mode 100644 index 000000000000..a3f2c8ad8bcc --- /dev/null +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 
*/ + +#ifndef _CLONE3_SELFTESTS_H +#define _CLONE3_SELFTESTS_H + +#define _GNU_SOURCE +#include <sched.h> +#include <stdint.h> +#include <syscall.h> +#include <linux/types.h> + +#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) + +#ifndef __NR_clone3 +#define __NR_clone3 -1 +struct clone_args { + __aligned_u64 flags; + __aligned_u64 pidfd; + __aligned_u64 child_tid; + __aligned_u64 parent_tid; + __aligned_u64 exit_signal; + __aligned_u64 stack; + __aligned_u64 stack_size; + __aligned_u64 tls; + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; +}; +#endif + +static pid_t sys_clone3(struct clone_args *args, size_t size) +{ + fflush(stdout); + fflush(stderr); + return syscall(__NR_clone3, args, size); +} + +static inline void test_clone3_supported(void) +{ + pid_t pid; + struct clone_args args = {}; + + if (__NR_clone3 < 0) + ksft_exit_skip("clone3() syscall is not supported\n"); + + /* Set to something that will always cause EINVAL. */ + args.exit_signal = -1; + pid = sys_clone3(&args, sizeof(args)); + if (!pid) + exit(EXIT_SUCCESS); + + if (pid > 0) { + wait(NULL); + ksft_exit_fail_msg( + "Managed to create child process with invalid exit_signal\n"); + } + + if (errno == ENOSYS) + ksft_exit_skip("clone3() syscall is not supported\n"); + + ksft_print_msg("clone3() syscall supported\n"); +} + +#endif /* _CLONE3_SELFTESTS_H */ diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c new file mode 100644 index 000000000000..25beb22f35b5 --- /dev/null +++ b/tools/testing/selftests/clone3/clone3_set_tid.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Based on Christian Brauner's clone3() example. + * These tests are assuming to be running in the host's + * PID namespace. 
+ */ + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sched.h> + < |