summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c19
-rw-r--r--kernel/bpf/verifier.c13
-rw-r--r--kernel/cgroup/cgroup.c50
-rw-r--r--kernel/cgroup/cpuset.c63
-rw-r--r--kernel/dma/swiotlb.c30
-rw-r--r--kernel/events/core.c87
-rw-r--r--kernel/fork.c18
-rw-r--r--kernel/kprobes.c3
-rw-r--r--kernel/locking/lockdep_proc.c5
-rw-r--r--kernel/locking/mutex.c2
-rw-r--r--kernel/locking/rtmutex.c2
-rw-r--r--kernel/memremap.c245
-rw-r--r--kernel/module.c60
-rw-r--r--kernel/padata.c12
-rw-r--r--kernel/panic.c2
-rw-r--r--kernel/pid.c9
-rw-r--r--kernel/pid_namespace.c3
-rw-r--r--kernel/power/Kconfig6
-rw-r--r--kernel/ptrace.c101
-rw-r--r--kernel/resource.c88
-rw-r--r--kernel/sched/core.c4
-rw-r--r--kernel/signal.c69
-rw-r--r--kernel/sysctl.c215
-rw-r--r--kernel/trace/Kconfig12
-rw-r--r--kernel/trace/ftrace.c48
-rw-r--r--kernel/trace/ring_buffer.c17
-rw-r--r--kernel/trace/trace.c17
-rw-r--r--kernel/trace/trace_event_perf.c3
-rw-r--r--kernel/trace/trace_events.c10
-rw-r--r--kernel/trace/trace_events_filter.c3
-rw-r--r--kernel/trace/trace_kprobe.c357
-rw-r--r--kernel/trace/trace_output.c9
-rw-r--r--kernel/trace/trace_probe.c142
-rw-r--r--kernel/trace/trace_probe.h77
-rw-r--r--kernel/trace/trace_probe_tmpl.h36
-rw-r--r--kernel/trace/trace_uprobe.c180
-rw-r--r--kernel/tracepoint.c4
-rw-r--r--kernel/ucount.c6
38 files changed, 1220 insertions, 807 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 546ebee39e2a..5fcc7a17eb5a 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1073,11 +1073,18 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
!btf_type_is_var(size_type)))
return NULL;
- size = btf->resolved_sizes[size_type_id];
size_type_id = btf->resolved_ids[size_type_id];
size_type = btf_type_by_id(btf, size_type_id);
if (btf_type_nosize_or_null(size_type))
return NULL;
+ else if (btf_type_has_size(size_type))
+ size = size_type->size;
+ else if (btf_type_is_array(size_type))
+ size = btf->resolved_sizes[size_type_id];
+ else if (btf_type_is_ptr(size_type))
+ size = sizeof(void *);
+ else
+ return NULL;
}
*type_id = size_type_id;
@@ -1602,7 +1609,6 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
const struct btf_type *next_type;
u32 next_type_id = t->type;
struct btf *btf = env->btf;
- u32 next_type_size = 0;
next_type = btf_type_by_id(btf, next_type_id);
if (!next_type || btf_type_is_resolve_source_only(next_type)) {
@@ -1620,7 +1626,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
* save us a few type-following when we use it later (e.g. in
* pretty print).
*/
- if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+ if (!btf_type_id_size(btf, &next_type_id, NULL)) {
if (env_type_is_resolved(env, next_type_id))
next_type = btf_type_id_resolve(btf, &next_type_id);
@@ -1633,7 +1639,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
}
}
- env_stack_pop_resolved(env, next_type_id, next_type_size);
+ env_stack_pop_resolved(env, next_type_id, 0);
return 0;
}
@@ -1645,7 +1651,6 @@ static int btf_var_resolve(struct btf_verifier_env *env,
const struct btf_type *t = v->t;
u32 next_type_id = t->type;
struct btf *btf = env->btf;
- u32 next_type_size;
next_type = btf_type_by_id(btf, next_type_id);
if (!next_type || btf_type_is_resolve_source_only(next_type)) {
@@ -1675,12 +1680,12 @@ static int btf_var_resolve(struct btf_verifier_env *env,
* forward types or similar that would resolve to size of
* zero is allowed.
*/
- if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+ if (!btf_type_id_size(btf, &next_type_id, NULL)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
- env_stack_pop_resolved(env, next_type_id, next_type_size);
+ env_stack_pop_resolved(env, next_type_id, 0);
return 0;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a2e763703c30..5900cbb966b1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1519,9 +1519,9 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
return -EFAULT;
}
*stack_mask |= 1ull << spi;
- } else if (class == BPF_STX) {
+ } else if (class == BPF_STX || class == BPF_ST) {
if (*reg_mask & dreg)
- /* stx shouldn't be using _scalar_ dst_reg
+ /* stx & st shouldn't be using _scalar_ dst_reg
* to access memory. It means backtracking
* encountered a case of pointer subtraction.
*/
@@ -1540,7 +1540,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
if (!(*stack_mask & (1ull << spi)))
return 0;
*stack_mask &= ~(1ull << spi);
- *reg_mask |= sreg;
+ if (class == BPF_STX)
+ *reg_mask |= sreg;
} else if (class == BPF_JMP || class == BPF_JMP32) {
if (opcode == BPF_CALL) {
if (insn->src_reg == BPF_PSEUDO_CALL)
@@ -1569,10 +1570,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
if (mode == BPF_IND || mode == BPF_ABS)
/* to be analyzed */
return -ENOTSUPP;
- } else if (class == BPF_ST) {
- if (*reg_mask & dreg)
- /* likely pointer subtraction */
- return -ENOTSUPP;
}
return 0;
}
@@ -6106,11 +6103,13 @@ static int check_return_code(struct bpf_verifier_env *env)
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
range = tnum_range(1, 1);
+ break;
case BPF_PROG_TYPE_CGROUP_SKB:
if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
range = tnum_range(0, 3);
enforce_attach_type_range = tnum_range(2, 3);
}
+ break;
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 300b0c416341..753afbca549f 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2201,8 +2201,7 @@ static int cgroup_init_fs_context(struct fs_context *fc)
fc->ops = &cgroup_fs_context_ops;
else
fc->ops = &cgroup1_fs_context_ops;
- if (fc->user_ns)
- put_user_ns(fc->user_ns);
+ put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(ctx->ns->user_ns);
fc->global = true;
return 0;
@@ -2243,6 +2242,50 @@ static struct file_system_type cgroup2_fs_type = {
.fs_flags = FS_USERNS_MOUNT,
};
+#ifdef CONFIG_CPUSETS
+static const struct fs_context_operations cpuset_fs_context_ops = {
+ .get_tree = cgroup1_get_tree,
+ .free = cgroup_fs_context_free,
+};
+
+/*
+ * This is ugly, but preserves the userspace API for existing cpuset
+ * users. If someone tries to mount the "cpuset" filesystem, we
+ * silently switch it to mount "cgroup" instead
+ */
+static int cpuset_init_fs_context(struct fs_context *fc)
+{
+ char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER);
+ struct cgroup_fs_context *ctx;
+ int err;
+
+ err = cgroup_init_fs_context(fc);
+ if (err) {
+ kfree(agent);
+ return err;
+ }
+
+ fc->ops = &cpuset_fs_context_ops;
+
+ ctx = cgroup_fc2context(fc);
+ ctx->subsys_mask = 1 << cpuset_cgrp_id;
+ ctx->flags |= CGRP_ROOT_NOPREFIX;
+ ctx->release_agent = agent;
+
+ get_filesystem(&cgroup_fs_type);
+ put_filesystem(fc->fs_type);
+ fc->fs_type = &cgroup_fs_type;
+
+ return 0;
+}
+
+static struct file_system_type cpuset_fs_type = {
+ .name = "cpuset",
+ .init_fs_context = cpuset_init_fs_context,
+ .fs_flags = FS_USERNS_MOUNT,
+};
+#endif
+
int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns)
{
@@ -5761,6 +5804,9 @@ int __init cgroup_init(void)
WARN_ON(register_filesystem(&cgroup_fs_type));
WARN_ON(register_filesystem(&cgroup2_fs_type));
WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
+#ifdef CONFIG_CPUSETS
+ WARN_ON(register_filesystem(&cpuset_fs_type));
+#endif
return 0;
}
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index b3b02b9c4405..5aa37531ce76 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -356,59 +356,6 @@ static inline bool is_in_v2_mode(void)
}
/*
- * This is ugly, but preserves the userspace API for existing cpuset
- * users. If someone tries to mount the "cpuset" filesystem, we
- * silently switch it to mount "cgroup" instead
- */
-static int cpuset_get_tree(struct fs_context *fc)
-{
- struct file_system_type *cgroup_fs;
- struct fs_context *new_fc;
- int ret;
-
- cgroup_fs = get_fs_type("cgroup");
- if (!cgroup_fs)
- return -ENODEV;
-
- new_fc = fs_context_for_mount(cgroup_fs, fc->sb_flags);
- if (IS_ERR(new_fc)) {
- ret = PTR_ERR(new_fc);
- } else {
- static const char agent_path[] = "/sbin/cpuset_release_agent";
- ret = vfs_parse_fs_string(new_fc, "cpuset", NULL, 0);
- if (!ret)
- ret = vfs_parse_fs_string(new_fc, "noprefix", NULL, 0);
- if (!ret)
- ret = vfs_parse_fs_string(new_fc, "release_agent",
- agent_path, sizeof(agent_path) - 1);
- if (!ret)
- ret = vfs_get_tree(new_fc);
- if (!ret) { /* steal the result */
- fc->root = new_fc->root;
- new_fc->root = NULL;
- }
- put_fs_context(new_fc);
- }
- put_filesystem(cgroup_fs);
- return ret;
-}
-
-static const struct fs_context_operations cpuset_fs_context_ops = {
- .get_tree = cpuset_get_tree,
-};
-
-static int cpuset_init_fs_context(struct fs_context *fc)
-{
- fc->ops = &cpuset_fs_context_ops;
- return 0;
-}
-
-static struct file_system_type cpuset_fs_type = {
- .name = "cpuset",
- .init_fs_context = cpuset_init_fs_context,
-};
-
-/*
* Return in pmask the portion of a cpusets's cpus_allowed that
* are online. If none are online, walk up the cpuset hierarchy
* until we find one that does have some online cpus.
@@ -729,7 +676,7 @@ static inline int nr_cpusets(void)
* load balancing domains (sched domains) as specified by that partial
* partition.
*
- * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.rst
+ * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
* for a background explanation of this.
*
* Does not return errors, on the theory that the callers of this
@@ -2853,13 +2800,11 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
/**
* cpuset_init - initialize cpusets at system boot
*
- * Description: Initialize top_cpuset and the cpuset internal file system,
+ * Description: Initialize top_cpuset
**/
int __init cpuset_init(void)
{
- int err = 0;
-
BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
@@ -2873,10 +2818,6 @@ int __init cpuset_init(void)
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
top_cpuset.relax_domain_level = -1;
- err = register_filesystem(&cpuset_fs_type);
- if (err < 0)
- return err;
-
BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
return 0;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 62fa5a82a065..9de232229063 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -129,15 +129,17 @@ setup_io_tlb_npages(char *str)
}
early_param("swiotlb", setup_io_tlb_npages);
+static bool no_iotlb_memory;
+
unsigned long swiotlb_nr_tbl(void)
{
- return io_tlb_nslabs;
+ return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs;
}
EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
unsigned int swiotlb_max_segment(void)
{
- return max_segment;
+ return unlikely(no_iotlb_memory) ? 0 : max_segment;
}
EXPORT_SYMBOL_GPL(swiotlb_max_segment);
@@ -160,8 +162,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
-static bool no_iotlb_memory;
-
void swiotlb_print_info(void)
{
unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
@@ -317,6 +317,14 @@ swiotlb_late_init_with_default_size(size_t default_size)
return rc;
}
+static void swiotlb_cleanup(void)
+{
+ io_tlb_end = 0;
+ io_tlb_start = 0;
+ io_tlb_nslabs = 0;
+ max_segment = 0;
+}
+
int
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
@@ -367,10 +375,7 @@ cleanup4:
sizeof(int)));
io_tlb_list = NULL;
cleanup3:
- io_tlb_end = 0;
- io_tlb_start = 0;
- io_tlb_nslabs = 0;
- max_segment = 0;
+ swiotlb_cleanup();
return -ENOMEM;
}
@@ -394,10 +399,7 @@ void __init swiotlb_exit(void)
memblock_free_late(io_tlb_start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
}
- io_tlb_start = 0;
- io_tlb_end = 0;
- io_tlb_nslabs = 0;
- max_segment = 0;
+ swiotlb_cleanup();
}
/*
@@ -546,7 +548,7 @@ not_found:
if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
size, io_tlb_nslabs, tmp_io_tlb_used);
- return DMA_MAPPING_ERROR;
+ return (phys_addr_t)DMA_MAPPING_ERROR;
found:
io_tlb_used += nslots;
spin_unlock_irqrestore(&io_tlb_lock, flags);
@@ -664,7 +666,7 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
/* Oh well, have to allocate and map a bounce buffer. */
*phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
*phys, size, dir, attrs);
- if (*phys == DMA_MAPPING_ERROR)
+ if (*phys == (phys_addr_t)DMA_MAPPING_ERROR)
return false;
/* Ensure that the address returned is DMA'ble */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 785d708f8553..026a14541a38 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2553,6 +2553,9 @@ unlock:
return ret;
}
+static bool exclusive_event_installable(struct perf_event *event,
+ struct perf_event_context *ctx);
+
/*
* Attach a performance event to a context.
*
@@ -2567,6 +2570,8 @@ perf_install_in_context(struct perf_event_context *ctx,
lockdep_assert_held(&ctx->mutex);
+ WARN_ON_ONCE(!exclusive_event_installable(event, ctx));
+
if (event->cpu != -1)
event->cpu = cpu;
@@ -4360,7 +4365,7 @@ static int exclusive_event_init(struct perf_event *event)
{
struct pmu *pmu = event->pmu;
- if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+ if (!is_exclusive_pmu(pmu))
return 0;
/*
@@ -4391,7 +4396,7 @@ static void exclusive_event_destroy(struct perf_event *event)
{
struct pmu *pmu = event->pmu;
- if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+ if (!is_exclusive_pmu(pmu))
return;
/* see comment in exclusive_event_init() */
@@ -4411,14 +4416,15 @@ static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
return false;
}
-/* Called under the same ctx::mutex as perf_install_in_context() */
static bool exclusive_event_installable(struct perf_event *event,
struct perf_event_context *ctx)
{
struct perf_event *iter_event;
struct pmu *pmu = event->pmu;
- if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+ lockdep_assert_held(&ctx->mutex);
+
+ if (!is_exclusive_pmu(pmu))
return true;
list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
@@ -4465,12 +4471,20 @@ static void _free_event(struct perf_event *event)
if (event->destroy)
event->destroy(event);
- if (event->ctx)
- put_ctx(event->ctx);
-
+ /*
+ * Must be after ->destroy(), due to uprobe_perf_close() using
+ * hw.target.
+ */
if (event->hw.target)
put_task_struct(event->hw.target);
+ /*
+ * perf_event_free_task() relies on put_ctx() being 'last', in particular
+ * all task references must be cleaned up.
+ */
+ if (event->ctx)
+ put_ctx(event->ctx);
+
exclusive_event_destroy(event);
module_put(event->pmu->module);
@@ -4650,8 +4664,17 @@ again:
mutex_unlock(&event->child_mutex);
list_for_each_entry_safe(child, tmp, &free_list, child_list) {
+ void *var = &child->ctx->refcount;
+
list_del(&child->child_list);
free_event(child);
+
+ /*
+ * Wake any perf_event_free_task() waiting for this event to be
+ * freed.
+ */
+ smp_mb(); /* pairs with wait_var_event() */
+ wake_up_var(var);
}
no_ctx:
@@ -10930,11 +10953,6 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_alloc;
}
- if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) {
- err = -EBUSY;
- goto err_context;
- }
-
/*
* Look up the group leader (we will attach this event to it):
*/
@@ -11022,6 +11040,18 @@ SYSCALL_DEFINE5(perf_event_open,
move_group = 0;
}
}
+
+ /*
+ * Failure to create exclusive events returns -EBUSY.
+ */
+ err = -EBUSY;
+ if (!exclusive_event_installable(group_leader, ctx))
+ goto err_locked;
+
+ for_each_sibling_event(sibling, group_leader) {
+ if (!exclusive_event_installable(sibling, ctx))
+ goto err_locked;
+ }
} else {
mutex_lock(&ctx->mutex);
}
@@ -11058,9 +11088,6 @@ SYSCALL_DEFINE5(perf_event_open,
* because we need to serialize with concurrent event creation.
*/
if (!exclusive_event_installable(event, ctx)) {
- /* exclusive and group stuff are assumed mutually exclusive */
- WARN_ON_ONCE(move_group);
-
err = -EBUSY;
goto err_locked;
}
@@ -11527,11 +11554,11 @@ static void perf_free_event(struct perf_event *event,
}
/*
- * Free an unexposed, unused context as created by inheritance by
- * perf_event_init_task below, used by fork() in case of fail.
+ * Free a context as created by inheritance by perf_event_init_task() below,
+ * used by fork() in case of fail.
*
- * Not all locks are strictly required, but take them anyway to be nice and
- * help out with the lockdep assertions.
+ * Even though the task has never lived, the context and events have been
+ * exposed through the child_list, so we must take care tearing it all down.
*/
void perf_event_free_task(struct task_struct *task)
{
@@ -11561,7 +11588,23 @@ void perf_event_free_task(struct task_struct *task)
perf_free_event(event, ctx);
mutex_unlock(&ctx->mutex);
- put_ctx(ctx);
+
+ /*
+ * perf_event_release_kernel() could've stolen some of our
+ * child events and still have them on its free_list. In that
+ * case we must wait for these events to have been freed (in
+ * particular all their references to this task must've been
+ * dropped).
+ *
+ * Without this copy_process() will unconditionally free this
+ * task (irrespective of its reference count) and
+ * _free_event()'s put_task_struct(event->hw.target) will be a
+ * use-after-free.
+ *
+ * Wait for all events to drop their context reference.
+ */
+ wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
+ put_ctx(ctx); /* must be last */
}
}
@@ -11575,9 +11618,7 @@ void perf_event_delayed_put(struct task_struct *task)
struct file *perf_event_get(unsigned int fd)
{
- struct file *file;
-
- file = fget_raw(fd);
+ struct file *file = fget(fd);
if (!file)
return ERR_PTR(-EBADF);
diff --git a/kernel/fork.c b/kernel/fork.c
index 8f3e2d97d771..d8ae0f1b4148 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -677,7 +677,6 @@ void __mmdrop(struct mm_struct *mm)
WARN_ON_ONCE(mm == current->active_mm);
mm_free_pgd(mm);
destroy_context(mm);
- hmm_mm_destroy(mm);
mmu_notifier_mm_destroy(mm);
check_mm(mm);
put_user_ns(mm->user_ns);
@@ -2406,6 +2405,16 @@ long _do_fork(struct kernel_clone_args *args)
return nr;
}
+bool legacy_clone_args_valid(const struct kernel_clone_args *kargs)
+{
+ /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
+ if ((kargs->flags & CLONE_PIDFD) &&
+ (kargs->flags & CLONE_PARENT_SETTID))
+ return false;
+
+ return true;
+}
+
#ifndef CONFIG_HAVE_COPY_THREAD_TLS
/* For compatibility with architectures that call do_fork directly rather than
* using the syscall entry points below. */
@@ -2417,6 +2426,7 @@ long do_fork(unsigned long clone_flags,
{
struct kernel_clone_args args = {
.flags = (clone_flags & ~CSIGNAL),
+ .pidfd = parent_tidptr,
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
.exit_signal = (clone_flags & CSIGNAL),
@@ -2424,6 +2434,9 @@ long do_fork(unsigned long clone_flags,
.stack_size = stack_size,
};
+ if (!legacy_clone_args_valid(&args))
+ return -EINVAL;
+
return _do_fork(&args);
}
#endif
@@ -2505,8 +2518,7 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
.tls = tls,
};
- /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
- if ((clone_flags & CLONE_PIDFD) && (clone_flags & CLONE_PARENT_SETTID))
+ if (!legacy_clone_args_valid(&args))
return -EINVAL;
return _do_fork(&args);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9f5433a52488..9873fc627d61 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2276,6 +2276,7 @@ static int __init init_kprobes(void)
init_test_probes();
return err;
}
+subsys_initcall(init_kprobes);
#ifdef CONFIG_DEBUG_FS
static void report_probe(struct seq_file *pi, struct kprobe *p,
@@ -2588,5 +2589,3 @@ static int __init debugfs_kprobe_init(void)
late_initcall(debugfs_kprobe_init);
#endif /* CONFIG_DEBUG_FS */
-
-module_init(init_kprobes);
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 9c49ec645d8b..65b6a1600c8f 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -210,6 +210,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
sum_forward_deps = 0;
+#ifdef CONFIG_PROVE_LOCKING
list_for_each_entry(class, &all_lock_classes, lock_entry) {
if (class->usage_mask == 0)
@@ -241,13 +242,13 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ)
nr_hardirq_read_unsafe++;
-#ifdef CONFIG_PROVE_LOCKING
sum_forward_deps += lockdep_count_forward_deps(class);
-#endif
}
#ifdef CONFIG_DEBUG_LOCKDEP
DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
#endif
+
+#endif
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
nr_lock_classes, MAX_LOCKDEP_KEYS);
seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 0c601ae072b3..edd1c082dbf5 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -16,7 +16,7 @@
* by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
* and Sven Dietrich.
*
- * Also see Documentation/locking/mutex-design.txt.
+ * Also see Documentation/locking/mutex-design.rst.
*/
#include <linux/mutex.h>
#include <linux/ww_mutex.h>
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 38fbf9fa7f1b..fa83d36e30c6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -9,7 +9,7 @@
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
* Copyright (C) 2006 Esben Nielsen
*
- * See Documentation/locking/rt-mutex-design.txt for details.
+ * See Documentation/locking/rt-mutex-design.rst for details.
*/
#include <linux/spinlock.h>
#include <linux/export.h>
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 6e1970719dc2..6ee03a816d67 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -11,41 +11,39 @@
#include <linux/types.h>
#include <linux/wait_bit.h>
#include <linux/xarray.h>
-#include <linux/hmm.h>
static DEFINE_XARRAY(pgmap_array);
#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
-vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
- unsigned long addr,
- swp_entry_t entry,
- unsigned int flags,
- pmd_t *pmdp)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL(devmap_managed_key);
+static atomic_t devmap_managed_enable;
+
+static void devmap_managed_enable_put(void *data)
{
- struct page *page = device_private_entry_to_page(entry);
- struct hmm_devmem *devmem;
+ if (atomic_dec_and_test(&devmap_managed_enable))
+ static_branch_disable(&devmap_managed_key);
+}
- devmem = container_of(page->pgmap, typeof(*devmem), pagemap);
+static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+{
+ if (!pgmap->ops || !pgmap->ops->page_free) {
+ WARN(1, "Missing page_free method\n");
+ return -EINVAL;
+ }
- /*
- * The page_fault() callback must migrate page back to system memory
- * so that CPU can access it. This might fail for various reasons
- * (device issue, device was unsafely unplugged, ...). When such
- * error conditions happen, the callback must return VM_FAULT_SIGBUS.
- *
- * Note that because memory cgroup charges are accounted to the device
- * memory, this should never fail because of memory restrictions (but
- * allocation of regular system page might still fail because we are
- * out of memory).
- *
- * There is a more in-depth description of what that callback can and
- * cannot do, in include/linux/memremap.h
- */
- return devmem->page_fault(vma, addr, page, flags, pmdp);
+ if (atomic_inc_return(&devmap_managed_enable) == 1)
+ static_branch_enable(&devmap_managed_key);
+ return devm_add_action_or_reset(dev, devmap_managed_enable_put, NULL);
}
-#endif /* CONFIG_DEVICE_PRIVATE */
+#else
+static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
static void pgmap_array_delete(struct resource *res)
{
@@ -56,14 +54,8 @@ static void pgmap_array_delete(struct resource *res)
static unsigned long pf