summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst34
-rw-r--r--Documentation/scheduler/sched-bwc.rst74
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/Kconfig2
-rw-r--r--arch/ia64/Kconfig1
-rw-r--r--arch/x86/entry/entry_32.S6
-rw-r--r--arch/x86/entry/entry_64.S4
-rw-r--r--arch/x86/entry/thunk_32.S2
-rw-r--r--arch/x86/entry/thunk_64.S4
-rw-r--r--arch/x86/include/asm/preempt.h2
-rw-r--r--arch/x86/kernel/cpu/amd.c5
-rw-r--r--arch/x86/kernel/dumpstack.c7
-rw-r--r--arch/x86/kernel/kprobes/core.c2
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--include/asm-generic/preempt.h4
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/cpuset.h13
-rw-r--r--include/linux/preempt.h6
-rw-r--r--include/linux/rcupdate.h2
-rw-r--r--include/linux/rcutree.h2
-rw-r--r--include/linux/sched.h11
-rw-r--r--include/linux/sched/deadline.h8
-rw-r--r--include/linux/sched/task.h6
-rw-r--r--include/linux/sched/topology.h10
-rw-r--r--include/linux/spinlock.h2
-rw-r--r--include/linux/spinlock_api_smp.h2
-rw-r--r--include/linux/topology.h14
-rw-r--r--include/linux/torture.h2
-rw-r--r--init/Kconfig22
-rw-r--r--init/init_task.c2
-rw-r--r--init/main.c2
-rw-r--r--kernel/cgroup/cgroup.c2
-rw-r--r--kernel/cgroup/cpuset.c163
-rw-r--r--kernel/events/core.c9
-rw-r--r--kernel/irq/manage.c3
-rw-r--r--kernel/kprobes.c2
-rw-r--r--kernel/locking/rtmutex.c6
-rw-r--r--kernel/rcu/Kconfig8
-rw-r--r--kernel/rcu/tree.c12
-rw-r--r--kernel/rcu/tree_stall.h6
-rw-r--r--kernel/sched/core.c561
-rw-r--r--kernel/sched/cpufreq_schedutil.c6
-rw-r--r--kernel/sched/deadline.c134
-rw-r--r--kernel/sched/fair.c409
-rw-r--r--kernel/sched/idle.c31
-rw-r--r--kernel/sched/isolation.c12
-rw-r--r--kernel/sched/psi.c2
-rw-r--r--kernel/sched/rt.c74
-rw-r--r--kernel/sched/sched.h63
-rw-r--r--kernel/sched/stats.h7
-rw-r--r--kernel/sched/stop_task.c22
-rw-r--r--kernel/sched/topology.c53
-rw-r--r--kernel/stop_machine.c2
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/ftrace.c2
-rw-r--r--kernel/trace/ring_buffer_benchmark.c2
-rw-r--r--kernel/trace/trace_events.c4
-rw-r--r--kernel/trace/trace_sched_wakeup.c3
-rw-r--r--mm/khugepaged.c2
-rw-r--r--mm/page_alloc.c2
60 files changed, 1274 insertions, 595 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 3b29005aa981..5f1c266131b0 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -951,6 +951,13 @@ controller implements weight and absolute bandwidth limit models for
normal scheduling policy and absolute bandwidth allocation model for
realtime scheduling policy.
+In all the above models, cycles distribution is defined only on a temporal
+base and it does not account for the frequency at which tasks are executed.
+The (optional) utilization clamping support allows hinting the schedutil
+cpufreq governor about the minimum desired frequency which should always be
+provided by a CPU, as well as the maximum desired frequency, which should not
+be exceeded by a CPU.
+
WARNING: cgroup2 doesn't yet support control of realtime processes and
the cpu controller can only be enabled when all RT processes are in
the root cgroup. Be aware that system management software may already
@@ -1016,6 +1023,33 @@ All time durations are in microseconds.
Shows pressure stall information for CPU. See
Documentation/accounting/psi.rst for details.
+ cpu.uclamp.min
+ A read-write single value file which exists on non-root cgroups.
+ The default is "0", i.e. no utilization boosting.
+
+ The requested minimum utilization (protection) as a percentage
+ rational number, e.g. 12.34 for 12.34%.
+
+ This interface allows reading and setting minimum utilization clamp
+ values similar to the sched_setattr(2). This minimum utilization
+ value is used to clamp the task specific minimum utilization clamp.
+
+ The requested minimum utilization (protection) is always capped by
+ the current value for the maximum utilization (limit), i.e.
+ `cpu.uclamp.max`.
+
+ cpu.uclamp.max
+ A read-write single value file which exists on non-root cgroups.
+ The default is "max", i.e. no utilization capping.
+
+ The requested maximum utilization (limit) as a percentage rational
+ number, e.g. 98.76 for 98.76%.
+
+ This interface allows reading and setting maximum utilization clamp
+ values similar to the sched_setattr(2). This maximum utilization
+ value is used to clamp the task specific maximum utilization clamp.
+
+
Memory
------
diff --git a/Documentation/scheduler/sched-bwc.rst b/Documentation/scheduler/sched-bwc.rst
index 3a9064219656..9801d6b284b1 100644
--- a/Documentation/scheduler/sched-bwc.rst
+++ b/Documentation/scheduler/sched-bwc.rst
@@ -9,15 +9,16 @@ CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the
specification of the maximum CPU bandwidth available to a group or hierarchy.
The bandwidth allowed for a group is specified using a quota and period. Within
-each given "period" (microseconds), a group is allowed to consume only up to
-"quota" microseconds of CPU time. When the CPU bandwidth consumption of a
-group exceeds this limit (for that period), the tasks belonging to its
-hierarchy will be throttled and are not allowed to run again until the next
-period.
-
-A group's unused runtime is globally tracked, being refreshed with quota units
-above at each period boundary. As threads consume this bandwidth it is
-transferred to cpu-local "silos" on a demand basis. The amount transferred
+each given "period" (microseconds), a task group is allocated up to "quota"
+microseconds of CPU time. That quota is assigned to per-cpu run queues in
+slices as threads in the cgroup become runnable. Once all quota has been
+assigned any additional requests for quota will result in those threads being
+throttled. Throttled threads will not be able to run again until the next
+period when the quota is replenished.
+
+A group's unassigned quota is globally tracked, being refreshed back to
+cfs_quota units at each period boundary. As threads consume this bandwidth it
+is transferred to cpu-local "silos" on a demand basis. The amount transferred
within each of these updates is tunable and described as the "slice".
Management
@@ -35,12 +36,12 @@ The default values are::
A value of -1 for cpu.cfs_quota_us indicates that the group does not have any
bandwidth restriction in place, such a group is described as an unconstrained
-bandwidth group. This represents the traditional work-conserving behavior for
+bandwidth group. This represents the traditional work-conserving behavior for
CFS.
Writing any (valid) positive value(s) will enact the specified bandwidth limit.
-The minimum quota allowed for the quota or period is 1ms. There is also an
-upper bound on the period length of 1s. Additional restrictions exist when
+The minimum quota allowed for the quota or period is 1ms. There is also an
+upper bound on the period length of 1s. Additional restrictions exist when
bandwidth limits are used in a hierarchical fashion, these are explained in
more detail below.
@@ -53,8 +54,8 @@ unthrottled if it is in a constrained state.
System wide settings
--------------------
For efficiency run-time is transferred between the global pool and CPU local
-"silos" in a batch fashion. This greatly reduces global accounting pressure
-on large systems. The amount transferred each time such an update is required
+"silos" in a batch fashion. This greatly reduces global accounting pressure
+on large systems. The amount transferred each time such an update is required
is described as the "slice".
This is tunable via procfs::
@@ -97,6 +98,51 @@ There are two ways in which a group may become throttled:
In case b) above, even though the child may have runtime remaining it will not
be allowed to until the parent's runtime is refreshed.
+CFS Bandwidth Quota Caveats
+---------------------------
+Once a slice is assigned to a cpu it does not expire. However all but 1ms of
+the slice may be returned to the global pool if all threads on that cpu become
+unrunnable. This is configured at compile time by the min_cfs_rq_runtime
+variable. This is a performance tweak that helps prevent added contention on
+the global lock.
+
+The fact that cpu-local slices do not expire results in some interesting corner
+cases that should be understood.
+
+For cgroup cpu constrained applications that are cpu limited this is a
+relatively moot point because they will naturally consume the entirety of their
+quota as well as the entirety of each cpu-local slice in each period. As a
+result it is expected that nr_periods roughly equal nr_throttled, and that
+cpuacct.usage will increase roughly equal to cfs_quota_us in each period.
+
+For highly-threaded, non-cpu bound applications this non-expiration nuance
+allows applications to briefly burst past their quota limits by the amount of
+unused slice on each cpu that the task group is running on (typically at most
+1ms per cpu or as defined by min_cfs_rq_runtime). This slight burst only
+applies if quota had been assigned to a cpu and then not fully used or returned
+in previous periods. This burst amount will not be transferred between cores.
+As a result, this mechanism still strictly limits the task group to quota
+average usage, albeit over a longer time window than a single period. This
+also limits the burst ability to no more than 1ms per cpu. This provides
+a better, more predictable user experience for highly threaded applications with
+small quota limits on high core count machines. It also eliminates the
+propensity to throttle these applications while simultaneously using less than
+quota amounts of cpu. Another way to say this is that by allowing the unused
+portion of a slice to remain valid across periods we have decreased the
+possibility of wastefully expiring quota on cpu-local silos that don't need a
+full slice's amount of cpu time.
+
+The interaction between cpu-bound and non-cpu-bound-interactive applications
+should also be considered, especially when single core usage hits 100%. If you
+gave each of these applications half of a cpu-core and they both got scheduled
+on the same CPU it is theoretically possible that the non-cpu bound application
+will use up to 1ms additional quota in some periods, thereby preventing the
+cpu-bound application from fully using its quota by that same amount. In these
+instances it will be up to the CFS algorithm (see sched-design-CFS.rst) to
+decide which application is chosen to run, as they will both be runnable and
+have remaining quota. This runtime discrepancy will be made up in the following
+periods when the interactive application idles.
+
Examples
--------
1. Limit a group to 1 CPU worth of runtime::
diff --git a/MAINTAINERS b/MAINTAINERS
index cbe625343277..49f75d1b7b51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12578,6 +12578,7 @@ PERFORMANCE EVENTS SUBSYSTEM
M: Peter Zijlstra <peterz@infradead.org>
M: Ingo Molnar <mingo@redhat.com>
M: Arnaldo Carvalho de Melo <acme@kernel.org>
+R: Mark Rutland <mark.rutland@arm.com>
R: Alexander Shishkin <alexander.shishkin@linux.intel.com>
R: Jiri Olsa <jolsa@redhat.com>
R: Namhyung Kim <namhyung@kernel.org>
@@ -14175,6 +14176,12 @@ F: drivers/watchdog/sc1200wdt.c
SCHEDULER
M: Ingo Molnar <mingo@redhat.com>
M: Peter Zijlstra <peterz@infradead.org>
+M: Juri Lelli <juri.lelli@redhat.com> (SCHED_DEADLINE)
+M: Vincent Guittot <vincent.guittot@linaro.org> (SCHED_NORMAL)
+R: Dietmar Eggemann <dietmar.eggemann@arm.com> (SCHED_NORMAL)
+R: Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
+R: Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
+R: Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
L: linux-kernel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core
S: Maintained
diff --git a/arch/Kconfig b/arch/Kconfig
index 71d9ae0c0ea1..6baedab10dca 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -106,7 +106,7 @@ config STATIC_KEYS_SELFTEST
config OPTPROBES
def_bool y
depends on KPROBES && HAVE_OPTPROBES
- select TASKS_RCU if PREEMPT
+ select TASKS_RCU if PREEMPTION
config KPROBES_ON_FTRACE
def_bool y
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 13d49c232556..9711cf730929 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -311,6 +311,7 @@ config ARCH_DISCONTIGMEM_DEFAULT
config NUMA
bool "NUMA support"
depends on !FLATMEM
+ select SMP
help
Say Y to compile the kernel to support NUMA (Non-Uniform Memory
Access). This option is for configuring high-end multiprocessor
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4f86928246e7..f83ca5aa8b77 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -63,7 +63,7 @@
* enough to patch inline, increasing performance.
*/
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
@@ -1084,7 +1084,7 @@ restore_all:
INTERRUPT_RETURN
restore_all_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0, PER_CPU_VAR(__preempt_count)
jnz .Lno_preempt
@@ -1364,7 +1364,7 @@ ENTRY(xen_hypervisor_callback)
ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_evtchn_do_upcall
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
call xen_maybe_preempt_hcall
#endif
jmp ret_from_intr
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index be9ca198c581..af077ded1969 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -664,7 +664,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
/* Returning to kernel space */
retint_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/* Interrupts are off */
/* Check if we need preemption */
btl $9, EFLAGS(%rsp) /* were interrupts off? */
@@ -1115,7 +1115,7 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
call xen_evtchn_do_upcall
LEAVE_IRQ_STACK
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
call xen_maybe_preempt_hcall
#endif
jmp error_exit
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index cb3464525b37..2713490611a3 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -34,7 +34,7 @@
THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
#endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
THUNK ___preempt_schedule, preempt_schedule
THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
EXPORT_SYMBOL(___preempt_schedule)
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index cc20465b2867..ea5c4167086c 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -46,7 +46,7 @@
THUNK lockdep_sys_exit_thunk,lockdep_sys_exit
#endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
THUNK ___preempt_schedule, preempt_schedule
THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
EXPORT_SYMBOL(___preempt_schedule)
@@ -55,7 +55,7 @@
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
- || defined(CONFIG_PREEMPT)
+ || defined(CONFIG_PREEMPTION)
.L_restore:
popq %r11
popq %r10
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 99a7fa9ab0a3..3d4cb83a8828 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -102,7 +102,7 @@ static __always_inline bool should_resched(int preempt_offset)
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
extern asmlinkage void ___preempt_schedule(void);
# define __preempt_schedule() \
asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 68c363c341bf..7d6e0efcc2db 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/random.h>
+#include <linux/topology.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cacheinfo.h>
@@ -889,6 +890,10 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
+#ifdef CONFIG_NUMA
+ node_reclaim_distance = 32;
+#endif
+
/*
* Fix erratum 1076: CPB feature bit not being set in CPUID.
* Always set it, except when running under a hypervisor.
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 2b5886401e5f..e07424e19274 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -367,13 +367,18 @@ NOKPROBE_SYMBOL(oops_end);
int __die(const char *str, struct pt_regs *regs, long err)
{
+ const char *pr = "";
+
/* Save the regs of the first oops for the executive summary later. */
if (!die_counter)
exec_summary_regs = *regs;
+ if (IS_ENABLED(CONFIG_PREEMPTION))
+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
printk(KERN_DEFAULT
"%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ pr,
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 0e0b08008b5a..43fc13c831af 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -580,7 +580,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
if (setup_detour_execution(p, regs, reenter))
return;
-#if !defined(CONFIG_PREEMPT)
+#if !defined(CONFIG_PREEMPTION)
if (p->ainsn.boostable && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
if (!reenter)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4ab377c9fffe..4cc967178bf9 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -311,7 +311,7 @@ static void kvm_guest_cpu_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
#endif
pa |= KVM_ASYNC_PF_ENABLED;
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index c3046c920063..d683f5e6d791 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -78,11 +78,11 @@ static __always_inline bool should_resched(int preempt_offset)
tif_need_resched());
}
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
extern asmlinkage void preempt_schedule(void);
#define __preempt_schedule() preempt_schedule()
extern asmlinkage void preempt_schedule_notrace(void);
#define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
#endif /* __ASM_PREEMPT_H */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f6b048902d6c..3ba3e6da13a6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -150,6 +150,7 @@ struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp);
+void cgroup_enable_task_cg_lists(void);
void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
struct css_task_iter *it);
struct task_struct *css_task_iter_next(struct css_task_iter *it);
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 934633a05d20..04c20de66afc 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -40,14 +40,14 @@ static inline bool cpusets_enabled(void)
static inline void cpuset_inc(void)
{
- static_branch_inc(&cpusets_pre_enable_key);
- static_branch_inc(&cpusets_enabled_key);
+ static_branch_inc_cpuslocked(&cpusets_pre_enable_key);
+ static_branch_inc_cpuslocked(&cpusets_enabled_key);
}
static inline void cpuset_dec(void)
{
- static_branch_dec(&cpusets_enabled_key);
- static_branch_dec(&cpusets_pre_enable_key);
+ static_branch_dec_cpuslocked(&cpusets_enabled_key);
+ static_branch_dec_cpuslocked(&cpusets_pre_enable_key);
}
extern int cpuset_init(void);
@@ -55,6 +55,8 @@ extern void cpuset_init_smp(void);
extern void cpuset_force_rebuild(void);
extern void cpuset_update_active_cpus(void);
extern void cpuset_wait_for_hotplug(void);
+extern void cpuset_read_lock(void);
+extern void cpuset_read_unlock(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -176,6 +178,9 @@ static inline void cpuset_update_active_cpus(void)
static inline void cpuset_wait_for_hotplug(void) { }
+static inline void cpuset_read_lock(void) { }
+static inline void cpuset_read_unlock(void) { }
+
static inline void cpuset_cpus_allowed(struct task_struct *p,
struct cpumask *mask)
{
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index dd92b1a93919..bbb68dba37cc 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -182,7 +182,7 @@ do { \
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
#define preempt_enable() \
do { \
barrier(); \
@@ -203,7 +203,7 @@ do { \
__preempt_schedule(); \
} while (0)
-#else /* !CONFIG_PREEMPT */
+#else /* !CONFIG_PREEMPTION */
#define preempt_enable() \
do { \
barrier(); \
@@ -217,7 +217,7 @@ do { \
} while (0)
#define preempt_check_resched() do { } while (0)
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
#define preempt_disable_notrace() \
do { \
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 80d6056f5855..75a2eded7aa2 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -585,7 +585,7 @@ do { \
*
* In non-preemptible RCU implementations (TREE_RCU and TINY_RCU),
* it is illegal to block while in an RCU read-side critical section.
- * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPT
+ * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION
* kernel builds, RCU read-side critical sections may be preempted,
* but explicit blocking is illegal. Finally, in preemptible RCU
* implementations in real-time (with -rt patchset) kernel builds, RCU
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 735601ac27d3..18b1ed9864b0 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -53,7 +53,7 @@ void rcu_scheduler_starting(void);
extern int rcu_scheduler_active __read_mostly;
void rcu_end_inkernel_boot(void);
bool rcu_is_watching(void);
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
void rcu_all_qs(void);
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..f0edee94834a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -295,6 +295,11 @@ enum uclamp_id {
UCLAMP_CNT
};
+#ifdef CONFIG_SMP
+extern struct root_domain def_root_domain;
+extern struct mutex sched_domains_mutex;
+#endif
+
struct sched_info {
#ifdef CONFIG_SCHED_INFO
/* Cumulative counters: */
@@ -1767,7 +1772,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
* value indicates whether a reschedule was done in fact.
* cond_resched_lock() will drop the spinlock before scheduling,
*/
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
extern int _cond_resched(void);
#else
static inline int _cond_resched(void) { return 0; }
@@ -1796,12 +1801,12 @@ static inline void cond_resched_rcu(void)
/*
* Does a critical section need to be broken due to another
- * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * task waiting?: (te