summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 11:34:53 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 11:34:53 -0700
commit330e9e46253cbfab178450c976aa90ef0f3ae940 (patch)
tree4a9875d264a477449a9c9a0ff23937550d1c3234 /kernel
parente94693f797400b9f424c69e1da9381e4846762fe (diff)
parent567b64aaefc4ef9ae3af124ae0b13dc13a6804a8 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar: "The sole purpose of these changes is to shrink and simplify the RCU code base, which has suffered from creeping bloat over the past couple of years. The end result is a net removal of ~2700 lines of code: 79 files changed, 1496 insertions(+), 4211 deletions(-) Plus there's a marked reduction in the Kconfig space complexity as well, here's the number of matches on 'grep RCU' in the .config: before after x86-defconfig 17 15 x86-allmodconfig 33 20" * 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (86 commits) rcu: Remove RCU CPU stall warnings from Tiny RCU rcu: Remove event tracing from Tiny RCU rcu: Move RCU debug Kconfig options to kernel/rcu rcu: Move RCU non-debug Kconfig options to kernel/rcu rcu: Eliminate NOCBs CPU-state Kconfig options rcu: Remove debugfs tracing srcu: Remove Classic SRCU srcu: Fix rcutorture-statistics typo rcu: Remove SPARSE_RCU_POINTER Kconfig option rcu: Remove the now-obsolete PROVE_RCU_REPEATEDLY Kconfig option rcu: Remove typecheck() from RCU locking wrapper functions rcu: Remove #ifdef moving rcu_end_inkernel_boot from rcupdate.h rcu: Remove nohz_full full-system-idle state machine rcu: Remove the RCU_KTHREAD_PRIO Kconfig option rcu: Remove *_SLOW_* Kconfig options srcu: Use rnp->lock wrappers to replace explicit memory barriers rcu: Move rnp->lock wrappers for SRCU use rcu: Convert rnp->lock wrappers to macros for SRCU use rcu: Refactor #includes from include/linux/rcupdate.h bcm47xx: Fix build regression ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/locking/lockdep.c176
-rw-r--r--kernel/rcu/Kconfig242
-rw-r--r--kernel/rcu/Kconfig.debug82
-rw-r--r--kernel/rcu/Makefile2
-rw-r--r--kernel/rcu/rcu.h277
-rw-r--r--kernel/rcu/rcuperf.c129
-rw-r--r--kernel/rcu/rcutorture.c21
-rw-r--r--kernel/rcu/srcu.c661
-rw-r--r--kernel/rcu/srcutiny.c86
-rw-r--r--kernel/rcu/srcutree.c187
-rw-r--r--kernel/rcu/tiny.c54
-rw-r--r--kernel/rcu/tiny_plugin.h123
-rw-r--r--kernel/rcu/tree.c195
-rw-r--r--kernel/rcu/tree.h109
-rw-r--r--kernel/rcu/tree_exp.h2
-rw-r--r--kernel/rcu/tree_plugin.h573
-rw-r--r--kernel/rcu/tree_trace.c494
-rw-r--r--kernel/rcu/update.c77
-rw-r--r--kernel/sched/core.c8
-rw-r--r--kernel/time/Kconfig50
20 files changed, 1248 insertions, 2300 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index c0e31bfee25c..7d2499bec5fe 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1157,18 +1157,18 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
if (debug_locks_silent)
return 0;
- printk("\n");
+ pr_warn("\n");
pr_warn("======================================================\n");
pr_warn("WARNING: possible circular locking dependency detected\n");
print_kernel_ident();
pr_warn("------------------------------------------------------\n");
- printk("%s/%d is trying to acquire lock:\n",
+ pr_warn("%s/%d is trying to acquire lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(check_src);
- printk("\nbut task is already holding lock:\n");
+ pr_warn("\nbut task is already holding lock:\n");
print_lock(check_tgt);
- printk("\nwhich lock already depends on the new lock.\n\n");
- printk("\nthe existing dependency chain (in reverse order) is:\n");
+ pr_warn("\nwhich lock already depends on the new lock.\n\n");
+ pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
print_circular_bug_entry(entry, depth);
@@ -1495,13 +1495,13 @@ print_bad_irq_dependency(struct task_struct *curr,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
- printk("\n");
+ pr_warn("\n");
pr_warn("=====================================================\n");
pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n",
irqclass, irqclass);
print_kernel_ident();
pr_warn("-----------------------------------------------------\n");
- printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
+ pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
@@ -1509,46 +1509,46 @@ print_bad_irq_dependency(struct task_struct *curr,
curr->softirqs_enabled);
print_lock(next);
- printk("\nand this task is already holding:\n");
+ pr_warn("\nand this task is already holding:\n");
print_lock(prev);
- printk("which would create a new lock dependency:\n");
+ pr_warn("which would create a new lock dependency:\n");
print_lock_name(hlock_class(prev));
- printk(KERN_CONT " ->");
+ pr_cont(" ->");
print_lock_name(hlock_class(next));
- printk(KERN_CONT "\n");
+ pr_cont("\n");
- printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
+ pr_warn("\nbut this new dependency connects a %s-irq-safe lock:\n",
irqclass);
print_lock_name(backwards_entry->class);
- printk("\n... which became %s-irq-safe at:\n", irqclass);
+ pr_warn("\n... which became %s-irq-safe at:\n", irqclass);
print_stack_trace(backwards_entry->class->usage_traces + bit1, 1);
- printk("\nto a %s-irq-unsafe lock:\n", irqclass);
+ pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass);
print_lock_name(forwards_entry->class);
- printk("\n... which became %s-irq-unsafe at:\n", irqclass);
- printk("...");
+ pr_warn("\n... which became %s-irq-unsafe at:\n", irqclass);
+ pr_warn("...");
print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
- printk("\nother info that might help us debug this:\n\n");
+ pr_warn("\nother info that might help us debug this:\n\n");
print_irq_lock_scenario(backwards_entry, forwards_entry,
hlock_class(prev), hlock_class(next));
lockdep_print_held_locks(curr);
- printk("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
+ pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
if (!save_trace(&prev_root->trace))
return 0;
print_shortest_lock_dependencies(backwards_entry, prev_root);
- printk("\nthe dependencies between the lock to be acquired");
- printk(" and %s-irq-unsafe lock:\n", irqclass);
+ pr_warn("\nthe dependencies between the lock to be acquired");
+ pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
if (!save_trace(&next_root->trace))
return 0;
print_shortest_lock_dependencies(forwards_entry, next_root);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -1724,22 +1724,22 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
- printk("\n");
+ pr_warn("\n");
pr_warn("============================================\n");
pr_warn("WARNING: possible recursive locking detected\n");
print_kernel_ident();
pr_warn("--------------------------------------------\n");
- printk("%s/%d is trying to acquire lock:\n",
+ pr_warn("%s/%d is trying to acquire lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(next);
- printk("\nbut task is already holding lock:\n");
+ pr_warn("\nbut task is already holding lock:\n");
print_lock(prev);
- printk("\nother info that might help us debug this:\n");
+ pr_warn("\nother info that might help us debug this:\n");
print_deadlock_scenario(next, prev);
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -2074,21 +2074,21 @@ static void print_collision(struct task_struct *curr,
struct held_lock *hlock_next,
struct lock_chain *chain)
{
- printk("\n");
+ pr_warn("\n");
pr_warn("============================\n");
pr_warn("WARNING: chain_key collision\n");
print_kernel_ident();
pr_warn("----------------------------\n");
- printk("%s/%d: ", current->comm, task_pid_nr(current));
- printk("Hash chain already cached but the contents don't match!\n");
+ pr_warn("%s/%d: ", current->comm, task_pid_nr(current));
+ pr_warn("Hash chain already cached but the contents don't match!\n");
- printk("Held locks:");
+ pr_warn("Held locks:");
print_chain_keys_held_locks(curr, hlock_next);
- printk("Locks in cached chain:");
+ pr_warn("Locks in cached chain:");
print_chain_keys_chain(chain);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
}
#endif
@@ -2373,16 +2373,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
- printk("\n");
+ pr_warn("\n");
pr_warn("================================\n");
pr_warn("WARNING: inconsistent lock state\n");
print_kernel_ident();
pr_warn("--------------------------------\n");
- printk("inconsistent {%s} -> {%s} usage.\n",
+ pr_warn("inconsistent {%s} -> {%s} usage.\n",
usage_str[prev_bit], usage_str[new_bit]);
- printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
+ pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
curr->comm, task_pid_nr(curr),
trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
@@ -2390,16 +2390,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
trace_softirqs_enabled(curr));
print_lock(this);
- printk("{%s} state was registered at:\n", usage_str[prev_bit]);
+ pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]);
print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1);
print_irqtrace_events(curr);
- printk("\nother info that might help us debug this:\n");
+ pr_warn("\nother info that might help us debug this:\n");
print_usage_bug_scenario(this);
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -2438,28 +2438,28 @@ print_irq_inversion_bug(struct task_struct *curr,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
- printk("\n");
+ pr_warn("\n");
pr_warn("========================================================\n");
pr_warn("WARNING: possible irq lock inversion dependency detected\n");
print_kernel_ident();
pr_warn("--------------------------------------------------------\n");
- printk("%s/%d just changed the state of lock:\n",
+ pr_warn("%s/%d just changed the state of lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(this);
if (forwards)
- printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
+ pr_warn("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
else
- printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
+ pr_warn("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
print_lock_name(other->class);
- printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
+ pr_warn("\n\nand interrupts could create inverse lock ordering between them.\n\n");
- printk("\nother info that might help us debug this:\n");
+ pr_warn("\nother info that might help us debug this:\n");
/* Find a middle lock (if one exists) */
depth = get_lock_depth(other);
do {
if (depth == 0 && (entry != root)) {
- printk("lockdep:%s bad path found in chain graph\n", __func__);
+ pr_warn("lockdep:%s bad path found in chain graph\n", __func__);
break;
}
middle = entry;
@@ -2475,12 +2475,12 @@ print_irq_inversion_bug(struct task_struct *curr,
lockdep_print_held_locks(curr);
- printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
+ pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
if (!save_trace(&root->trace))
return 0;
print_shortest_lock_dependencies(other, root);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -3189,25 +3189,25 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
if (debug_locks_silent)
return 0;
- printk("\n");
+ pr_warn("\n");
pr_warn("==================================\n");
pr_warn("WARNING: Nested lock was not taken\n");
print_kernel_ident();
pr_warn("----------------------------------\n");
- printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
+ pr_warn("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
print_lock(hlock);
- printk("\nbut this task is not holding:\n");
- printk("%s\n", hlock->nest_lock->name);
+ pr_warn("\nbut this task is not holding:\n");
+ pr_warn("%s\n", hlock->nest_lock->name);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
- printk("\nother info that might help us debug this:\n");
+ pr_warn("\nother info that might help us debug this:\n");
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -3402,21 +3402,21 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
if (debug_locks_silent)
return 0;
- printk("\n");
+ pr_warn("\n");
pr_warn("=====================================\n");
pr_warn("WARNING: bad unlock balance detected!\n");
print_kernel_ident();
pr_warn("-------------------------------------\n");
- printk("%s/%d is trying to release lock (",
+ pr_warn("%s/%d is trying to release lock (",
curr->comm, task_pid_nr(curr));
print_lockdep_cache(lock);
- printk(KERN_CONT ") at:\n");
+ pr_cont(") at:\n");
print_ip_sym(ip);
- printk("but there are no more locks to release!\n");
- printk("\nother info that might help us debug this:\n");
+ pr_warn("but there are no more locks to release!\n");
+ pr_warn("\nother info that might help us debug this:\n");
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -3974,21 +3974,21 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
if (debug_locks_silent)
return 0;
- printk("\n");
+ pr_warn("\n");
pr_warn("=================================\n");
pr_warn("WARNING: bad contention detected!\n");
print_kernel_ident();
pr_warn("---------------------------------\n");
- printk("%s/%d is trying to contend lock (",
+ pr_warn("%s/%d is trying to contend lock (",
curr->comm, task_pid_nr(curr));
print_lockdep_cache(lock);
- printk(KERN_CONT ") at:\n");
+ pr_cont(") at:\n");
print_ip_sym(ip);
- printk("but there are no locks held!\n");
- printk("\nother info that might help us debug this:\n");
+ pr_warn("but there are no locks held!\n");
+ pr_warn("\nother info that might help us debug this:\n");
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -4318,17 +4318,17 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
if (debug_locks_silent)
return;
- printk("\n");
+ pr_warn("\n");
pr_warn("=========================\n");
pr_warn("WARNING: held lock freed!\n");
print_kernel_ident();
pr_warn("-------------------------\n");
- printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
+ pr_warn("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
print_lock(hlock);
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
}
@@ -4376,14 +4376,14 @@ static void print_held_locks_bug(void)
if (debug_locks_silent)
return;
- printk("\n");
+ pr_warn("\n");
pr_warn("====================================\n");
pr_warn("WARNING: %s/%d still has locks held!\n",
current->comm, task_pid_nr(current));
print_kernel_ident();
pr_warn("------------------------------------\n");
lockdep_print_held_locks(current);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
}
@@ -4402,10 +4402,10 @@ void debug_show_all_locks(void)
int unlock = 1;
if (unlikely(!debug_locks)) {
- printk("INFO: lockdep is turned off.\n");
+ pr_warn("INFO: lockdep is turned off.\n");
return;
}
- printk("\nShowing all locks held in the system:\n");
+ pr_warn("\nShowing all locks held in the system:\n");
/*
* Here we try to get the tasklist_lock as hard as possible,
@@ -4416,18 +4416,18 @@ void debug_show_all_locks(void)
retry:
if (!read_trylock(&tasklist_lock)) {
if (count == 10)
- printk("hm, tasklist_lock locked, retrying... ");
+ pr_warn("hm, tasklist_lock locked, retrying... ");
if (count) {
count--;
- printk(" #%d", 10-count);
+ pr_cont(" #%d", 10-count);
mdelay(200);
goto retry;
}
- printk(" ignoring it.\n");
+ pr_cont(" ignoring it.\n");
unlock = 0;
} else {
if (count != 10)
- printk(KERN_CONT " locked it.\n");
+ pr_cont(" locked it.\n");
}
do_each_thread(g, p) {
@@ -4445,7 +4445,7 @@ retry:
unlock = 1;
} while_each_thread(g, p);
- printk("\n");
+ pr_warn("\n");
pr_warn("=============================================\n\n");
if (unlock)
@@ -4475,12 +4475,12 @@ asmlinkage __visible void lockdep_sys_exit(void)
if (unlikely(curr->lockdep_depth)) {
if (!debug_locks_off())
return;
- printk("\n");
+ pr_warn("\n");
pr_warn("================================================\n");
pr_warn("WARNING: lock held when returning to user space!\n");
print_kernel_ident();
pr_warn("------------------------------------------------\n");
- printk("%s/%d is leaving the kernel with locks still held!\n",
+ pr_warn("%s/%d is leaving the kernel with locks still held!\n",
curr->comm, curr->pid);
lockdep_print_held_locks(curr);
}
@@ -4490,19 +4490,15 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
{
struct task_struct *curr = current;
-#ifndef CONFIG_PROVE_RCU_REPEATEDLY
- if (!debug_locks_off())
- return;
-#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
/* Note: the following can be executed concurrently, so be careful. */
- printk("\n");
+ pr_warn("\n");
pr_warn("=============================\n");
pr_warn("WARNING: suspicious RCU usage\n");
print_kernel_ident();
pr_warn("-----------------------------\n");
- printk("%s:%d %s!\n", file, line, s);
- printk("\nother info that might help us debug this:\n\n");
- printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
+ pr_warn("%s:%d %s!\n", file, line, s);
+ pr_warn("\nother info that might help us debug this:\n\n");
+ pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
!rcu_lockdep_current_cpu_online()
? "RCU used illegally from offline CPU!\n"
: !rcu_is_watching()
@@ -4529,10 +4525,10 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
* rcu_read_lock_bh() and so on from extended quiescent states.
*/
if (!rcu_is_watching())
- printk("RCU used illegally from extended quiescent state!\n");
+ pr_warn("RCU used illegally from extended quiescent state!\n");
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
}
EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
new file mode 100644
index 000000000000..be90c945063f
--- /dev/null
+++ b/kernel/rcu/Kconfig
@@ -0,0 +1,242 @@
+#
+# RCU-related configuration options
+#
+
+menu "RCU Subsystem"
+
+config TREE_RCU
+ bool
+ default y if !PREEMPT && SMP
+ help
+ This option selects the RCU implementation that is
+ designed for very large SMP system with hundreds or
+ thousands of CPUs. It also scales down nicely to
+ smaller systems.
+
+config PREEMPT_RCU
+ bool
+ default y if PREEMPT
+ help
+ This option selects the RCU implementation that is
+ designed for very large SMP systems with hundreds or
+ thousands of CPUs, but for which real-time response
+ is also required. It also scales down nicely to
+ smaller systems.
+
+ Select this option if you are unsure.
+
+config TINY_RCU
+ bool
+ default y if !PREEMPT && !SMP
+ help
+ This option selects the RCU implementation that is
+ designed for UP systems from which real-time response
+ is not required. This option greatly reduces the
+ memory footprint of RCU.
+
+config RCU_EXPERT
+ bool "Make expert-level adjustments to RCU configuration"
+ default n
+ help
+ This option needs to be enabled if you wish to make
+ expert-level adjustments to RCU configuration. By default,
+ no such adjustments can be made, which has the often-beneficial
+ side-effect of preventing "make oldconfig" from asking you all
+ sorts of detailed questions about how you would like numerous
+ obscure RCU options to be set up.
+
+ Say Y if you need to make expert-level adjustments to RCU.
+
+ Say N if you are unsure.
+
+config SRCU
+ bool
+ help
+ This option selects the sleepable version of RCU. This version
+ permits arbitrary sleeping or blocking within RCU read-side critical
+ sections.
+
+config TINY_SRCU
+ bool
+ default y if SRCU && TINY_RCU
+ help
+ This option selects the single-CPU non-preemptible version of SRCU.
+
+config TREE_SRCU
+ bool
+ default y if SRCU && !TINY_RCU
+ help
+ This option selects the full-fledged version of SRCU.
+
+config TASKS_RCU
+ bool
+ default n
+ select SRCU
+ help
+ This option enables a task-based RCU implementation that uses
+ only voluntary context switch (not preemption!), idle, and
+ user-mode execution as quiescent states.
+
+config RCU_STALL_COMMON
+ def_bool ( TREE_RCU || PREEMPT_RCU )
+ help
+ This option enables RCU CPU stall code that is common between
+ the TINY and TREE variants of RCU. The purpose is to allow
+ the tiny variants to disable RCU CPU stall warnings, while
+ making these warnings mandatory for the tree variants.
+
+config RCU_NEED_SEGCBLIST
+ def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU )
+
+config CONTEXT_TRACKING
+ bool
+
+config CONTEXT_TRACKING_FORCE
+ bool "Force context tracking"
+ depends on CONTEXT_TRACKING
+ default y if !NO_HZ_FULL
+ help
+ The major pre-requirement for full dynticks to work is to
+ support the context tracking subsystem. But there are also
+ other dependencies to provide in order to make the full
+ dynticks working.
+
+ This option stands for testing when an arch implements the
+ context tracking backend but doesn't yet fullfill all the
+ requirements to make the full dynticks feature working.
+ Without the full dynticks, there is no way to test the support
+ for context tracking and the subsystems that rely on it: RCU
+ userspace extended quiescent state and tickless cputime
+ accounting. This option copes with the absence of the full
+ dynticks subsystem by forcing the context tracking on all
+ CPUs in the system.
+
+ Say Y only if you're working on the development of an
+ architecture backend for the context tracking.
+
+ Say N otherwise, this option brings an overhead that you
+ don't want in production.
+
+
+config RCU_FANOUT
+ int "Tree-based hierarchical RCU fanout value"
+ range 2 64 if 64BIT
+ range 2 32 if !64BIT
+ depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
+ default 64 if 64BIT
+ default 32 if !64BIT
+ help
+ This option controls the fanout of hierarchical implementations
+ of RCU, allowing RCU to work efficiently on machines with
+ large numbers of CPUs. This value must be at least the fourth
+ root of NR_CPUS, which allows NR_CPUS to be insanely large.
+ The default value of RCU_FANOUT should be used for production
+ systems, but if you are stress-testing the RCU implementation
+ itself, small RCU_FANOUT values allow you to test large-system
+ code paths on small(er) systems.
+
+ Select a specific number if testing RCU itself.
+ Take the default if unsure.
+
+config RCU_FANOUT_LEAF
+ int "Tree-based hierarchical RCU leaf-level fanout value"
+ range 2 64 if 64BIT
+ range 2 32 if !64BIT
+ depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
+ default 16
+ help
+ This option controls the leaf-level fanout of hierarchical
+ implementations of RCU, and allows trading off cache misses
+ against lock contention. Systems that synchronize their
+ scheduling-clock interrupts for energy-efficiency reasons will
+ want the default because the smaller leaf-level fanout keeps
+ lock contention levels acceptably low. Very large systems
+ (hundreds or thousands of CPUs) will instead want to set this
+ value to the maximum value possible in order to reduce the
+ number of cache misses incurred during RCU's grace-period
+ initialization. These systems tend to run CPU-bound, and thus
+ are not helped by synchronized interrupts, and thus tend to
+ skew them, which reduces lock contention enough that large
+ leaf-level fanouts work well. That said, setting leaf-level
+ fanout to a large number will likely cause problematic
+ lock contention on the leaf-level rcu_node structures unless
+ you boot with the skew_tick kernel parameter.
+
+ Select a specific number if testing RCU itself.
+
+ Select the maximum permissible value for large systems, but
+ please understand that you may also need to set the skew_tick
+ kernel boot parameter to avoid contention on the rcu_node
+ structure's locks.
+
+ Take the default if unsure.
+
+config RCU_FAST_NO_HZ
+ bool "Accelerate last non-dyntick-idle CPU's grace periods"
+ depends on NO_HZ_COMMON && SMP && RCU_EXPERT
+ default n
+ help
+ This option permits CPUs to enter dynticks-idle state even if
+ they have RCU callbacks queued, and prevents RCU from waking
+ these CPUs up more than roughly once every four jiffies (by
+ default, you can adjust this using the rcutree.rcu_idle_gp_delay
+ parameter), thus improving energy efficiency. On the other
+ hand, this option increases the duration of RCU grace periods,
+ for example, slowing down synchronize_rcu().
+
+ Say Y if energy efficiency is critically important, and you
+ don't care about increased grace-period durations.
+
+ Say N if you are unsure.
+
+config RCU_BOOST
+ bool "Enable RCU priority boosting"
+ depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
+ default n
+ help
+ This option boosts the priority of preempted RCU readers that
+ block the current preemptible RCU grace period for too long.
+ This option also prevents heavy loads from blocking RCU
+ callback invocation for all flavors of RCU.
+
+ Say Y here if you are working with real-time apps or heavy loads
+ Say N here if you are unsure.
+
+config RCU_BOOST_DELAY
+ int "Milliseconds to delay boosting after RCU grace-period start"
+ range 0 3000
+ depends on RCU_BOOST
+ default 500
+ help
+ This option specifies the time to wait after the beginning of
+ a given grace period before priority-boosting preempted RCU
+ readers blocking that grace period. Note that any RCU reader
+ blocking an expedited RCU grace period is boosted immediately.
+
+ Accept the default if unsure.
+
+config RCU_NOCB_CPU
+ bool "Offload RCU callback processing from boot-selected CPUs"
+ depends on TREE_RCU || PREEMPT_RCU
+ depends on RCU_EXPERT || NO_HZ_FULL
+ default n
+ help
+ Use this option to reduce OS jitter for aggressive HPC or
+ real-time workloads. It can also be used to offload RCU
+ callback invocation to energy-efficient CPUs in battery-powered
+ asymmetric multiprocessors.
+
+ This option offloads callback invocation from the set of
+ CPUs specified at boot time by the rcu_nocbs parameter.
+ For each such CPU, a kthread ("rcuox/N") will be created to
+ invoke callbacks, where the "N" is the CPU being offloaded,
+ and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
+ "s" for RCU-sched. Nothing prevents this kthread from running
+ on the specified CPUs, but (1) the kthreads may be preempted
+ between each callback, and (2) affinity or cgroups can be used
+ to force the kthreads to run on whatever set of CPUs is desired.
+
+ Say Y here if you want to help to debug reduced OS jitter.
+ Say N here if you are unsure.
+
+endmenu # "RCU Subsystem"
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
new file mode 100644
index 000000000000..0ec7d1d33a14
--- /dev/null
+++ b/kernel/rcu/Kconfig.debug
@@ -0,0 +1,82 @@
+#
+# RCU-related debugging configuration options
+#
+
+menu "RCU Debugging"
+
+config PROVE_RCU
+ def_bool PROVE_LOCKING
+
+config TORTURE_TEST
+ tristate
+ default n
+
+config RCU_PERF_TEST
+ tristate "performance tests for RCU"
+ depends on DEBUG_KERNEL
+ select TORTURE_TEST
+ select SRCU
+ select TASKS_RCU
+ default n
+ help
+ This option provides a kernel module that runs performance
+ tests on the RCU infrastructure. The kernel module may be built
+ after the fact on the running kernel to be tested, if desired.
+
+ Say Y here if you want RCU performance tests to be built into
+ the kernel.
+ Say M if you want the RCU performance tests to build as a module.
+ Say N if you are unsure.
+
+config RCU_TORTURE_TEST
+ tristate "torture tests for RCU"
+ depends on DEBUG_KERNEL
+ select TORTURE_TEST
+ select SRCU
+ select TASKS_RCU
+ default n
+ help
+ This option provides a kernel module that runs torture tests
+ on the RCU infrastructure. The kernel module may be built
+ after the fact on the running kernel to be tested, if desired.
+
+ Say Y here if you want RCU torture tests to be built into
+ the kernel.
+ Say M if you want the RCU torture tests to build as a module.
+ Say N if you are unsure.
+
+config RCU_CPU_STALL_TIMEOUT
+ int "RCU CPU stall timeout in seconds"
+ depends on RCU_STALL_COMMON
+ range 3 300
+ default 21
+ help
+ If a given RCU grace period extends more than the specified
+ number of seconds, a CPU stall warning is printed. If the
+ RCU grace period persists, additional CPU stall warnings are
+ printed at more widely spaced intervals.
+
+config RCU_TRACE
+ bool "Enable tracing for RCU"
+ depends on DEBUG_KERNEL
+ default y if TREE_RCU
+ select TRACE_CLOCK
+ help
+ This option enables additional tracepoints for ftrace-style
+ event tracing.
+
+ Say Y here if you want to enable RCU tracing
+ Say N if you are unsure.
+
+config RCU_EQS_DEBUG
+ bool "Provide debugging asserts for adding NO_HZ support to an arch"
+ depends on DEBUG_KERNEL
+ help
+ This option provides consistency checks in RCU's handling of
+ NO_HZ. These checks have proven quite helpful in detecting
+ bugs in arch-specific NO_HZ code.
+
+ Say N here if you need ultimate kernel/user switch latencies
+ Say Y if you are unsure
+
+endmenu # "RCU Debugging"
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 23803c7d5180..13c0fc852767 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -3,13 +3,11 @@
KCOV_INSTRUMENT := n
obj-y += update.o sync.o
-obj-$(CONFIG_CLASSIC_SRCU) += srcu.o
obj-$(CONFIG_TREE_SRCU) += srcutree.o
obj-$(CONFIG_TINY_SRCU) += srcutiny.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
obj-$(CONFIG_TREE_RCU) += tree.o
obj-$(CONFIG_PREEMPT_RCU) += tree.o
-obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
obj-$(CONFIG_TINY_RCU) += tiny.o
obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 73e16ec4054b..808b8c85f626 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -212,6 +212,18 @@ int rcu_jiffies_till_stall_check(void);
*/
#define TPS(x) tracepoint_string(x)
+/*
+ * Dump the ftrace buffer, but only one time per callsite per boot.
+ */
+#define rcu_ftrace_dump(oops_dump_mode) \
+do { \
+ static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
+ \
+ if (!atomic_read(&___rfd_beenhere) && \
+ !atomic_xchg(&___rfd_beenhere, 1)) \
+ ftrace_dump(oops_dump_mode); \
+} while (0)
+
void rcu_early_boot_tests(void);
void rcu_test_sync_prims(void);
@@ -291,6 +303,271 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
cpu <= rnp->grphi; \
cpu = cpumask_next((cpu), cpu_possible_mask))
+/*
+ * Wrappers for the rcu_node::lock acquire and release.
+ *
+ * Because the rcu_nodes form a tree, the tree traversal locking will observe
+ * different lock values, this in turn means that an UNLOCK of one level
+ * followed by a LOCK