summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-21 19:26:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-21 19:26:51 -0700
commit226da0dbc84ed97f448523e2a4cb91c27fa68ed9 (patch)
tree3969a9f612cd5596747ecde2066e65eacbab7d2e
parent5ec29e3149d800e6db83c1b6ff441daf319cbbe2 (diff)
parent2d84e023cb5ec00403ff5d447533c6fd58fcc7ff (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU changes from Ingo Molnar: "This is the v3.5 RCU tree from Paul E. McKenney: 1) A set of improvements and fixes to the RCU_FAST_NO_HZ feature (with more on the way for 3.6). Posted to LKML: https://lkml.org/lkml/2012/4/23/324 (commits 1-3 and 5), https://lkml.org/lkml/2012/4/16/611 (commit 4), https://lkml.org/lkml/2012/4/30/390 (commit 6), and https://lkml.org/lkml/2012/5/4/410 (commit 7, combined with the other commits for the convenience of the tester). 2) Changes to make rcu_barrier() avoid disrupting execution of CPUs that have no RCU callbacks. Posted to LKML: https://lkml.org/lkml/2012/4/23/322. 3) A couple of commits that improve the efficiency of the interaction between preemptible RCU and the scheduler, these two being all that survived an abortive attempt to allow preemptible RCU's __rcu_read_lock() to be inlined. The full set was posted to LKML at https://lkml.org/lkml/2012/4/14/143, and the first and third patches of that set remain. 4) Lai Jiangshan's algorithmic implementation of SRCU, which includes call_srcu() and srcu_barrier(). A major feature of this new implementation is that synchronize_srcu() no longer disturbs the execution of other CPUs. This work is based on earlier implementations by Peter Zijlstra and Paul E. McKenney. Posted to LKML: https://lkml.org/lkml/2012/2/22/82. 5) A number of miscellaneous bug fixes and improvements which were posted to LKML at: https://lkml.org/lkml/2012/4/23/353 with subsequent updates posted to LKML." * 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (32 commits) rcu: Make rcu_barrier() less disruptive rcu: Explicitly initialize RCU_FAST_NO_HZ per-CPU variables rcu: Make RCU_FAST_NO_HZ handle timer migration rcu: Update RCU maintainership rcu: Make exit_rcu() more precise and consolidate rcu: Move PREEMPT_RCU preemption to switch_to() invocation rcu: Ensure that RCU_FAST_NO_HZ timers expire on correct CPU rcu: Add rcutorture test for call_srcu() rcu: Implement per-domain single-threaded call_srcu() state machine rcu: Use single value to handle expedited SRCU grace periods rcu: Improve srcu_readers_active_idx()'s cache locality rcu: Remove unused srcu_barrier() rcu: Implement a variant of Peter's SRCU algorithm rcu: Improve SRCU's wait_idx() comments rcu: Flip ->completed only once per SRCU grace period rcu: Increment upper bit only for srcu_read_lock() rcu: Remove fast check path from __synchronize_srcu() rcu: Direct algorithmic SRCU implementation rcu: Introduce rcutorture testing for rcu_barrier() timer: Fix mod_timer_pinned() header comment ...
-rw-r--r--Documentation/RCU/torture.txt15
-rw-r--r--Documentation/kernel-parameters.txt88
-rw-r--r--MAINTAINERS14
-rw-r--r--arch/um/drivers/mconsole_kern.c1
-rw-r--r--include/linux/rculist.h40
-rw-r--r--include/linux/rcupdate.h20
-rw-r--r--include/linux/rcutiny.h11
-rw-r--r--include/linux/rcutree.h19
-rw-r--r--include/linux/sched.h10
-rw-r--r--include/linux/srcu.h48
-rw-r--r--include/trace/events/rcu.h2
-rw-r--r--init/Kconfig50
-rw-r--r--kernel/rcupdate.c28
-rw-r--r--kernel/rcutiny_plugin.h16
-rw-r--r--kernel/rcutorture.c257
-rw-r--r--kernel/rcutree.c332
-rw-r--r--kernel/rcutree.h23
-rw-r--r--kernel/rcutree_plugin.h154
-rw-r--r--kernel/rcutree_trace.c4
-rw-r--r--kernel/sched/core.c1
-rw-r--r--kernel/srcu.c548
-rw-r--r--kernel/timer.c8
-rw-r--r--lib/list_debug.c22
23 files changed, 1358 insertions, 353 deletions
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 375d3fb71437..4ddf3913fd8c 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -47,6 +47,16 @@ irqreader Says to invoke RCU readers from irq level. This is currently
permit this. (Or, more accurately, variants of RCU that do
-not- permit this know to ignore this variable.)
+n_barrier_cbs If this is nonzero, RCU barrier testing will be conducted,
+ in which case n_barrier_cbs specifies the number of
+ RCU callbacks (and corresponding kthreads) to use for
+ this testing. The value cannot be negative. If you
+ specify this to be non-zero when torture_type indicates a
+ synchronous RCU implementation (one for which a member of
+ the synchronize_rcu() rather than the call_rcu() family is
+ used -- see the documentation for torture_type below), an
+ error will be reported and no testing will be carried out.
+
nfakewriters This is the number of RCU fake writer threads to run. Fake
writer threads repeatedly use the synchronous "wait for
current readers" function of the interface selected by
@@ -188,7 +198,7 @@ OUTPUT
The statistics output is as follows:
rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
- rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
+ rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0
rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0
rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
@@ -230,6 +240,9 @@ o "rtmbe": A non-zero value indicates that rcutorture believes that
rcu_assign_pointer() and rcu_dereference() are not working
correctly. This value should be zero.
+o "rtbe": A non-zero value indicates that one of the rcu_barrier()
+ family of functions is not working correctly.
+
o "rtbke": rcutorture was unable to create the real-time kthreads
used to force RCU priority inversion. This value should be zero.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f995195409fd..0e90453e4acb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2333,18 +2333,100 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
ramdisk_size= [RAM] Sizes of RAM disks in kilobytes
See Documentation/blockdev/ramdisk.txt.
- rcupdate.blimit= [KNL,BOOT]
+ rcutree.blimit= [KNL,BOOT]
Set maximum number of finished RCU callbacks to process
in one batch.
- rcupdate.qhimark= [KNL,BOOT]
+ rcutree.qhimark= [KNL,BOOT]
Set threshold of queued
RCU callbacks over which batch limiting is disabled.
- rcupdate.qlowmark= [KNL,BOOT]
+ rcutree.qlowmark= [KNL,BOOT]
Set threshold of queued RCU callbacks below which
batch limiting is re-enabled.
+ rcutree.rcu_cpu_stall_suppress= [KNL,BOOT]
+ Suppress RCU CPU stall warning messages.
+
+ rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
+ Set timeout for RCU CPU stall warning messages.
+
+ rcutorture.fqs_duration= [KNL,BOOT]
+ Set duration of force_quiescent_state bursts.
+
+ rcutorture.fqs_holdoff= [KNL,BOOT]
+ Set holdoff time within force_quiescent_state bursts.
+
+ rcutorture.fqs_stutter= [KNL,BOOT]
+ Set wait time between force_quiescent_state bursts.
+
+ rcutorture.irqreader= [KNL,BOOT]
+ Test RCU readers from irq handlers.
+
+ rcutorture.n_barrier_cbs= [KNL,BOOT]
+ Set callbacks/threads for rcu_barrier() testing.
+
+ rcutorture.nfakewriters= [KNL,BOOT]
+ Set number of concurrent RCU writers. These just
+ stress RCU, they don't participate in the actual
+ test, hence the "fake".
+
+ rcutorture.nreaders= [KNL,BOOT]
+ Set number of RCU readers.
+
+ rcutorture.onoff_holdoff= [KNL,BOOT]
+ Set time (s) after boot for CPU-hotplug testing.
+
+ rcutorture.onoff_interval= [KNL,BOOT]
+ Set time (s) between CPU-hotplug operations, or
+ zero to disable CPU-hotplug testing.
+
+ rcutorture.shuffle_interval= [KNL,BOOT]
+ Set task-shuffle interval (s). Shuffling tasks
+ allows some CPUs to go into dyntick-idle mode
+ during the rcutorture test.
+
+ rcutorture.shutdown_secs= [KNL,BOOT]
+ Set time (s) after boot system shutdown. This
+ is useful for hands-off automated testing.
+
+ rcutorture.stall_cpu= [KNL,BOOT]
+ Duration of CPU stall (s) to test RCU CPU stall
+ warnings, zero to disable.
+
+ rcutorture.stall_cpu_holdoff= [KNL,BOOT]
+ Time to wait (s) after boot before inducing stall.
+
+ rcutorture.stat_interval= [KNL,BOOT]
+ Time (s) between statistics printk()s.
+
+ rcutorture.stutter= [KNL,BOOT]
+ Time (s) to stutter testing, for example, specifying
+ five seconds causes the test to run for five seconds,
+ wait for five seconds, and so on. This tests RCU's
+ ability to transition abruptly to and from idle.
+
+ rcutorture.test_boost= [KNL,BOOT]
+ Test RCU priority boosting? 0=no, 1=maybe, 2=yes.
+ "Maybe" means test if the RCU implementation
+ under test support RCU priority boosting.
+
+ rcutorture.test_boost_duration= [KNL,BOOT]
+ Duration (s) of each individual boost test.
+
+ rcutorture.test_boost_interval= [KNL,BOOT]
+ Interval (s) between each boost test.
+
+ rcutorture.test_no_idle_hz= [KNL,BOOT]
+ Test RCU's dyntick-idle handling. See also the
+ rcutorture.shuffle_interval parameter.
+
+ rcutorture.torture_type= [KNL,BOOT]
+ Specify the RCU implementation to test.
+
+ rcutorture.verbose= [KNL,BOOT]
+ Enable additional printk() statements.
+
rdinit= [KNL]
Format: <full_path>
Run specified binary instead of /init from the ramdisk,
diff --git a/MAINTAINERS b/MAINTAINERS
index 73a8b561414b..5ccca1ca0077 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5598,14 +5598,13 @@ F: net/rds/
READ-COPY UPDATE (RCU)
M: Dipankar Sarma <dipankar@in.ibm.com>
M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
-W: http://www.rdrop.com/users/paulmck/rclock/
+W: http://www.rdrop.com/users/paulmck/RCU/
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
F: Documentation/RCU/
+X: Documentation/RCU/torture.txt
F: include/linux/rcu*
-F: include/linux/srcu*
F: kernel/rcu*
-F: kernel/srcu*
X: kernel/rcutorture.c
REAL TIME CLOCK (RTC) SUBSYSTEM
@@ -6122,6 +6121,15 @@ S: Maintained
F: include/linux/sl?b*.h
F: mm/sl?b.c
+SLEEPABLE READ-COPY UPDATE (SRCU)
+M: Lai Jiangshan <laijs@cn.fujitsu.com>
+M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+W: http://www.rdrop.com/users/paulmck/RCU/
+S: Supported
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
+F: include/linux/srcu*
+F: kernel/srcu*
+
SMC91x ETHERNET DRIVER
M: Nicolas Pitre <nico@fluxnic.net>
S: Odd Fixes
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 43b39d61b538..88e466b159dc 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -705,6 +705,7 @@ static void stack_proc(void *arg)
struct task_struct *from = current, *to = arg;
to->thread.saved_task = from;
+ rcu_switch_from(from);
switch_to(from, to, from);
}
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index d079290843a9..e0f0fab20415 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -30,6 +30,7 @@
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
+#ifndef CONFIG_DEBUG_LIST
static inline void __list_add_rcu(struct list_head *new,
struct list_head *prev, struct list_head *next)
{
@@ -38,6 +39,10 @@ static inline void __list_add_rcu(struct list_head *new,
rcu_assign_pointer(list_next_rcu(prev), new);
next->prev = new;
}
+#else
+extern void __list_add_rcu(struct list_head *new,
+ struct list_head *prev, struct list_head *next);
+#endif
/**
* list_add_rcu - add a new entry to rcu-protected list
@@ -108,7 +113,7 @@ static inline void list_add_tail_rcu(struct list_head *new,
*/
static inline void list_del_rcu(struct list_head *entry)
{
- __list_del(entry->prev, entry->next);
+ __list_del_entry(entry);
entry->prev = LIST_POISON2;
}
@@ -228,18 +233,43 @@ static inline void list_splice_init_rcu(struct list_head *list,
})
/**
- * list_first_entry_rcu - get the first element from a list
+ * Where are list_empty_rcu() and list_first_entry_rcu()?
+ *
+ * Implementing those functions following their counterparts list_empty() and
+ * list_first_entry() is not advisable because they lead to subtle race
+ * conditions as the following snippet shows:
+ *
+ * if (!list_empty_rcu(mylist)) {
+ * struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member);
+ * do_something(bar);
+ * }
+ *
+ * The list may not be empty when list_empty_rcu checks it, but it may be when
+ * list_first_entry_rcu rereads the ->next pointer.
+ *
+ * Rereading the ->next pointer is not a problem for list_empty() and
+ * list_first_entry() because they would be protected by a lock that blocks
+ * writers.
+ *
+ * See list_first_or_null_rcu for an alternative.
+ */
+
+/**
+ * list_first_or_null_rcu - get the first element from a list
* @ptr: the list head to take the element from.
* @type: the type of the struct this is embedded in.
* @member: the name of the list_struct within the struct.
*
- * Note, that list is expected to be not empty.
+ * Note that if the list is empty, it returns NULL.
*
* This primitive may safely run concurrently with the _rcu list-mutation
* primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
*/
-#define list_first_entry_rcu(ptr, type, member) \
- list_entry_rcu((ptr)->next, type, member)
+#define list_first_or_null_rcu(ptr, type, member) \
+ ({struct list_head *__ptr = (ptr); \
+ struct list_head __rcu *__next = list_next_rcu(__ptr); \
+ likely(__ptr != __next) ? container_of(__next, type, member) : NULL; \
+ })
/**
* list_for_each_entry_rcu - iterate over rcu list of given type
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 20fb776a1d4a..26d1a47591f1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -184,12 +184,14 @@ static inline int rcu_preempt_depth(void)
/* Internal to kernel */
extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
+extern void rcu_preempt_note_context_switch(void);
extern void rcu_check_callbacks(int cpu, int user);
struct notifier_block;
extern void rcu_idle_enter(void);
extern void rcu_idle_exit(void);
extern void rcu_irq_enter(void);
extern void rcu_irq_exit(void);
+extern void exit_rcu(void);
/**
* RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
@@ -922,6 +924,21 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
kfree_call_rcu(head, (rcu_callback)offset);
}
+/*
+ * Does the specified offset indicate that the corresponding rcu_head
+ * structure can be handled by kfree_rcu()?
+ */
+#define __is_kfree_rcu_offset(offset) ((offset) < 4096)
+
+/*
+ * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
+ */
+#define __kfree_rcu(head, offset) \
+ do { \
+ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
+ call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
+ } while (0)
+
/**
* kfree_rcu() - kfree an object after a grace period.
* @ptr: pointer to kfree
@@ -944,6 +961,9 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
*
* Note that the allowable offset might decrease in the future, for example,
* to allow something like kmem_cache_free_rcu().
+ *
+ * The BUILD_BUG_ON check must not involve any function calls, hence the
+ * checks are done in macros here.
*/
#define kfree_rcu(ptr, rcu_head) \
__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e93df77176d1..adb5e5a38cae 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -87,14 +87,6 @@ static inline void kfree_call_rcu(struct rcu_head *head,
#ifdef CONFIG_TINY_RCU
-static inline void rcu_preempt_note_context_switch(void)
-{
-}
-
-static inline void exit_rcu(void)
-{
-}
-
static inline int rcu_needs_cpu(int cpu)
{
return 0;
@@ -102,8 +94,6 @@ static inline int rcu_needs_cpu(int cpu)
#else /* #ifdef CONFIG_TINY_RCU */
-void rcu_preempt_note_context_switch(void);
-extern void exit_rcu(void);
int rcu_preempt_needs_cpu(void);
static inline int rcu_needs_cpu(int cpu)
@@ -116,7 +106,6 @@ static inline int rcu_needs_cpu(int cpu)
static inline void rcu_note_context_switch(int cpu)
{
rcu_sched_qs(cpu);
- rcu_preempt_note_context_switch();
}
/*
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index e8ee5dd0854c..3c6083cde4fc 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,18 +45,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
rcu_note_context_switch(cpu);
}
-#ifdef CONFIG_TREE_PREEMPT_RCU
-
-extern void exit_rcu(void);
-
-#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-static inline void exit_rcu(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
-
extern void synchronize_rcu_bh(void);
extern void synchronize_sched_expedited(void);
extern void synchronize_rcu_expedited(void);
@@ -98,13 +86,6 @@ extern void rcu_force_quiescent_state(void);
extern void rcu_bh_force_quiescent_state(void);
extern void rcu_sched_force_quiescent_state(void);
-/* A context switch is a grace period for RCU-sched and RCU-bh. */
-static inline int rcu_blocking_is_gp(void)
-{
- might_sleep(); /* Check for RCU read-side critical section. */
- return num_online_cpus() == 1;
-}
-
extern void rcu_scheduler_starting(void);
extern int rcu_scheduler_active __read_mostly;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 81a173c0897d..8f3fd945070f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1905,12 +1905,22 @@ static inline void rcu_copy_process(struct task_struct *p)
INIT_LIST_HEAD(&p->rcu_node_entry);
}
+static inline void rcu_switch_from(struct task_struct *prev)
+{
+ if (prev->rcu_read_lock_nesting != 0)
+ rcu_preempt_note_context_switch();
+}
+
#else
static inline void rcu_copy_process(struct task_struct *p)
{
}
+static inline void rcu_switch_from(struct task_struct *prev)
+{
+}
+
#endif
#ifdef CONFIG_SMP
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index d3d5fa54f25e..55a5c52cbb25 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -29,26 +29,35 @@
#include <linux/mutex.h>
#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
struct srcu_struct_array {
- int c[2];
+ unsigned long c[2];
+ unsigned long seq[2];
+};
+
+struct rcu_batch {
+ struct rcu_head *head, **tail;
};
struct srcu_struct {
- int completed;
+ unsigned completed;
struct srcu_struct_array __percpu *per_cpu_ref;
- struct mutex mutex;
+ spinlock_t queue_lock; /* protect ->batch_queue, ->running */
+ bool running;
+ /* callbacks just queued */
+ struct rcu_batch batch_queue;
+ /* callbacks try to do the first check_zero */
+ struct rcu_batch batch_check0;
+ /* callbacks done with the first check_zero and the flip */
+ struct rcu_batch batch_check1;
+ struct rcu_batch batch_done;
+ struct delayed_work work;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
};
-#ifndef CONFIG_PREEMPT
-#define srcu_barrier() barrier()
-#else /* #ifndef CONFIG_PREEMPT */
-#define srcu_barrier()
-#endif /* #else #ifndef CONFIG_PREEMPT */
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
int __init_srcu_struct(struct srcu_struct *sp, const char *name,
@@ -67,12 +76,33 @@ int init_srcu_struct(struct srcu_struct *sp);
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+/**
+ * call_srcu() - Queue a callback for invocation after an SRCU grace period
+ * @sp: srcu_struct in queue the callback
+ * @head: structure to be used for queueing the SRCU callback.
+ * @func: function to be invoked after the SRCU grace period
+ *
+ * The callback function will be invoked some time after a full SRCU
+ * grace period elapses, in other words after all pre-existing SRCU
+ * read-side critical sections have completed. However, the callback
+ * function might well execute concurrently with other SRCU read-side
+ * critical sections that started after call_srcu() was invoked. SRCU
+ * read-side critical sections are delimited by srcu_read_lock() and
+ * srcu_read_unlock(), and may be nested.
+ *
+ * The callback will be invoked from process context, but must nevertheless
+ * be fast and must not block.
+ */
+void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
+ void (*func)(struct rcu_head *head));
+
void cleanup_srcu_struct(struct srcu_struct *sp);
int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
void synchronize_srcu(struct srcu_struct *sp);
void synchronize_srcu_expedited(struct srcu_struct *sp);
long srcu_batches_completed(struct srcu_struct *sp);
+void srcu_barrier(struct srcu_struct *sp);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 337099783f37..1480900c511c 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -292,6 +292,8 @@ TRACE_EVENT(rcu_dyntick,
* "More callbacks": Still more callbacks, try again to clear them out.
* "Callbacks drained": All callbacks processed, off to dyntick idle!
* "Timer": Timer fired to cause CPU to continue processing callbacks.
+ * "Demigrate": Timer fired on wrong CPU, woke up correct CPU.
+ * "Cleanup after idle": Idle exited, timer canceled.
*/
TRACE_EVENT(rcu_prep_idle,
diff --git a/init/Kconfig b/init/Kconfig
index 6cfd71d06463..6d18ef8071b5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -458,6 +458,33 @@ config RCU_FANOUT
Select a specific number if testing RCU itself.
Take the default if unsure.
+config RCU_FANOUT_LEAF
+ int "Tree-based hierarchical RCU leaf-level fanout value"
+ range 2 RCU_FANOUT if 64BIT
+ range 2 RCU_FANOUT if !64BIT
+ depends on TREE_RCU || TREE_PREEMPT_RCU
+ default 16
+ help
+ This option controls the leaf-level fanout of hierarchical
+ implementations of RCU, and allows trading off cache misses
+ against lock contention. Systems that synchronize their
+ scheduling-clock interrupts for energy-efficiency reasons will
+ want the default because the smaller leaf-level fanout keeps
+ lock contention levels acceptably low. Very large systems
+ (hundreds or thousands of CPUs) will instead want to set this
+ value to the maximum value possible in order to reduce the
+ number of cache misses incurred during RCU's grace-period
+ initialization. These systems tend to run CPU-bound, and thus
+ are not helped by synchronized interrupts, and thus tend to
+ skew them, which reduces lock contention enough that large
+ leaf-level fanouts work well.
+
+ Select a specific number if testing RCU itself.
+
+ Select the maximum permissible value for large systems.
+
+ Take the default if unsure.
+
config RCU_FANOUT_EXACT
bool "Disable tree-based hierarchical RCU auto-balancing"
depends on TREE_RCU || TREE_PREEMPT_RCU
@@ -515,10 +542,25 @@ config RCU_BOOST_PRIO
depends on RCU_BOOST
default 1
help
- This option specifies the real-time priority to which preempted
- RCU readers are to be boosted. If you are working with CPU-bound
- real-time applications, you should specify a priority higher then
- the highest-priority CPU-bound application.
+ This option specifies the real-time priority to which long-term
+ preempted RCU readers are to be boosted. If you are working
+ with a real-time application that has one or more CPU-bound
+ threads running at a real-time priority level, you should set
+ RCU_BOOST_PRIO to a priority higher then the highest-priority
+ real-time CPU-bound thread. The default RCU_BOOST_PRIO value
+ of 1 is appropriate in the common case, which is real-time
+ applications that do not have any CPU-bound threads.
+
+ Some real-time applications might not have a single real-time
+ thread that saturates a given CPU, but instead might have
+ multiple real-time threads that, taken together, fully utilize
+ that CPU. In this case, you should set RCU_BOOST_PRIO to
+ a priority higher than the lowest-priority thread that is
+ conspiring to prevent the CPU from running any non-real-time
+ tasks. For example, if one thread at priority 10 and another
+ thread at priority 5 are between themselves fully consuming
+ the CPU time on a given CPU, then RCU_BOOST_PRIO should be
+ set to priority 6 or higher.
Specify the real-time priority, or take the default if unsure.
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a86f1741cc27..95cba41ce1e9 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -51,6 +51,34 @@
#include "rcu.h"
+#ifdef CONFIG_PREEMPT_RCU
+
+/*
+ * Check for a task exiting while in a preemptible-RCU read-side
+ * critical section, clean up if so. No need to issue warnings,
+ * as debug_check_no_locks_held() already does this if lockdep
+ * is enabled.
+ */
+void exit_rcu(void)
+{
+ struct task_struct *t = current;
+
+ if (likely(list_empty(&current->rcu_node_entry)))
+ return;
+ t->rcu_read_lock_nesting = 1;
+ barrier();
+ t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
+ __rcu_read_unlock();
+}
+
+#else /* #ifdef CONFIG_PREEMPT_RCU */
+
+void exit_rcu(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 22ecea0dfb62..fc31a2d65100 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -851,22 +851,6 @@ int rcu_preempt_needs_cpu(void)
return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
}
-/*
- * Check for a task exiting while in a preemptible -RCU read-side
- * critical section, clean up if so. No need to issue warnings,
- * as debug_check_no_locks_held() already does this if lockdep
- * is enabled.
- */
-void exit_rcu(void)
-{
- struct task_struct *t = current;
-
- if (t->rcu_read_lock_nesting == 0)
- return;
- t->rcu_read_lock_nesting = 1;
- __rcu_read_unlock();
-}
-
#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
#ifdef CONFIG_RCU_TRACE
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index a89b381a8c6e..e66b34ab7555 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -64,6 +64,7 @@ static int irqreader = 1; /* RCU readers from irq (timers). */
static int fqs_duration; /* Duration of bursts (us), 0 to disable. */
static int fqs_holdoff; /* Hold time within burst (us). */
static int fqs_stutter = 3; /* Wait time between bursts (s). */
+static int n_barrier_cbs; /* Number of callbacks to test RCU barriers. */
static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */
static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */
static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */
@@ -96,6 +97,8 @@ module_param(fqs_holdoff, int, 0444);
MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
module_param(fqs_stutter, int, 0444);
MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
+module_param(n_barrier_cbs, int, 0444);
+MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing");
module_param(onoff_interval, int, 0444);
MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
module_param(onoff_holdoff, int, 0444);
@@ -139,6 +142,8 @@ static struct task_struct *shutdown_task;
static struct task_struct *onoff_task;
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
static struct task_struct *stall_task;
+static struct task_struct **barrier_cbs_tasks;
+static struct task_struct *barrier_task;
#define RCU_TORTURE_PIPE_LEN 10
@@ -164,6 +169,7 @@ static atomic_t n_rcu_torture_alloc_fail;
static atomic_t n_rcu_torture_free;
static atomic_t n_rcu_torture_mberror;
static atomic_t n_rcu_torture_error;
+static long n_rcu_torture_barrier_error;
static long n_rcu_torture_boost_ktrerror;
static long n_rcu_torture_boost_rterror;
static long n_rcu_torture_boost_failure;
@@ -173,6 +179,8 @@ static long n_offline_attempts;
static long n_offline_successes;
static long n_online_attempts;
static long n_online_successes;
+static long n_barrier_attempts;
+static long n_barrier_successes;
static struct list_head rcu_torture_removed;
static cpumask_var_t shuffle_tmp_mask;
@@ -197,6 +205,10 @@ static unsigned long shutdown_time; /* jiffies to system shutdown. */
static unsigned long boost_starttime; /* jiffies of next boost test start. */
DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
/* and boost task create/destroy. */
+static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
+static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
+static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
+static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
@@ -327,6 +339,7 @@ struct rcu_torture_ops {
int (*completed)(void);
void (*deferred_free)(struct rcu_torture *p);
void (*sync)(void);
+ void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
void (*cb_barrier)(void);
void (*fqs)(void);
int (*stats)(char *page);
@@ -417,6 +430,7 @@ static struct rcu_torture_ops rcu_ops = {
.completed = rcu_torture_completed,
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
+ .call = call_rcu,
.cb_barrier = rcu_barrier,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
@@ -460,6 +474,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
.completed = rcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu,
+ .call = NULL,
.cb_barrier = NULL,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
@@ -477,6 +492,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
.completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_expedited,
+ .call = NULL,
.cb_barrier = NULL,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
@@ -519,6 +535,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_bh_torture_deferred_free,
.sync = synchronize_rcu_bh,
+ .call = call_rcu_bh,
.cb_barrier = rcu_barrier_bh,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
@@ -535,6 +552,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_bh,
+ .call = NULL,
.cb_barrier = NULL,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
@@ -551,6 +569,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = {
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_bh_expedited,
+ .call = NULL,
.cb_barrier = NULL,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
@@ -606,6 +625,11 @@ static int srcu_torture_completed(void)
return srcu_batches_completed(&srcu_ctl);
}
+static void srcu_torture_deferred_free(struct rcu_torture *rp)
+{
+ call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb);
+}
+
static void srcu_torture_synchronize(void)
{
synchronize_srcu(&srcu_ctl);
@@ -620,7 +644,7 @@ static int srcu_torture_stats(char *page)
cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):",
torture_type, TORTURE_FLAG, idx);
for_each_possible_cpu(cpu) {
- cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu,