summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/NMI-RCU.txt2
-rw-r--r--Documentation/RCU/lockdep-splat.txt110
-rw-r--r--Documentation/RCU/lockdep.txt34
-rw-r--r--Documentation/RCU/torture.txt137
-rw-r--r--Documentation/RCU/trace.txt38
-rw-r--r--include/linux/lockdep.h2
-rw-r--r--include/linux/rcupdate.h300
-rw-r--r--include/linux/rcutiny.h20
-rw-r--r--include/linux/rcutree.h2
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/types.h10
-rw-r--r--include/trace/events/rcu.h459
-rw-r--r--init/Kconfig6
-rw-r--r--kernel/lockdep.c84
-rw-r--r--kernel/pid.c4
-rw-r--r--kernel/rcu.h85
-rw-r--r--kernel/rcupdate.c26
-rw-r--r--kernel/rcutiny.c117
-rw-r--r--kernel/rcutiny_plugin.h134
-rw-r--r--kernel/rcutorture.c77
-rw-r--r--kernel/rcutree.c290
-rw-r--r--kernel/rcutree.h17
-rw-r--r--kernel/rcutree_plugin.h150
-rw-r--r--kernel/rcutree_trace.c13
-rw-r--r--kernel/rtmutex.c8
-rw-r--r--kernel/sched.c13
-rw-r--r--kernel/time/tick-sched.c6
27 files changed, 1489 insertions, 659 deletions
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
index bf82851a0e57..687777f83b23 100644
--- a/Documentation/RCU/NMI-RCU.txt
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -95,7 +95,7 @@ not to return until all ongoing NMI handlers exit. It is therefore safe
to free up the handler's data as soon as synchronize_sched() returns.
Important note: for this to work, the architecture in question must
-invoke irq_enter() and irq_exit() on NMI entry and exit, respectively.
+invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
Answer to Quick Quiz
diff --git a/Documentation/RCU/lockdep-splat.txt b/Documentation/RCU/lockdep-splat.txt
new file mode 100644
index 000000000000..bf9061142827
--- /dev/null
+++ b/Documentation/RCU/lockdep-splat.txt
@@ -0,0 +1,110 @@
+Lockdep-RCU was added to the Linux kernel in early 2010
+(http://lwn.net/Articles/371986/). This facility checks for some common
+misuses of the RCU API, most notably using one of the rcu_dereference()
+family to access an RCU-protected pointer without the proper protection.
+When such misuse is detected, an lockdep-RCU splat is emitted.
+
+The usual cause of a lockdep-RCU slat is someone accessing an
+RCU-protected data structure without either (1) being in the right kind of
+RCU read-side critical section or (2) holding the right update-side lock.
+This problem can therefore be serious: it might result in random memory
+overwriting or worse. There can of course be false positives, this
+being the real world and all that.
+
+So let's look at an example RCU lockdep splat from 3.0-rc5, one that
+has long since been fixed:
+
+===============================
+[ INFO: suspicious RCU usage. ]
+-------------------------------
+block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
+
+other info that might help us debug this:
+
+
+rcu_scheduler_active = 1, debug_locks = 0
+3 locks held by scsi_scan_6/1552:
+ #0: (&shost->scan_mutex){+.+.+.}, at: [<ffffffff8145efca>]
+scsi_scan_host_selected+0x5a/0x150
+ #1: (&eq->sysfs_lock){+.+...}, at: [<ffffffff812a5032>]
+elevator_exit+0x22/0x60
+ #2: (&(&q->__queue_lock)->rlock){-.-...}, at: [<ffffffff812b6233>]
+cfq_exit_queue+0x43/0x190
+
+stack backtrace:
+Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
+Call Trace:
+ [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
+ [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
+ [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
+ [<ffffffff812a5046>] elevator_exit+0x36/0x60
+ [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
+ [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
+ [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
+ [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
+ [<ffffffff817da069>] ? error_exit+0x29/0xb0
+ [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
+ [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
+ [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
+ [<ffffffff817da069>] ? error_exit+0x29/0xb0
+ [<ffffffff812bcc60>] ? kobject_del+0x40/0x40
+ [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
+ [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
+ [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
+ [<ffffffff8145f170>] do_scan_async+0x20/0x160
+ [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
+ [<ffffffff810975b6>] kthread+0xa6/0xb0
+ [<ffffffff817db154>] kernel_thread_helper+0x4/0x10
+ [<ffffffff81066430>] ? finish_task_switch+0x80/0x110
+ [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
+ [<ffffffff81097510>] ? __init_kthread_worker+0x70/0x70
+ [<ffffffff817db150>] ? gs_change+0xb/0xb
+
+Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows:
+
+ if (rcu_dereference(ioc->ioc_data) == cic) {
+
+This form says that it must be in a plain vanilla RCU read-side critical
+section, but the "other info" list above shows that this is not the
+case. Instead, we hold three locks, one of which might be RCU related.
+And maybe that lock really does protect this reference. If so, the fix
+is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to
+take the struct request_queue "q" from cfq_exit_queue() as an argument,
+which would permit us to invoke rcu_dereference_protected as follows:
+
+ if (rcu_dereference_protected(ioc->ioc_data,
+ lockdep_is_held(&q->queue_lock)) == cic) {
+
+With this change, there would be no lockdep-RCU splat emitted if this
+code was invoked either from within an RCU read-side critical section
+or with the ->queue_lock held. In particular, this would have suppressed
+the above lockdep-RCU splat because ->queue_lock is held (see #2 in the
+list above).
+
+On the other hand, perhaps we really do need an RCU read-side critical
+section. In this case, the critical section must span the use of the
+return value from rcu_dereference(), or at least until there is some
+reference count incremented or some such. One way to handle this is to
+add rcu_read_lock() and rcu_read_unlock() as follows:
+
+ rcu_read_lock();
+ if (rcu_dereference(ioc->ioc_data) == cic) {
+ spin_lock(&ioc->lock);
+ rcu_assign_pointer(ioc->ioc_data, NULL);
+ spin_unlock(&ioc->lock);
+ }
+ rcu_read_unlock();
+
+With this change, the rcu_dereference() is always within an RCU
+read-side critical section, which again would have suppressed the
+above lockdep-RCU splat.
+
+But in this particular case, we don't actually deference the pointer
+returned from rcu_dereference(). Instead, that pointer is just compared
+to the cic pointer, which means that the rcu_dereference() can be replaced
+by rcu_access_pointer() as follows:
+
+ if (rcu_access_pointer(ioc->ioc_data) == cic) {
+
+Because it is legal to invoke rcu_access_pointer() without protection,
+this change would also suppress the above lockdep-RCU splat.
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
index d7a49b2f6994..a102d4b3724b 100644
--- a/Documentation/RCU/lockdep.txt
+++ b/Documentation/RCU/lockdep.txt
@@ -32,9 +32,27 @@ checking of rcu_dereference() primitives:
srcu_dereference(p, sp):
Check for SRCU read-side critical section.
rcu_dereference_check(p, c):
- Use explicit check expression "c". This is useful in
- code that is invoked by both readers and updaters.
- rcu_dereference_raw(p)
+ Use explicit check expression "c" along with
+ rcu_read_lock_held(). This is useful in code that is
+ invoked by both RCU readers and updaters.
+ rcu_dereference_bh_check(p, c):
+ Use explicit check expression "c" along with
+ rcu_read_lock_bh_held(). This is useful in code that
+ is invoked by both RCU-bh readers and updaters.
+ rcu_dereference_sched_check(p, c):
+ Use explicit check expression "c" along with
+ rcu_read_lock_sched_held(). This is useful in code that
+ is invoked by both RCU-sched readers and updaters.
+ srcu_dereference_check(p, c):
+ Use explicit check expression "c" along with
+ srcu_read_lock_held()(). This is useful in code that
+ is invoked by both SRCU readers and updaters.
+ rcu_dereference_index_check(p, c):
+ Use explicit check expression "c", but the caller
+ must supply one of the rcu_read_lock_held() functions.
+ This is useful in code that uses RCU-protected arrays
+ that is invoked by both RCU readers and updaters.
+ rcu_dereference_raw(p):
Don't check. (Use sparingly, if at all.)
rcu_dereference_protected(p, c):
Use explicit check expression "c", and omit all barriers
@@ -48,13 +66,11 @@ checking of rcu_dereference() primitives:
value of the pointer itself, for example, against NULL.
The rcu_dereference_check() check expression can be any boolean
-expression, but would normally include one of the rcu_read_lock_held()
-family of functions and a lockdep expression. However, any boolean
-expression can be used. For a moderately ornate example, consider
-the following:
+expression, but would normally include a lockdep expression. However,
+any boolean expression can be used. For a moderately ornate example,
+consider the following:
file = rcu_dereference_check(fdt->fd[fd],
- rcu_read_lock_held() ||
lockdep_is_held(&files->file_lock) ||
atomic_read(&files->count) == 1);
@@ -62,7 +78,7 @@ This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner,
and, if CONFIG_PROVE_RCU is configured, verifies that this expression
is used in:
-1. An RCU read-side critical section, or
+1. An RCU read-side critical section (implicit), or
2. with files->file_lock held, or
3. on an unshared files_struct.
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 5d9016795fd8..783d6c134d3f 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -42,7 +42,7 @@ fqs_holdoff Holdoff time (in microseconds) between consecutive calls
fqs_stutter Wait time (in seconds) between consecutive bursts
of calls to force_quiescent_state().
-irqreaders Says to invoke RCU readers from irq level. This is currently
+irqreader Says to invoke RCU readers from irq level. This is currently
done via timers. Defaults to "1" for variants of RCU that
permit this. (Or, more accurately, variants of RCU that do
-not- permit this know to ignore this variable.)
@@ -79,19 +79,68 @@ stutter The length of time to run the test before pausing for this
Specifying "stutter=0" causes the test to run continuously
without pausing, which is the old default behavior.
+test_boost Whether or not to test the ability of RCU to do priority
+ boosting. Defaults to "test_boost=1", which performs
+ RCU priority-inversion testing only if the selected
+ RCU implementation supports priority boosting. Specifying
+ "test_boost=0" never performs RCU priority-inversion
+ testing. Specifying "test_boost=2" performs RCU
+ priority-inversion testing even if the selected RCU
+ implementation does not support RCU priority boosting,
+ which can be used to test rcutorture's ability to
+ carry out RCU priority-inversion testing.
+
+test_boost_interval
+ The number of seconds in an RCU priority-inversion test
+ cycle. Defaults to "test_boost_interval=7". It is
+ usually wise for this value to be relatively prime to
+ the value selected for "stutter".
+
+test_boost_duration
+ The number of seconds to do RCU priority-inversion testing
+ within any given "test_boost_interval". Defaults to
+ "test_boost_duration=4".
+
test_no_idle_hz Whether or not to test the ability of RCU to operate in
a kernel that disables the scheduling-clock interrupt to
idle CPUs. Boolean parameter, "1" to test, "0" otherwise.
Defaults to omitting this test.
-torture_type The type of RCU to test: "rcu" for the rcu_read_lock() API,
- "rcu_sync" for rcu_read_lock() with synchronous reclamation,
- "rcu_bh" for the rcu_read_lock_bh() API, "rcu_bh_sync" for
- rcu_read_lock_bh() with synchronous reclamation, "srcu" for
- the "srcu_read_lock()" API, "sched" for the use of
- preempt_disable() together with synchronize_sched(),
- and "sched_expedited" for the use of preempt_disable()
- with synchronize_sched_expedited().
+torture_type The type of RCU to test, with string values as follows:
+
+ "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu().
+
+ "rcu_sync": rcu_read_lock(), rcu_read_unlock(), and
+ synchronize_rcu().
+
+ "rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and
+ synchronize_rcu_expedited().
+
+ "rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and
+ call_rcu_bh().
+
+ "rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(),
+ and synchronize_rcu_bh().
+
+ "rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(),
+ and synchronize_rcu_bh_expedited().
+
+ "srcu": srcu_read_lock(), srcu_read_unlock() and
+ synchronize_srcu().
+
+ "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
+ synchronize_srcu_expedited().
+
+ "sched": preempt_disable(), preempt_enable(), and
+ call_rcu_sched().
+
+ "sched_sync": preempt_disable(), preempt_enable(), and
+ synchronize_sched().
+
+ "sched_expedited": preempt_disable(), preempt_enable(), and
+ synchronize_sched_expedited().
+
+ Defaults to "rcu".
verbose Enable debug printk()s. Default is disabled.
@@ -100,12 +149,12 @@ OUTPUT
The statistics output is as follows:
- rcu-torture: --- Start of test: nreaders=16 stat_interval=0 verbose=0
- rcu-torture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915
- rcu-torture: Reader Pipe: 1466408 9747 0 0 0 0 0 0 0 0 0
- rcu-torture: Reader Batch: 1464477 11678 0 0 0 0 0 0 0 0
- rcu-torture: Free-Block Circulation: 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0
- rcu-torture: --- End of test
+ rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
+ rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
+ rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0
+ rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0
+ rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
+ rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
The command "dmesg | grep torture:" will extract this information on
most systems. On more esoteric configurations, it may be necessary to
@@ -113,26 +162,55 @@ use other commands to access the output of the printk()s used by
the RCU torture test. The printk()s use KERN_ALERT, so they should
be evident. ;-)
+The first and last lines show the rcutorture module parameters, and the
+last line shows either "SUCCESS" or "FAILURE", based on rcutorture's
+automatic determination as to whether RCU operated correctly.
+
The entries are as follows:
o "rtc": The hexadecimal address of the structure currently visible
to readers.
-o "ver": The number of times since boot that the rcutw writer task
+o "ver": The number of times since boot that the RCU writer task
has changed the structure visible to readers.
o "tfle": If non-zero, indicates that the "torture freelist"
- containing structure to be placed into the "rtc" area is empty.
+ containing structures to be placed into the "rtc" area is empty.
This condition is important, since it can fool you into thinking
that RCU is working when it is not. :-/
o "rta": Number of structures allocated from the torture freelist.
o "rtaf": Number of allocations from the torture freelist that have
- failed due to the list being empty.
+ failed due to the list being empty. It is not unusual for this
+ to be non-zero, but it is bad for it to be a large fraction of
+ the value indicated by "rta".
o "rtf": Number of frees into the torture freelist.
+o "rtmbe": A non-zero value indicates that rcutorture believes that
+ rcu_assign_pointer() and rcu_dereference() are not working
+ correctly. This value should be zero.
+
+o "rtbke": rcutorture was unable to create the real-time kthreads
+ used to force RCU priority inversion. This value should be zero.
+
+o "rtbre": Although rcutorture successfully created the kthreads
+ used to force RCU priority inversion, it was unable to set them
+ to the real-time priority level of 1. This value should be zero.
+
+o "rtbf": The number of times that RCU priority boosting failed
+ to resolve RCU priority inversion.
+
+o "rtb": The number of times that rcutorture attempted to force
+ an RCU priority inversion condition. If you are testing RCU
+ priority boosting via the "test_boost" module parameter, this
+ value should be non-zero.
+
+o "nt": The number of times rcutorture ran RCU read-side code from
+ within a timer handler. This value should be non-zero only
+ if you specified the "irqreader" module parameter.
+
o "Reader Pipe": Histogram of "ages" of structures seen by readers.
If any entries past the first two are non-zero, RCU is broken.
And rcutorture prints the error flag string "!!!" to make sure
@@ -162,26 +240,15 @@ o "Free-Block Circulation": Shows the number of torture structures
somehow gets incremented farther than it should.
Different implementations of RCU can provide implementation-specific
-additional information. For example, SRCU provides the following:
+additional information. For example, SRCU provides the following
+additional line:
- srcu-torture: rtc: f8cf46a8 ver: 355 tfle: 0 rta: 356 rtaf: 0 rtf: 346 rtmbe: 0
- srcu-torture: Reader Pipe: 559738 939 0 0 0 0 0 0 0 0 0
- srcu-torture: Reader Batch: 560434 243 0 0 0 0 0 0 0 0
- srcu-torture: Free-Block Circulation: 355 354 353 352 351 350 349 348 347 346 0
srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1)
-The first four lines are similar to those for RCU. The last line shows
-the per-CPU counter state. The numbers in parentheses are the values
-of the "old" and "current" counters for the corresponding CPU. The
-"idx" value maps the "old" and "current" values to the underlying array,
-and is useful for debugging.
-
-Similarly, sched_expedited RCU provides the following:
-
- sched_expedited-torture: rtc: d0000000016c1880 ver: 1090796 tfle: 0 rta: 1090796 rtaf: 0 rtf: 1090787 rtmbe: 0 nt: 27713319
- sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0
- sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0
- sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0
+This line shows the per-CPU counter state. The numbers in parentheses are
+the values of the "old" and "current" counters for the corresponding CPU.
+The "idx" value maps the "old" and "current" values to the underlying
+array, and is useful for debugging.
USAGE
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 8173cec473aa..aaf65f6c6cd7 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -33,23 +33,23 @@ rcu/rcuboost:
The output of "cat rcu/rcudata" looks as follows:
rcu_sched:
- 0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
- 1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
- 2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
- 3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
- 4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
- 5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
- 6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
- 7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
+ 0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
+ 1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
+ 2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
+ 3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
+ 4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
+ 5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
+ 6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
+ 7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
rcu_bh:
- 0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
- 1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
- 2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
- 3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
- 4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
- 5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
- 6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
- 7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
+ 0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
+ 1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
+ 2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
+ 3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
+ 4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
+ 5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
+ 6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
+ 7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
The first section lists the rcu_data structures for rcu_sched, the second
for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
@@ -84,7 +84,7 @@ o "pq" indicates that this CPU has passed through a quiescent state
CPU has not yet reported that fact, (2) some other CPU has not
yet reported for this grace period, or (3) both.
-o "pqc" indicates which grace period the last-observed quiescent
+o "pgp" indicates which grace period the last-observed quiescent
state for this CPU corresponds to. This is important for handling
the race between CPU 0 reporting an extended dynticks-idle
quiescent state for CPU 1 and CPU 1 suddenly waking up and
@@ -184,10 +184,14 @@ o "kt" is the per-CPU kernel-thread state. The digit preceding
The number after the final slash is the CPU that the kthread
is actually running on.
+ This field is displayed only for CONFIG_RCU_BOOST kernels.
+
o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
the number of times that this CPU's per-CPU kthread has gone
through its loop servicing invoke_rcu_cpu_kthread() requests.
+ This field is displayed only for CONFIG_RCU_BOOST kernels.
+
o "b" is the batch limit for this CPU. If more than this number
of RCU callbacks is ready to invoke, then the remainder will
be deferred.
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index ef820a3c378b..b6a56e37284c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -548,7 +548,7 @@ do { \
#endif
#ifdef CONFIG_PROVE_RCU
-extern void lockdep_rcu_dereference(const char *file, const int line);
+void lockdep_rcu_suspicious(const char *file, const int line, const char *s);
#endif
#endif /* __LINUX_LOCKDEP_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8f4f881a0ad8..2cf4226ade7e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -33,6 +33,7 @@
#ifndef __LINUX_RCUPDATE_H
#define __LINUX_RCUPDATE_H
+#include <linux/types.h>
#include <linux/cache.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
@@ -64,32 +65,74 @@ static inline void rcutorture_record_progress(unsigned long vernum)
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
+/* Exported common interfaces */
+
+#ifdef CONFIG_PREEMPT_RCU
+
/**
- * struct rcu_head - callback structure for use with RCU
- * @next: next update requests in a list
- * @func: actual update function to call after the grace period.
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed. However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked. RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
*/
-struct rcu_head {
- struct rcu_head *next;
- void (*func)(struct rcu_head *head);
-};
+extern void call_rcu(struct rcu_head *head,
+ void (*func)(struct rcu_head *head));
-/* Exported common interfaces */
+#else /* #ifdef CONFIG_PREEMPT_RCU */
+
+/* In classic RCU, call_rcu() is just call_rcu_sched(). */
+#define call_rcu call_rcu_sched
+
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
+/**
+ * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_bh() assumes
+ * that the read-side critical sections end on completion of a softirq
+ * handler. This means that read-side critical sections in process
+ * context must not be interrupted by softirqs. This interface is to be
+ * used when most of the read-side critical sections are in softirq context.
+ * RCU read-side critical sections are delimited by :
+ * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
+ * OR
+ * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
+ * These may be nested.
+ */
+extern void call_rcu_bh(struct rcu_head *head,
+ void (*func)(struct rcu_head *head));
+
+/**
+ * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_sched() assumes
+ * that the read-side critical sections end on enabling of preemption
+ * or on voluntary preemption.
+ * RCU read-side critical sections are delimited by :
+ * - rcu_read_lock_sched() and rcu_read_unlock_sched(),
+ * OR
+ * anything that disables preemption.
+ * These may be nested.
+ */
extern void call_rcu_sched(struct rcu_head *head,
void (*func)(struct rcu_head *rcu));
-extern void synchronize_sched(void);
-extern void rcu_barrier_bh(void);
-extern void rcu_barrier_sched(void);
-
-static inline void __rcu_read_lock_bh(void)
-{
- local_bh_disable();
-}
-static inline void __rcu_read_unlock_bh(void)
-{
- local_bh_enable();
-}
+extern void synchronize_sched(void);
#ifdef CONFIG_PREEMPT_RCU
@@ -152,6 +195,15 @@ static inline void rcu_exit_nohz(void)
#endif /* #else #ifdef CONFIG_NO_HZ */
+/*
+ * Infrastructure to implement the synchronize_() primitives in
+ * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
+ */
+
+typedef void call_rcu_func_t(struct rcu_head *head,
+ void (*func)(struct rcu_head *head));
+void wait_rcu_gp(call_rcu_func_t crf);
+
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
#include <linux/rcutree.h>
#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
@@ -297,19 +349,31 @@ extern int rcu_my_thread_group_empty(void);
/**
* rcu_lockdep_assert - emit lockdep splat if specified condition not met
* @c: condition to check
+ * @s: informative message
*/
-#define rcu_lockdep_assert(c) \
+#define rcu_lockdep_assert(c, s) \
do { \
static bool __warned; \
if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
__warned = true; \
- lockdep_rcu_dereference(__FILE__, __LINE__); \
+ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
} \
} while (0)
+#define rcu_sleep_check() \
+ do { \
+ rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \
+ "Illegal context switch in RCU-bh" \
+ " read-side critical section"); \
+ rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \
+ "Illegal context switch in RCU-sched"\
+ " read-side critical section"); \
+ } while (0)
+
#else /* #ifdef CONFIG_PROVE_RCU */
-#define rcu_lockdep_assert(c) do { } while (0)
+#define rcu_lockdep_assert(c, s) do { } while (0)
+#define rcu_sleep_check() do { } while (0)
#endif /* #else #ifdef CONFIG_PROVE_RCU */
@@ -338,14 +402,16 @@ extern int rcu_my_thread_group_empty(void);
#define __rcu_dereference_check(p, c, space) \
({ \
typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
- rcu_lockdep_assert(c); \
+ rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \
+ " usage"); \
rcu_dereference_sparse(p, space); \
smp_read_barrier_depends(); \
((typeof(*p) __force __kernel *)(_________p1)); \
})
#define __rcu_dereference_protected(p, c, space) \
({ \
- rcu_lockdep_assert(c); \
+ rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \
+ " usage"); \
rcu_dereference_sparse(p, space); \
((typeof(*p) __force __kernel *)(p)); \
})
@@ -359,15 +425,15 @@ extern int rcu_my_thread_group_empty(void);
#define __rcu_dereference_index_check(p, c) \
({ \
typeof(p) _________p1 = ACCESS_ONCE(p); \
- rcu_lockdep_assert(c); \
+ rcu_lockdep_assert(c, \
+ "suspicious rcu_dereference_index_check()" \
+ " usage"); \
smp_read_barrier_depends(); \
(_________p1); \
})
#define __rcu_assign_pointer(p, v, space) \
({ \
- if (!__builtin_constant_p(v) || \
- ((v) != NULL)) \
- smp_wmb(); \
+ smp_wmb(); \
(p) = (typeof(*v) __force space *)(v); \
})
@@ -500,26 +566,6 @@ extern int rcu_my_thread_group_empty(void);
#define rcu_dereference_protected(p, c) \
__rcu_dereference_protected((p), (c), __rcu)
-/**
- * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented
- * @p: The pointer to read, prior to dereferencing
- * @c: The conditions under which the dereference will take place
- *
- * This is the RCU-bh counterpart to rcu_dereference_protected().
- */
-#define rcu_dereference_bh_protected(p, c) \
- __rcu_dereference_protected((p), (c), __rcu)
-
-/**
- * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented
- * @p: The pointer to read, prior to dereferencing
- * @c: The conditions under which the dereference will take place
- *
- * This is the RCU-sched counterpart to rcu_dereference_protected().
- */
-#define rcu_dereference_sched_protected(p, c) \
- __rcu_dereference_protected((p), (c), __rcu)
-
/**
* rcu_dereference() - fetch RCU-protected pointer for dereferencing
@@ -630,7 +676,7 @@ static inline void rcu_read_unlock(void)
*/
static inline void rcu_read_lock_bh(void)
{
- __rcu_read_lock_bh();
+ local_bh_disable();
__acquire(RCU_BH);
rcu_read_acquire_bh();
}
@@ -644,7 +690,7 @@ static inline void rcu_read_unlock_bh(void)
{
rcu_read_release_bh();
__release(RCU_BH);
- __rcu_read_unlock_bh();
+ local_bh_enable();
}
/**
@@ -698,11 +744,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* any prior initialization. Returns the value assigned.
*
* Inserts memory barriers on architectures that require them
- * (pretty much all of them other than x86), and also prevents
- * the compiler from reordering the code that initializes the
- * structure after the pointer assignment. More importantly, this
- * call documents which pointers will be dereferenced by RCU read-side
- * code.
+ * (which is most of them), and also prevents the compiler from
+ * reordering the code that initializes the structure after the pointer
+ * assignment. More importantly, this call documents which pointers
+ * will be dereferenced by RCU read-side code.
+ *
+ * In some special cases, you may use RCU_INIT_POINTER() instead
+ * of rcu_assign_pointer(). RCU_INIT_PO