summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/memory-barriers.txt922
-rw-r--r--Documentation/robust-futex-ABI.txt4
-rw-r--r--MAINTAINERS5
-rw-r--r--arch/alpha/include/asm/barrier.h25
-rw-r--r--arch/arc/include/asm/Kbuild1
-rw-r--r--arch/arc/include/asm/atomic.h5
-rw-r--r--arch/arc/include/asm/barrier.h5
-rw-r--r--arch/arm/include/asm/barrier.h15
-rw-r--r--arch/arm64/include/asm/barrier.h50
-rw-r--r--arch/avr32/include/asm/barrier.h17
-rw-r--r--arch/blackfin/include/asm/barrier.h18
-rw-r--r--arch/cris/include/asm/Kbuild1
-rw-r--r--arch/cris/include/asm/barrier.h25
-rw-r--r--arch/frv/include/asm/barrier.h8
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/atomic.h6
-rw-r--r--arch/hexagon/include/asm/barrier.h4
-rw-r--r--arch/ia64/include/asm/barrier.h23
-rw-r--r--arch/m32r/include/asm/barrier.h80
-rw-r--r--arch/m68k/include/asm/barrier.h14
-rw-r--r--arch/metag/include/asm/barrier.h15
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/barrier.h27
-rw-r--r--arch/mips/include/asm/barrier.h15
-rw-r--r--arch/mn10300/include/asm/Kbuild1
-rw-r--r--arch/mn10300/include/asm/barrier.h37
-rw-r--r--arch/parisc/include/asm/Kbuild1
-rw-r--r--arch/parisc/include/asm/barrier.h35
-rw-r--r--arch/powerpc/include/asm/barrier.h21
-rw-r--r--arch/powerpc/include/asm/spinlock.h2
-rw-r--r--arch/s390/include/asm/barrier.h15
-rw-r--r--arch/score/include/asm/Kbuild1
-rw-r--r--arch/score/include/asm/barrier.h16
-rw-r--r--arch/sh/include/asm/barrier.h21
-rw-r--r--arch/sparc/include/asm/barrier_32.h12
-rw-r--r--arch/sparc/include/asm/barrier_64.h15
-rw-r--r--arch/tile/include/asm/barrier.h68
-rw-r--r--arch/unicore32/include/asm/barrier.h11
-rw-r--r--arch/x86/include/asm/barrier.h43
-rw-r--r--arch/xtensa/include/asm/barrier.h9
-rw-r--r--include/asm-generic/barrier.h55
-rw-r--r--include/linux/compiler.h9
-rw-r--r--include/linux/spinlock.h10
-rw-r--r--kernel/futex.c201
-rw-r--r--kernel/locking/lockdep.c4
-rw-r--r--kernel/locking/mutex-debug.c7
-rw-r--r--kernel/rcu/tree.c18
-rw-r--r--kernel/rcu/tree_plugin.h13
-rw-r--r--kernel/softirq.c49
-rw-r--r--tools/lib/lockdep/Makefile251
-rw-r--r--tools/lib/lockdep/common.c33
-rw-r--r--tools/lib/lockdep/include/liblockdep/common.h50
-rw-r--r--tools/lib/lockdep/include/liblockdep/mutex.h70
-rw-r--r--tools/lib/lockdep/include/liblockdep/rwlock.h86
-rwxr-xr-xtools/lib/lockdep/lockdep3
-rw-r--r--tools/lib/lockdep/lockdep.c2
-rw-r--r--tools/lib/lockdep/lockdep_internals.h1
-rw-r--r--tools/lib/lockdep/lockdep_states.h1
-rw-r--r--tools/lib/lockdep/preload.c447
-rw-r--r--tools/lib/lockdep/rbtree.c1
-rw-r--r--tools/lib/lockdep/run_tests.sh27
-rw-r--r--tools/lib/lockdep/tests/AA.c13
-rw-r--r--tools/lib/lockdep/tests/ABBA.c13
-rw-r--r--tools/lib/lockdep/tests/ABBCCA.c15
-rw-r--r--tools/lib/lockdep/tests/ABBCCDDA.c17
-rw-r--r--tools/lib/lockdep/tests/ABCABC.c15
-rw-r--r--tools/lib/lockdep/tests/ABCDBCDA.c17
-rw-r--r--tools/lib/lockdep/tests/ABCDBDDA.c17
-rw-r--r--tools/lib/lockdep/tests/WW.c13
-rw-r--r--tools/lib/lockdep/tests/common.h12
-rw-r--r--tools/lib/lockdep/tests/unlock_balance.c12
-rw-r--r--tools/lib/lockdep/uinclude/asm/hweight.h3
-rw-r--r--tools/lib/lockdep/uinclude/asm/sections.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/bitops.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/compiler.h7
-rw-r--r--tools/lib/lockdep/uinclude/linux/debug_locks.h12
-rw-r--r--tools/lib/lockdep/uinclude/linux/delay.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/export.h7
-rw-r--r--tools/lib/lockdep/uinclude/linux/ftrace.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/gfp.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/hardirq.h11
-rw-r--r--tools/lib/lockdep/uinclude/linux/hash.h1
-rw-r--r--tools/lib/lockdep/uinclude/linux/interrupt.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/irqflags.h38
-rw-r--r--tools/lib/lockdep/uinclude/linux/kallsyms.h32
-rw-r--r--tools/lib/lockdep/uinclude/linux/kern_levels.h25
-rw-r--r--tools/lib/lockdep/uinclude/linux/kernel.h44
-rw-r--r--tools/lib/lockdep/uinclude/linux/kmemcheck.h8
-rw-r--r--tools/lib/lockdep/uinclude/linux/linkage.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/list.h1
-rw-r--r--tools/lib/lockdep/uinclude/linux/lockdep.h55
-rw-r--r--tools/lib/lockdep/uinclude/linux/module.h6
-rw-r--r--tools/lib/lockdep/uinclude/linux/mutex.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/poison.h1
-rw-r--r--tools/lib/lockdep/uinclude/linux/prefetch.h6
-rw-r--r--tools/lib/lockdep/uinclude/linux/proc_fs.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/rbtree.h1
-rw-r--r--tools/lib/lockdep/uinclude/linux/rbtree_augmented.h2
-rw-r--r--tools/lib/lockdep/uinclude/linux/rcu.h16
-rw-r--r--tools/lib/lockdep/uinclude/linux/seq_file.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/spinlock.h25
-rw-r--r--tools/lib/lockdep/uinclude/linux/stacktrace.h32
-rw-r--r--tools/lib/lockdep/uinclude/linux/stringify.h7
-rw-r--r--tools/lib/lockdep/uinclude/linux/types.h58
-rw-r--r--tools/lib/lockdep/uinclude/trace/events/lock.h3
105 files changed, 2825 insertions, 683 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index c8c42e64e953..102dc19c4119 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -194,18 +194,22 @@ There are some minimal guarantees that may be expected of a CPU:
(*) On any given CPU, dependent memory accesses will be issued in order, with
respect to itself. This means that for:
- Q = P; D = *Q;
+ ACCESS_ONCE(Q) = P; smp_read_barrier_depends(); D = ACCESS_ONCE(*Q);
the CPU will issue the following memory operations:
Q = LOAD P, D = LOAD *Q
- and always in that order.
+ and always in that order. On most systems, smp_read_barrier_depends()
+ does nothing, but it is required for DEC Alpha. The ACCESS_ONCE()
+ is required to prevent compiler mischief. Please note that you
+ should normally use something like rcu_dereference() instead of
+ open-coding smp_read_barrier_depends().
(*) Overlapping loads and stores within a particular CPU will appear to be
ordered within that CPU. This means that for:
- a = *X; *X = b;
+ a = ACCESS_ONCE(*X); ACCESS_ONCE(*X) = b;
the CPU will only issue the following sequence of memory operations:
@@ -213,7 +217,7 @@ There are some minimal guarantees that may be expected of a CPU:
And for:
- *X = c; d = *X;
+ ACCESS_ONCE(*X) = c; d = ACCESS_ONCE(*X);
the CPU will only issue:
@@ -224,6 +228,12 @@ There are some minimal guarantees that may be expected of a CPU:
And there are a number of things that _must_ or _must_not_ be assumed:
+ (*) It _must_not_ be assumed that the compiler will do what you want with
+ memory references that are not protected by ACCESS_ONCE(). Without
+ ACCESS_ONCE(), the compiler is within its rights to do all sorts
+ of "creative" transformations, which are covered in the Compiler
+ Barrier section.
+
(*) It _must_not_ be assumed that independent loads and stores will be issued
in the order given. This means that for:
@@ -371,33 +381,44 @@ Memory barriers come in four basic varieties:
And a couple of implicit varieties:
- (5) LOCK operations.
+ (5) ACQUIRE operations.
This acts as a one-way permeable barrier. It guarantees that all memory
- operations after the LOCK operation will appear to happen after the LOCK
- operation with respect to the other components of the system.
+ operations after the ACQUIRE operation will appear to happen after the
+ ACQUIRE operation with respect to the other components of the system.
+ ACQUIRE operations include LOCK operations and smp_load_acquire()
+ operations.
- Memory operations that occur before a LOCK operation may appear to happen
- after it completes.
+ Memory operations that occur before an ACQUIRE operation may appear to
+ happen after it completes.
- A LOCK operation should almost always be paired with an UNLOCK operation.
+ An ACQUIRE operation should almost always be paired with a RELEASE
+ operation.
- (6) UNLOCK operations.
+ (6) RELEASE operations.
This also acts as a one-way permeable barrier. It guarantees that all
- memory operations before the UNLOCK operation will appear to happen before
- the UNLOCK operation with respect to the other components of the system.
+ memory operations before the RELEASE operation will appear to happen
+ before the RELEASE operation with respect to the other components of the
+ system. RELEASE operations include UNLOCK operations and
+ smp_store_release() operations.
- Memory operations that occur after an UNLOCK operation may appear to
+ Memory operations that occur after a RELEASE operation may appear to
happen before it completes.
- LOCK and UNLOCK operations are guaranteed to appear with respect to each
- other strictly in the order specified.
+ The use of ACQUIRE and RELEASE operations generally precludes the need
+ for other sorts of memory barrier (but note the exceptions mentioned in
+ the subsection "MMIO write barrier"). In addition, a RELEASE+ACQUIRE
+ pair is -not- guaranteed to act as a full memory barrier. However, after
+ an ACQUIRE on a given variable, all memory accesses preceding any prior
+ RELEASE on that same variable are guaranteed to be visible. In other
+ words, within a given variable's critical section, all accesses of all
+ previous critical sections for that variable are guaranteed to have
+ completed.
- The use of LOCK and UNLOCK operations generally precludes the need for
- other sorts of memory barrier (but note the exceptions mentioned in the
- subsection "MMIO write barrier").
+ This means that ACQUIRE acts as a minimal "acquire" operation and
+ RELEASE acts as a minimal "release" operation.
Memory barriers are only required where there's a possibility of interaction
@@ -450,14 +471,14 @@ The usage requirements of data dependency barriers are a little subtle, and
it's not always obvious that they're needed. To illustrate, consider the
following sequence of events:
- CPU 1 CPU 2
- =============== ===============
+ CPU 1 CPU 2
+ =============== ===============
{ A == 1, B == 2, C = 3, P == &A, Q == &C }
B = 4;
<write barrier>
- P = &B
- Q = P;
- D = *Q;
+ ACCESS_ONCE(P) = &B
+ Q = ACCESS_ONCE(P);
+ D = *Q;
There's a clear data dependency here, and it would seem that by the end of the
sequence, Q must be either &A or &B, and that:
@@ -477,15 +498,15 @@ Alpha).
To deal with this, a data dependency barrier or better must be inserted
between the address load and the data load:
- CPU 1 CPU 2
- =============== ===============
+ CPU 1 CPU 2
+ =============== ===============
{ A == 1, B == 2, C = 3, P == &A, Q == &C }
B = 4;
<write barrier>
- P = &B
- Q = P;
- <data dependency barrier>
- D = *Q;
+ ACCESS_ONCE(P) = &B
+ Q = ACCESS_ONCE(P);
+ <data dependency barrier>
+ D = *Q;
This enforces the occurrence of one of the two implications, and prevents the
third possibility from arising.
@@ -500,25 +521,26 @@ odd-numbered bank is idle, one can see the new value of the pointer P (&B),
but the old value of the variable B (2).
-Another example of where data dependency barriers might by required is where a
+Another example of where data dependency barriers might be required is where a
number is read from memory and then used to calculate the index for an array
access:
- CPU 1 CPU 2
- =============== ===============
+ CPU 1 CPU 2
+ =============== ===============
{ M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 }
M[1] = 4;
<write barrier>
- P = 1
- Q = P;
- <data dependency barrier>
- D = M[Q];
+ ACCESS_ONCE(P) = 1
+ Q = ACCESS_ONCE(P);
+ <data dependency barrier>
+ D = M[Q];
-The data dependency barrier is very important to the RCU system, for example.
-See rcu_dereference() in include/linux/rcupdate.h. This permits the current
-target of an RCU'd pointer to be replaced with a new modified target, without
-the replacement target appearing to be incompletely initialised.
+The data dependency barrier is very important to the RCU system,
+for example. See rcu_assign_pointer() and rcu_dereference() in
+include/linux/rcupdate.h. This permits the current target of an RCU'd
+pointer to be replaced with a new modified target, without the replacement
+target appearing to be incompletely initialised.
See also the subsection on "Cache Coherency" for a more thorough example.
@@ -530,24 +552,190 @@ A control dependency requires a full read memory barrier, not simply a data
dependency barrier to make it work correctly. Consider the following bit of
code:
- q = &a;
- if (p) {
- <data dependency barrier>
- q = &b;
+ q = ACCESS_ONCE(a);
+ if (q) {
+ <data dependency barrier> /* BUG: No data dependency!!! */
+ p = ACCESS_ONCE(b);
}
- x = *q;
This will not have the desired effect because there is no actual data
-dependency, but rather a control dependency that the CPU may short-circuit by
-attempting to predict the outcome in advance. In such a case what's actually
-required is:
+dependency, but rather a control dependency that the CPU may short-circuit
+by attempting to predict the outcome in advance, so that other CPUs see
+the load from b as having happened before the load from a. In such a
+case what's actually required is:
- q = &a;
- if (p) {
+ q = ACCESS_ONCE(a);
+ if (q) {
<read barrier>
- q = &b;
+ p = ACCESS_ONCE(b);
+ }
+
+However, stores