diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-13 12:23:39 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-13 12:23:39 -0700 |
commit | de5d1b39ea0b38a9f4dfb08966042b7b91e2df30 (patch) | |
tree | 3591bdac4fe6756b4e3dc68b2ed1c792c4104218 | |
parent | 1c594774283a7cfe6dc0f8ffdfb2dbfc497502c4 (diff) | |
parent | fd2efaa4eb5317c3a86357a83a7d456a1b86a0ac (diff) |
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking/atomics update from Thomas Gleixner:
"The locking, atomics and memory model brains delivered:
- A larger update to the atomics code which reworks the ordering
barriers, consolidates the atomic primitives, provides the new
atomic64_fetch_add_unless() primitive and cleans up the include
hell.
- Simplify cmpxchg() instrumentation and add instrumentation for
xchg() and cmpxchg_double().
- Updates to the memory model and documentation"
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (48 commits)
locking/atomics: Rework ordering barriers
locking/atomics: Instrument cmpxchg_double*()
locking/atomics: Instrument xchg()
locking/atomics: Simplify cmpxchg() instrumentation
locking/atomics/x86: Reduce arch_cmpxchg64*() instrumentation
tools/memory-model: Rename litmus tests to comply to norm7
tools/memory-model/Documentation: Fix typo, smb->smp
sched/Documentation: Update wake_up() & co. memory-barrier guarantees
locking/spinlock, sched/core: Clarify requirements for smp_mb__after_spinlock()
sched/core: Use smp_mb() in wake_woken_function()
tools/memory-model: Add informal LKMM documentation to MAINTAINERS
locking/atomics/Documentation: Describe atomic_set() as a write operation
tools/memory-model: Make scripts executable
tools/memory-model: Remove ACCESS_ONCE() from model
tools/memory-model: Remove ACCESS_ONCE() from recipes
locking/memory-barriers.txt/kokr: Update Korean translation to fix broken DMA vs. MMIO ordering example
MAINTAINERS: Add Daniel Lustig as an LKMM reviewer
tools/memory-model: Fix ISA2+pooncelock+pooncelock+pombonce name
tools/memory-model: Add litmus test for full multicopy atomicity
locking/refcount: Always allow checked forms
...
77 files changed, 988 insertions, 1982 deletions
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst index 2e7165f86f55..724583453e1f 100644 --- a/Documentation/core-api/atomic_ops.rst +++ b/Documentation/core-api/atomic_ops.rst @@ -29,7 +29,7 @@ updated by one CPU, local_t is probably more appropriate. Please see local_t. The first operations to implement for atomic_t's are the initializers and -plain reads. :: +plain writes. :: #define ATOMIC_INIT(i) { (i) } #define atomic_set(v, i) ((v)->counter = (i)) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index a02d6bbfc9d0..0d8d7ef131e9 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -2179,32 +2179,41 @@ or: event_indicated = 1; wake_up_process(event_daemon); -A write memory barrier is implied by wake_up() and co. if and only if they -wake something up. The barrier occurs before the task state is cleared, and so -sits between the STORE to indicate the event and the STORE to set TASK_RUNNING: +A general memory barrier is executed by wake_up() if it wakes something up. +If it doesn't wake anything up then a memory barrier may or may not be +executed; you must not rely on it. The barrier occurs before the task state +is accessed, in particular, it sits between the STORE to indicate the event +and the STORE to set TASK_RUNNING: - CPU 1 CPU 2 + CPU 1 (Sleeper) CPU 2 (Waker) =============================== =============================== set_current_state(); STORE event_indicated smp_store_mb(); wake_up(); - STORE current->state <write barrier> - <general barrier> STORE current->state - LOAD event_indicated + STORE current->state ... + <general barrier> <general barrier> + LOAD event_indicated if ((LOAD task->state) & TASK_NORMAL) + STORE task->state -To repeat, this write memory barrier is present if and only if something -is actually awakened. To see this, consider the following sequence of -events, where X and Y are both initially zero: +where "task" is the thread being woken up and it equals CPU 1's "current". + +To repeat, a general memory barrier is guaranteed to be executed by wake_up() +if something is actually awakened, but otherwise there is no such guarantee. +To see this, consider the following sequence of events, where X and Y are both +initially zero: CPU 1 CPU 2 =============================== =============================== - X = 1; STORE event_indicated + X = 1; Y = 1; smp_mb(); wake_up(); - Y = 1; wait_event(wq, Y == 1); - wake_up(); load from Y sees 1, no memory barrier - load from X might see 0 + LOAD Y LOAD X + +If a wakeup does occur, one (at least) of the two loads must see 1. If, on +the other hand, a wakeup does not occur, both loads might see 0. -In contrast, if a wakeup does occur, CPU 2's load from X would be guaranteed -to see 1. +wake_up_process() always executes a general memory barrier. The barrier again +occurs before the task state is accessed. In particular, if the wake_up() in +the previous snippet were replaced by a call to wake_up_process() then one of +the two loads would be guaranteed to see 1. The available waker functions include: @@ -2224,6 +2233,8 @@ The available waker functions include: wake_up_poll(); wake_up_process(); +In terms of memory ordering, these functions all provide the same guarantees of +a wake_up() (or stronger). [!] Note that the memory barriers implied by the sleeper and the waker do _not_ order multiple stores before the wake-up with respect to loads of those stored diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 921739d00f69..7f01fb1c1084 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -1891,22 +1891,22 @@ Mandatory 배리어들은 SMP 시스템에서도 UP 시스템에서도 SMP 효 /* 소유권을 수정 */ desc->status = DEVICE_OWN; - /* MMIO 를 통해 디바이스에 공지를 하기 전에 메모리를 동기화 */ - wmb(); - /* 업데이트된 디스크립터의 디바이스에 공지 */ writel(DESC_NOTIFY, doorbell); } dma_rmb() 는 디스크립터로부터 데이터를 읽어오기 전에 디바이스가 소유권을 - 내놓았음을 보장하게 하고, dma_wmb() 는 디바이스가 자신이 소유권을 다시 - 가졌음을 보기 전에 디스크립터에 데이터가 쓰였음을 보장합니다. wmb() 는 - 캐시 일관성이 없는 (cache incoherent) MMIO 영역에 쓰기를 시도하기 전에 - 캐시 일관성이 있는 메모리 (cache coherent memory) 쓰기가 완료되었음을 - 보장해주기 위해 필요합니다. - - consistent memory 에 대한 자세한 내용을 위해선 Documentation/DMA-API.txt - 문서를 참고하세요. + 내려놓았을 것을 보장하고, dma_wmb() 는 디바이스가 자신이 소유권을 다시 + 가졌음을 보기 전에 디스크립터에 데이터가 쓰였을 것을 보장합니다. 참고로, + writel() 을 사용하면 캐시 일관성이 있는 메모리 (cache coherent memory) + 쓰기가 MMIO 영역에의 쓰기 전에 완료되었을 것을 보장하므로 writel() 앞에 + wmb() 를 실행할 필요가 없음을 알아두시기 바랍니다. writel() 보다 비용이 + 저렴한 writel_relaxed() 는 이런 보장을 제공하지 않으므로 여기선 사용되지 + 않아야 합니다. + + writel_relaxed() 와 같은 완화된 I/O 접근자들에 대한 자세한 내용을 위해서는 + "커널 I/O 배리어의 효과" 섹션을, consistent memory 에 대한 자세한 내용을 + 위해선 Documentation/DMA-API.txt 문서를 참고하세요. MMIO 쓰기 배리어 diff --git a/MAINTAINERS b/MAINTAINERS index d0d729016d65..629e08703c82 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8317,10 +8317,16 @@ M: Jade Alglave <j.alglave@ucl.ac.uk> M: Luc Maranget <luc.maranget@inria.fr> M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> R: Akira Yokosawa <akiyks@gmail.com> +R: Daniel Lustig <dlustig@nvidia.com> L: linux-kernel@vger.kernel.org +L: linux-arch@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git F: tools/memory-model/ +F: Documentation/atomic_bitops.txt +F: Documentation/atomic_t.txt +F: Documentation/core-api/atomic_ops.rst +F: Documentation/core-api/refcount-vs-atomic.rst F: Documentation/memory-barriers.txt LINUX SECURITY MODULE (LSM) FRAMEWORK diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 767bfdd42992..150a1c5d6a2c 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -18,11 +18,11 @@ * To ensure dependency ordering is preserved for the _relaxed and * _release atomics, an smp_read_barrier_depends() is unconditionally * inserted into the _relaxed variants, which are used to build the - * barriered versions. To avoid redundant back-to-back fences, we can - * define the _acquire and _fence versions explicitly. + * barriered versions. Avoid redundant back-to-back fences in the + * _acquire and _fence versions. */ -#define __atomic_op_acquire(op, args...) op##_relaxed(args) -#define __atomic_op_fence __atomic_op_release +#define __atomic_acquire_fence() +#define __atomic_post_full_fence() #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } @@ -206,7 +206,7 @@ ATOMIC_OPS(xor, xor) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** - * __atomic_add_unless - add unless the number is a given value + * atomic_fetch_add_unless - add unless the number is a given value * @v: pointer of type atomic_t * @a: the amount to add to v... * @u: ...unless v is equal to u. @@ -214,7 +214,7 @@ ATOMIC_OPS(xor, xor) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. */ -static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) +static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u) { int c, new, old; smp_mb(); @@ -235,38 +235,39 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) smp_mb(); return old; } - +#define atomic_fetch_add_unless atomic_fetch_add_unless /** - * atomic64_add_unless - add unless the number is a given value + * atomic64_fetch_add_unless - add unless the number is a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as it was not @u. - * Returns true iff @v was not @u. + * Returns the old value of @v. */ -static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) +static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) { - long c, tmp; + long c, new, old; smp_mb(); __asm__ __volatile__( - "1: ldq_l %[tmp],%[mem]\n" - " cmpeq %[tmp],%[u],%[c]\n" - " addq %[tmp],%[a],%[tmp]\n" + "1: ldq_l %[old],%[mem]\n" + " cmpeq %[old],%[u],%[c]\n" + " addq %[old],%[a],%[new]\n" " bne %[c],2f\n" - " stq_c %[tmp],%[mem]\n" - " beq %[tmp],3f\n" + " stq_c %[new],%[mem]\n" + " beq %[new],3f\n" "2:\n" ".subsection 2\n" "3: br 1b\n" ".previous" - : [tmp] "=&r"(tmp), [c] "=&r"(c) + : [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c) : [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u) : "memory"); smp_mb(); - return !c; + return old; } +#define atomic64_fetch_add_unless atomic64_fetch_add_unless /* * atomic64_dec_if_positive - decrement by 1 if old value positive @@ -295,31 +296,6 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) smp_mb(); return old - 1; } - -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) - -#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) -#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) - -#define atomic_dec_return(v) atomic_sub_return(1,(v)) -#define atomic64_dec_return(v) atomic64_sub_return(1,(v)) - -#define atomic_inc_return(v) atomic_add_return(1,(v)) -#define atomic64_inc_return(v) atomic64_add_return(1,(v)) - -#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0) -#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0) - -#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) -#define atomic64_inc_and_test(v) (atomic64_add_return(1, (v)) == 0) - -#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) -#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0) - -#define atomic_inc(v) atomic_add(1,(v)) -#define atomic64_inc(v) atomic64_add(1,(v)) - -#define atomic_dec(v) atomic_sub(1,(v)) -#define atomic64_dec(v) atomic64_sub(1,(v)) +#define atomic64_dec_if_positive atomic64_dec_if_positive #endif /* _ALPHA_ATOMIC_H */ diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 11859287c52a..4e0072730241 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -187,7 +187,8 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -#define atomic_andnot atomic_andnot +#define atomic_andnot atomic_andnot +#define atomic_fetch_andnot atomic_fetch_andnot #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ @@ -296,8 +297,6 @@ ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) -#define atomic_andnot(mask, v) atomic_and(~(mask), (v)) -#define atomic_fetch_andnot(mask, v) atomic_fetch_and(~(mask), (v)) ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) @@ -308,48 +307,6 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -/** - * __atomic_add_unless - add unless the number is a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns the old value of @v - */ -#define __atomic_add_unless(v, a, u) \ -({ \ - int c, old; \ - \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND thmeselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ - c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\ - c = old; \ - \ - smp_mb(); \ - \ - c; \ -}) - -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) - -#define atomic_inc(v) atomic_add(1, v) -#define atomic_dec(v) atomic_sub(1, v) - -#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) -#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) -#define atomic_inc_return(v) atomic_add_return(1, (v)) -#define atomic_dec_return(v) atomic_sub_return(1, (v)) -#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) - -#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0) - - #ifdef CONFIG_GENERIC_ATOMIC64 #include <asm-generic/atomic64.h> @@ -472,7 +429,8 @@ static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \ ATOMIC64_OP_RETURN(op, op1, op2) \ ATOMIC64_FETCH_OP(op, op1, op2) -#define atomic64_a |