diff options
61 files changed, 845 insertions, 466 deletions
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt index 613033ff2b9b..5e6429d66c24 100644 --- a/Documentation/RCU/rcuref.txt +++ b/Documentation/RCU/rcuref.txt @@ -12,6 +12,7 @@ please read on. Reference counting on elements of lists which are protected by traditional reader/writer spinlocks or semaphores are straightforward: +CODE LISTING A: 1. 2. add() search_and_reference() { { @@ -28,7 +29,8 @@ add() search_and_reference() release_referenced() delete() { { ... write_lock(&list_lock); - atomic_dec(&el->rc, relfunc) ... + if(atomic_dec_and_test(&el->rc)) ... + kfree(el); ... remove_element } write_unlock(&list_lock); ... @@ -44,6 +46,7 @@ search_and_reference() could potentially hold reference to an element which has already been deleted from the list/array. Use atomic_inc_not_zero() in this scenario as follows: +CODE LISTING B: 1. 2. add() search_and_reference() { { @@ -79,6 +82,7 @@ search_and_reference() code path. In such cases, the atomic_dec_and_test() may be moved from delete() to el_free() as follows: +CODE LISTING C: 1. 2. add() search_and_reference() { { @@ -114,6 +118,17 @@ element can therefore safely be freed. This in turn guarantees that if any reader finds the element, that reader may safely acquire a reference without checking the value of the reference counter. +A clear advantage of the RCU-based pattern in listing C over the one +in listing B is that any call to search_and_reference() that locates +a given object will succeed in obtaining a reference to that object, +even given a concurrent invocation of delete() for that same object. +Similarly, a clear advantage of both listings B and C over listing A is +that a call to delete() is not delayed even if there are an arbitrarily +large number of calls to search_and_reference() searching for the same +object that delete() was invoked on. Instead, all that is delayed is +the eventual invocation of kfree(), which is usually not a problem on +modern computer systems, even the small ones. + In cases where delete() can sleep, synchronize_rcu() can be called from delete(), so that el_free() can be subsumed into delete as follows: @@ -130,3 +145,7 @@ delete() kfree(el); ... } + +As additional examples in the kernel, the pattern in listing C is used by +reference counting of struct pid, while the pattern in listing B is used by +struct posix_acl. diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 1ab70c37921f..13e88fc00f01 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -153,7 +153,7 @@ rcupdate.rcu_task_stall_timeout This boot/sysfs parameter controls the RCU-tasks stall warning interval. A value of zero or less suppresses RCU-tasks stall warnings. A positive value sets the stall-warning interval - in jiffies. An RCU-tasks stall warning starts with the line: + in seconds. An RCU-tasks stall warning starts with the line: INFO: rcu_tasks detected stalls on tasks: diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 981651a8b65d..7e1a8721637a 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -212,7 +212,7 @@ synchronize_rcu() rcu_assign_pointer() - typeof(p) rcu_assign_pointer(p, typeof(p) v); + void rcu_assign_pointer(p, typeof(p) v); Yes, rcu_assign_pointer() -is- implemented as a macro, though it would be cool to be able to declare a function in this manner. @@ -220,9 +220,9 @@ rcu_assign_pointer() The updater uses this function to assign a new value to an RCU-protected pointer, in order to safely communicate the change - in value from the updater to the reader. This function returns - the new value, and also executes any memory-barrier instructions - required for a given CPU architecture. + in value from the updater to the reader. This macro does not + evaluate to an rvalue, but it does execute any memory-barrier + instructions required for a given CPU architecture. Perhaps just as important, it serves to document (1) which pointers are protected by RCU and (2) the point at which a diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 138f6664b2e2..b96fd15c7316 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3752,6 +3752,12 @@ the propagation of recent CPU-hotplug changes up the rcu_node combining tree. + rcutree.use_softirq= [KNL] + If set to zero, move all RCU_SOFTIRQ processing to + per-CPU rcuc kthreads. Defaults to a non-zero + value, meaning that RCU_SOFTIRQ is used by default. + Specify rcutree.use_softirq=0 to use rcuc kthreads. + rcutree.rcu_fanout_exact= [KNL] Disable autobalancing of the rcu_node combining tree. This is used by rcutorture, and might diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt index dca3fb0554db..b3afe69d03a1 100644 --- a/Documentation/atomic_t.txt +++ b/Documentation/atomic_t.txt @@ -187,8 +187,14 @@ The barriers: smp_mb__{before,after}_atomic() -only apply to the RMW ops and can be used to augment/upgrade the ordering -inherent to the used atomic op. These barriers provide a full smp_mb(). +only apply to the RMW atomic ops and can be used to augment/upgrade the +ordering inherent to the op. These barriers act almost like a full smp_mb(): +smp_mb__before_atomic() orders all earlier accesses against the RMW op +itself and all accesses following it, and smp_mb__after_atomic() orders all +later accesses against the RMW op and all accesses preceding it. However, +accesses between the smp_mb__{before,after}_atomic() and the RMW op are not +ordered, so it is advisable to place the barrier right next to the RMW atomic +op whenever possible. These helper barriers exist because architectures have varying implicit ordering on their SMP atomic primitives. For example our TSO architectures @@ -212,7 +218,9 @@ Further, while something like: atomic_dec(&X); is a 'typical' RELEASE pattern, the barrier is strictly stronger than -a RELEASE. Similarly for something like: +a RELEASE because it orders preceding instructions against both the read +and write parts of the atomic_dec(), and against all following instructions +as well. Similarly, something like: atomic_inc(&X); smp_mb__after_atomic(); @@ -244,7 +252,8 @@ strictly stronger than ACQUIRE. As illustrated: This should not happen; but a hypothetical atomic_inc_acquire() -- (void)atomic_fetch_inc_acquire() for instance -- would allow the outcome, -since then: +because it would not order the W part of the RMW against the following +WRITE_ONCE. Thus: P1 P2 diff --git a/Documentation/core-api/circular-buffers.rst b/Documentation/core-api/circular-buffers.rst index 53e51caa3347..50966f66e398 100644 --- a/Documentation/core-api/circular-buffers.rst +++ b/Documentation/core-api/circular-buffers.rst @@ -3,7 +3,7 @@ Circular Buffers ================ :Author: David Howells <dhowells@redhat.com> -:Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> +:Author: Paul E. McKenney <paulmck@linux.ibm.com> Linux provides a number of features that can be used to implement circular diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f70ebcdfe592..e4e07c8ab89e 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -3,7 +3,7 @@ ============================ By: David Howells <dhowells@redhat.com> - Paul E. McKenney <paulmck@linux.vnet.ibm.com> + Paul E. McKenney <paulmck@linux.ibm.com> Will Deacon <will.deacon@arm.com> Peter Zijlstra <peterz@infradead.org> diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index db0b9d8619f1..5f3c74dcad43 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -24,7 +24,7 @@ Documentation/memory-barriers.txt ========================= 저자: David Howells <dhowells@redhat.com> - Paul E. McKenney <paulmck@linux.vnet.ibm.com> + Paul E. McKenney <paulmck@linux.ibm.com> Will Deacon <will.deacon@arm.com> Peter Zijlstra <peterz@infradead.org> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 6e2377e6c1d6..e8eef38b2213 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -632,11 +632,18 @@ do { \ "IRQs not disabled as expected\n"); \ } while (0) +#define lockdep_assert_in_irq() do { \ + WARN_ONCE(debug_locks && !current->lockdep_recursion && \ + !current->hardirq_context, \ + "Not in hardirq as expected\n"); \ + } while (0) + #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) +# define lockdep_assert_in_irq() do { } while (0) #endif #ifdef CONFIG_LOCKDEP diff --git a/include/linux/module.h b/include/linux/module.h index 188998d3dca9..1455812dd325 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -21,6 +21,7 @@ #include <linux/rbtree_latch.h> #include <linux/error-injection.h> #include <linux/tracepoint-defs.h> +#include <linux/srcu.h> #include <linux/percpu.h> #include <asm/module.h> @@ -450,6 +451,10 @@ struct module { unsigned int num_tracepoints; tracepoint_ptr_t *tracepoints_ptrs; #endif +#ifdef CONFIG_TREE_SRCU + unsigned int num_srcu_structs; + struct srcu_struct **srcu_struct_ptrs; +#endif #ifdef CONFIG_BPF_EVENTS unsigned int num_bpf_raw_events; struct bpf_raw_event_map *bpf_raw_events; diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 03cb4b6f842e..2809b44cbbee 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -17,14 +17,18 @@ struct percpu_rw_semaphore { int readers_block; }; -#define DEFINE_STATIC_PERCPU_RWSEM(name) \ +#define __DEFINE_PERCPU_RWSEM(name, is_static) \ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ -static struct percpu_rw_semaphore name = { \ - .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ +is_static struct percpu_rw_semaphore name = { \ + .rss = __RCU_SYNC_INITIALIZER(name.rss), \ .read_count = &__percpu_rwsem_rc_##name, \ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ .writer = __RCUWAIT_INITIALIZER(name.writer), \ } +#define DEFINE_PERCPU_RWSEM(name) \ + __DEFINE_PERCPU_RWSEM(name, /* not static */) +#define DEFINE_STATIC_PERCPU_RWSEM(name) \ + __DEFINE_PERCPU_RWSEM(name, static) extern int __percpu_down_read(struct percpu_rw_semaphore *, int); extern void __percpu_up_read(struct percpu_rw_semaphore *); diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 6fc53a1345b3..9b83865d24f9 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -13,62 +13,44 @@ #include <linux/wait.h> #include <linux/rcupdate.h> -enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; - /* Structure to mediate between updaters and fastpath-using readers. */ struct rcu_sync { int gp_state; int gp_count; wait_queue_head_t gp_wait; - int cb_state; struct rcu_head cb_head; - - enum rcu_sync_type gp_type; }; -extern void rcu_sync_lockdep_assert(struct rcu_sync *); - /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization * - * Returns true if readers are permitted to use their fastpaths. - * Must be invoked within an RCU read-side critical section whose - * flavor matches that of the rcu_sync struture. + * Returns true if readers are permitted to use their fastpaths. Must be + * invoked within some flavor of RCU read-side critical section. */ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { -#ifdef CONFIG_PROVE_RCU - rcu_sync_lockdep_assert(rsp); -#endif - return !rsp->gp_state; /* GP_IDLE */ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && + !rcu_read_lock_bh_held() && + !rcu_read_lock_sched_held(), + "suspicious rcu_sync_is_idle() usage"); + return !READ_ONCE(rsp->gp_state); /* GP_IDLE */ } -extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); +extern void rcu_sync_init(struct rcu_sync *); extern void rcu_sync_enter_start(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); -#define __RCU_SYNC_INITIALIZER(name, type) { \ +#define __RCU_SYNC_INITIALIZER(name) { \ .gp_state = 0, \ .gp_count = 0, \ .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ - .cb_state = 0, \ - .gp_type = type, \ } -#define __DEFINE_RCU_SYNC(name, type) \ - struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type) - -#define DEFINE_RCU_SYNC(name) \ - __DEFINE_RCU_SYNC(name, RCU_SYNC) - -#define DEFINE_RCU_SCHED_SYNC(name) \ - __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC) - -#define DEFINE_RCU_BH_SYNC(name) \ - __DEFINE_RCU_SYNC(name, RCU_BH_SYNC) +#define DEFINE_RCU_SYNC(name) \ + struct rcu_sync name = __RCU_SYNC_INITIALIZER(name) #endif /* _LINUX_RCU_SYNC_H_ */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b25d20822e75..8f7167478c1d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -365,16 +365,15 @@ static inline void rcu_preempt_sleep_check(void) { } * other macros that it invokes. */ #define rcu_assign_pointer(p, v) \ -({ \ +do { \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ - rcu_check_sparse(p, __rcu); \ + rcu_check_sparse(p, __rcu); \ \ if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ else \ smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ - _r_a_p__v; \ -}) +} while (0) /** * rcu_swap_protected() - swap an RCU and a regular pointer @@ -586,7 +585,7 @@ static inline void rcu_preempt_sleep_check(void) { } * read-side critical sections may be preempted and they may also block, but * only when acquiring spinlocks that are subject to priority inheritance. */ -static inline void rcu_read_lock(void) +static __always_inline void rcu_read_lock(void) { __rcu_read_lock(); __acquire(RCU); @@ -803,7 +802,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree - * @rcu_head: the name of the struct rcu_head within the type of @ptr. + * @rhf: the name of the struct rcu_head within the type of @ptr. * * Many rcu callbacks functions just call kfree() on the base structure. * These functions are trivial, but their size adds up, and furthermore @@ -826,9 +825,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * The BUILD_BUG_ON check must not involve any function calls, hence the * checks are done in macros here. */ -#define kfree_rcu(ptr, rcu_head) \ - __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) - +#define kfree_rcu(ptr, rhf) \ +do { \ + typeof (ptr) ___p = (ptr); \ + \ + if (___p) \ + __kfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \ +} while (0) /* * Place this after a lock-acquisition primitive to guarantee that diff --git a/include/linux/sched.h b/include/linux/sched.h index 11837410690f..942a44c1b8eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -565,7 +565,7 @@ union rcu_special { u8 blocked; u8 need_qs; u8 exp_hint; /* Hint for performance. */ - u8 pad; /* No garbage from compiler! */ + u8 deferred_qs; } b; /* Bits. */ u32 s; /* Set of bits. */ }; diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 7f7c8c050f63..9cfcc8a756ae 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -120,9 +120,17 @@ struct srcu_struct { * * See include/linux/percpu-defs.h for the rules on per-CPU variables. */ -#define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data) +#ifdef MODULE +# define __DEFINE_SRCU(name, is_static) \ + is_static struct srcu_struct name; \ + struct srcu_struct * const __srcu_struct_##name \ + __section("___srcu_struct_ptrs") = &name +#else +# define __DEFINE_SRCU(name, is_static) \ + static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \ + is_static struct srcu_struct name = \ + __SRCU_STRUCT_INIT(name, name##_srcu_data) +#endif #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) diff --git a/include/linux/torture.h b/include/linux/torture.h index 23d80db426d7..a620118385bb 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -66,7 +66,7 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void)); /* Task stuttering, which forces load/no-load transitions. */ bool stutter_wait(const char *title); -int torture_stutter_init(int s); +int torture_stutter_init(int s, int sgap); /* Initialization and cleanup. */ bool torture_init_begin(char *ttype, int v); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index bf9dbffd46b1..cdbeff87fa99 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -101,7 +101,7 @@ static DEFINE_SPINLOCK(cgroup_idr_lock); */ static DEFINE_SPINLOCK(cgroup_file_kn_lock); -struct percpu_rw_semaphore cgroup_threadgroup_rwsem; +DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem); #define cgroup_assert_mutex_or_rcu_locked() \ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ @@ -5666,7 +5666,6 @@ int __init cgroup_init(void) int ssid; BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); - BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 78f61bfc6b79..97c367f0a9aa 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -46,7 +46,7 @@ static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) -static struct percpu_rw_semaphore dup_mmap_sem; +DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 @@ -2302,7 +2302,5 @@ void __init uprobes_init(void) for (i = 0; i < UPROBES_HASH_SZ; i++) mutex_init(&uprobes_mmap_mutex[i]); - BUG_ON(percpu_init_rwsem(&dup_mmap_sem)); - BUG_ON(register_die_notifier(&uprobe_exception_nb)); } diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 80a463d31a8d..c513031cd7e3 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -975,7 +975,7 @@ static int __init lock_torture_init(void) goto unwind; } if (stutter > 0) { - firsterr = torture_stutter_init(stutter); + firsterr = torture_stutter_init(stutter, stutter); if (firsterr) goto unwind; } diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index b6a9cc62099a..364d38a0c444 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, return -ENOMEM; /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ - rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); + rcu_sync_init(&sem->rss); __init_rwsem(&sem->rw_sem, name, rwsem_key); rcuwait_init(&sem->writer); sem->readers_block = 0; diff --git a/kernel/module.c b/k |