diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/compat.c | 63 | ||||
-rw-r--r-- | kernel/fork.c | 3 | ||||
-rw-r--r-- | kernel/hung_task.c | 4 | ||||
-rw-r--r-- | kernel/irq/chip.c | 1 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 1 | ||||
-rw-r--r-- | kernel/rcupdate.c | 28 | ||||
-rw-r--r-- | kernel/rcutiny_plugin.h | 16 | ||||
-rw-r--r-- | kernel/rcutorture.c | 257 | ||||
-rw-r--r-- | kernel/rcutree.c | 332 | ||||
-rw-r--r-- | kernel/rcutree.h | 23 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 154 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 4 | ||||
-rw-r--r-- | kernel/sched/core.c | 3 | ||||
-rw-r--r-- | kernel/srcu.c | 548 | ||||
-rw-r--r-- | kernel/timer.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 5 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 1 |
17 files changed, 1134 insertions, 317 deletions
diff --git a/kernel/compat.c b/kernel/compat.c index 74ff8498809a..d2c67aa49ae6 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -372,25 +372,54 @@ asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set) #ifdef __ARCH_WANT_SYS_SIGPROCMASK -asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, - compat_old_sigset_t __user *oset) +/* + * sys_sigprocmask SIG_SETMASK sets the first (compat) word of the + * blocked set of signals to the supplied signal set + */ +static inline void compat_sig_setmask(sigset_t *blocked, compat_sigset_word set) { - old_sigset_t s; - long ret; - mm_segment_t old_fs; + memcpy(blocked->sig, &set, sizeof(set)); +} - if (set && get_user(s, set)) - return -EFAULT; - old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_sigprocmask(how, - set ? (old_sigset_t __user *) &s : NULL, - oset ? (old_sigset_t __user *) &s : NULL); - set_fs(old_fs); - if (ret == 0) - if (oset) - ret = put_user(s, oset); - return ret; +asmlinkage long compat_sys_sigprocmask(int how, + compat_old_sigset_t __user *nset, + compat_old_sigset_t __user *oset) +{ + old_sigset_t old_set, new_set; + sigset_t new_blocked; + + old_set = current->blocked.sig[0]; + + if (nset) { + if (get_user(new_set, nset)) + return -EFAULT; + new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); + + new_blocked = current->blocked; + + switch (how) { + case SIG_BLOCK: + sigaddsetmask(&new_blocked, new_set); + break; + case SIG_UNBLOCK: + sigdelsetmask(&new_blocked, new_set); + break; + case SIG_SETMASK: + compat_sig_setmask(&new_blocked, new_set); + break; + default: + return -EINVAL; + } + + set_current_blocked(&new_blocked); + } + + if (oset) { + if (put_user(old_set, oset)) + return -EFAULT; + } + + return 0; } #endif diff --git a/kernel/fork.c b/kernel/fork.c index a1793e442b20..9f9b296fa6df 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -47,6 +47,7 @@ #include <linux/audit.h> #include <linux/memcontrol.h> #include <linux/ftrace.h> +#include <linux/proc_fs.h> #include <linux/profile.h> #include <linux/rmap.h> #include <linux/ksm.h> @@ -1493,6 +1494,8 @@ bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: + if (unlikely(clone_flags & CLONE_NEWPID)) + pid_ns_release_proc(p->nsproxy->pid_ns); exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index c21449f85a2a..6df614912b9d 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -108,8 +108,10 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) touch_nmi_watchdog(); - if (sysctl_hung_task_panic) + if (sysctl_hung_task_panic) { + trigger_all_cpu_backtrace(); panic("hung_task: blocked tasks"); + } } /* diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6080f6bc8c33..3914c1e03cff 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -518,6 +518,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) out_unlock: raw_spin_unlock(&desc->lock); } +EXPORT_SYMBOL(handle_edge_irq); #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER /** diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index d86e254b95eb..192a302d6cfd 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -112,6 +112,7 @@ struct irq_desc *irq_to_desc(unsigned int irq) { return radix_tree_lookup(&irq_desc_tree, irq); } +EXPORT_SYMBOL(irq_to_desc); static void delete_irq_desc(unsigned int irq) { diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a86f1741cc27..95cba41ce1e9 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -51,6 +51,34 @@ #include "rcu.h" +#ifdef CONFIG_PREEMPT_RCU + +/* + * Check for a task exiting while in a preemptible-RCU read-side + * critical section, clean up if so. No need to issue warnings, + * as debug_check_no_locks_held() already does this if lockdep + * is enabled. + */ +void exit_rcu(void) +{ + struct task_struct *t = current; + + if (likely(list_empty(¤t->rcu_node_entry))) + return; + t->rcu_read_lock_nesting = 1; + barrier(); + t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; + __rcu_read_unlock(); +} + +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +void exit_rcu(void) +{ +} + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; struct lockdep_map rcu_lock_map = diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 22ecea0dfb62..fc31a2d65100 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -851,22 +851,6 @@ int rcu_preempt_needs_cpu(void) return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; } -/* - * Check for a task exiting while in a preemptible -RCU read-side - * critical section, clean up if so. No need to issue warnings, - * as debug_check_no_locks_held() already does this if lockdep - * is enabled. - */ -void exit_rcu(void) -{ - struct task_struct *t = current; - - if (t->rcu_read_lock_nesting == 0) - return; - t->rcu_read_lock_nesting = 1; - __rcu_read_unlock(); -} - #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ #ifdef CONFIG_RCU_TRACE diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index a89b381a8c6e..e66b34ab7555 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -64,6 +64,7 @@ static int irqreader = 1; /* RCU readers from irq (timers). */ static int fqs_duration; /* Duration of bursts (us), 0 to disable. */ static int fqs_holdoff; /* Hold time within burst (us). */ static int fqs_stutter = 3; /* Wait time between bursts (s). */ +static int n_barrier_cbs; /* Number of callbacks to test RCU barriers. */ static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */ static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ @@ -96,6 +97,8 @@ module_param(fqs_holdoff, int, 0444); MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); module_param(fqs_stutter, int, 0444); MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); +module_param(n_barrier_cbs, int, 0444); +MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing"); module_param(onoff_interval, int, 0444); MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); module_param(onoff_holdoff, int, 0444); @@ -139,6 +142,8 @@ static struct task_struct *shutdown_task; static struct task_struct *onoff_task; #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static struct task_struct *stall_task; +static struct task_struct **barrier_cbs_tasks; +static struct task_struct *barrier_task; #define RCU_TORTURE_PIPE_LEN 10 @@ -164,6 +169,7 @@ static atomic_t n_rcu_torture_alloc_fail; static atomic_t n_rcu_torture_free; static atomic_t n_rcu_torture_mberror; static atomic_t n_rcu_torture_error; +static long n_rcu_torture_barrier_error; static long n_rcu_torture_boost_ktrerror; static long n_rcu_torture_boost_rterror; static long n_rcu_torture_boost_failure; @@ -173,6 +179,8 @@ static long n_offline_attempts; static long n_offline_successes; static long n_online_attempts; static long n_online_successes; +static long n_barrier_attempts; +static long n_barrier_successes; static struct list_head rcu_torture_removed; static cpumask_var_t shuffle_tmp_mask; @@ -197,6 +205,10 @@ static unsigned long shutdown_time; /* jiffies to system shutdown. */ static unsigned long boost_starttime; /* jiffies of next boost test start. */ DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ /* and boost task create/destroy. */ +static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ +static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ +static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ +static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ @@ -327,6 +339,7 @@ struct rcu_torture_ops { int (*completed)(void); void (*deferred_free)(struct rcu_torture *p); void (*sync)(void); + void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); void (*cb_barrier)(void); void (*fqs)(void); int (*stats)(char *page); @@ -417,6 +430,7 @@ static struct rcu_torture_ops rcu_ops = { .completed = rcu_torture_completed, .deferred_free = rcu_torture_deferred_free, .sync = synchronize_rcu, + .call = call_rcu, .cb_barrier = rcu_barrier, .fqs = rcu_force_quiescent_state, .stats = NULL, @@ -460,6 +474,7 @@ static struct rcu_torture_ops rcu_sync_ops = { .completed = rcu_torture_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu, + .call = NULL, .cb_barrier = NULL, .fqs = rcu_force_quiescent_state, .stats = NULL, @@ -477,6 +492,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { .completed = rcu_no_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu_expedited, + .call = NULL, .cb_barrier = NULL, .fqs = rcu_force_quiescent_state, .stats = NULL, @@ -519,6 +535,7 @@ static struct rcu_torture_ops rcu_bh_ops = { .completed = rcu_bh_torture_completed, .deferred_free = rcu_bh_torture_deferred_free, .sync = synchronize_rcu_bh, + .call = call_rcu_bh, .cb_barrier = rcu_barrier_bh, .fqs = rcu_bh_force_quiescent_state, .stats = NULL, @@ -535,6 +552,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { .completed = rcu_bh_torture_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu_bh, + .call = NULL, .cb_barrier = NULL, .fqs = rcu_bh_force_quiescent_state, .stats = NULL, @@ -551,6 +569,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = { .completed = rcu_bh_torture_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu_bh_expedited, + .call = NULL, .cb_barrier = NULL, .fqs = rcu_bh_force_quiescent_state, .stats = NULL, @@ -606,6 +625,11 @@ static int srcu_torture_completed(void) return srcu_batches_completed(&srcu_ctl); } +static void srcu_torture_deferred_free(struct rcu_torture *rp) +{ + call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb); +} + static void srcu_torture_synchronize(void) { synchronize_srcu(&srcu_ctl); @@ -620,7 +644,7 @@ static int srcu_torture_stats(char *page) cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):", torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { - cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu, + cnt += sprintf(&page[cnt], " %d(%lu,%lu)", cpu, per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); } @@ -635,13 +659,29 @@ static struct rcu_torture_ops srcu_ops = { .read_delay = srcu_read_delay, .readunlock = srcu_torture_read_unlock, .completed = srcu_torture_completed, - .deferred_free = rcu_sync_torture_deferred_free, + .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, + .call = NULL, .cb_barrier = NULL, .stats = srcu_torture_stats, .name = "srcu" }; +static struct rcu_torture_ops srcu_sync_ops = { + .init = srcu_torture_init, + .cleanup = srcu_torture_cleanup, + .readlock = srcu_torture_read_lock, + .read_delay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock, + .completed = srcu_torture_completed, + .deferred_free = rcu_sync_torture_deferred_free, + .sync = srcu_torture_synchronize, + .call = NULL, + .cb_barrier = NULL, + .stats = srcu_torture_stats, + .name = "srcu_sync" +}; + static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl) { return srcu_read_lock_raw(&srcu_ctl); @@ -659,13 +699,29 @@ static struct rcu_torture_ops srcu_raw_ops = { .read_delay = srcu_read_delay, .readunlock = srcu_torture_read_unlock_raw, .completed = srcu_torture_completed, - .deferred_free = rcu_sync_torture_deferred_free, + .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, + .call = NULL, .cb_barrier = NULL, .stats = srcu_torture_stats, .name = "srcu_raw" }; +static struct rcu_torture_ops srcu_raw_sync_ops = { + .init = srcu_torture_init, + .cleanup = srcu_torture_cleanup, + .readlock = srcu_torture_read_lock_raw, + .read_delay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock_raw, + .completed = srcu_torture_completed, + .deferred_free = rcu_sync_torture_deferred_free, + .sync = srcu_torture_synchronize, + .call = NULL, + .cb_barrier = NULL, + .stats = srcu_torture_stats, + .name = "srcu_raw_sync" +}; + static void srcu_torture_synchronize_expedited(void) { synchronize_srcu_expedited(&srcu_ctl); @@ -680,6 +736,7 @@ static struct rcu_torture_ops srcu_expedited_ops = { .completed = srcu_torture_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = srcu_torture_synchronize_expedited, + .call = NULL, .cb_barrier = NULL, .stats = srcu_torture_stats, .name = "srcu_expedited" @@ -1129,7 +1186,8 @@ rcu_torture_printk(char *page) "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " "rtmbe: %d rtbke: %ld rtbre: %ld " "rtbf: %ld rtb: %ld nt: %ld " - "onoff: %ld/%ld:%ld/%ld", + "onoff: %ld/%ld:%ld/%ld " + "barrier: %ld/%ld:%ld", rcu_torture_current, rcu_torture_current_version, list_empty(&rcu_torture_freelist), @@ -1145,14 +1203,17 @@ rcu_torture_printk(char *page) n_online_successes, n_online_attempts, n_offline_successes, - n_offline_attempts); + n_offline_attempts, + n_barrier_successes, + n_barrier_attempts, + n_rcu_torture_barrier_error); + cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); if (atomic_read(&n_rcu_torture_mberror) != 0 || + n_rcu_torture_barrier_error != 0 || n_rcu_torture_boost_ktrerror != 0 || n_rcu_torture_boost_rterror != 0 || - n_rcu_torture_boost_failure != 0) - cnt += sprintf(&page[cnt], " !!!"); - cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); - if (i > 1) { + n_rcu_torture_boost_failure != 0 || + i > 1) { cnt += sprintf(&page[cnt], "!!! "); atomic_inc(&n_rcu_torture_error); WARN_ON_ONCE(1); @@ -1337,6 +1398,7 @@ static void rcutorture_booster_cleanup(int cpu) /* This must be outside of the mutex, otherwise deadlock! */ kthread_stop(t); + boost_tasks[cpu] = NULL; } static int rcutorture_booster_init(int cpu) @@ -1484,13 +1546,15 @@ static void rcu_torture_onoff_cleanup(void) return; VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); kthread_stop(onoff_task); + onoff_task = NULL; } #else /* #ifdef CONFIG_HOTPLUG_CPU */ -static void +static int rcu_torture_onoff_init(void) { + return 0; } static void rcu_torture_onoff_cleanup(void) @@ -1554,6 +1618,152 @@ static void rcu_torture_stall_cleanup(void) return; VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); kthread_stop(stall_task); + stall_task = NULL; +} + +/* Callback function for RCU barrier testing. */ +void rcu_torture_barrier_cbf(struct rcu_head *rcu) +{ + atomic_inc(&barrier_cbs_invoked); +} + +/* kthread function to register callbacks used to test RCU barriers. */ +static int rcu_torture_barrier_cbs(void *arg) +{ + long myid = (long)arg; + struct rcu_head rcu; + + init_rcu_head_on_stack(&rcu); + VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started"); + set_user_nice(current, 19); + do { + wait_event(barrier_cbs_wq[myid], + atomic_read(&barrier_cbs_count) == n_barrier_cbs || + kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP); + if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) + break; + cur_ops->call(&rcu, rcu_torture_barrier_cbf); + if (atomic_dec_and_test(&barrier_cbs_count)) + wake_up(&barrier_wq); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping"); + rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); + while (!kthread_should_stop()) + schedule_timeout_interruptible(1); + cur_ops->cb_barrier(); + destroy_rcu_head_on_stack(&rcu); + return 0; +} + +/* kthread function to drive and coordinate RCU barrier testing. */ +static int rcu_torture_barrier(void *arg) +{ + int i; + + VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting"); + do { + atomic_set(&barrier_cbs_invoked, 0); + atomic_set(&barrier_cbs_count, n_barrier_cbs); + /* wake_up() path contains the required barriers. */ + for (i = 0; i < n_barrier_cbs; i++) + wake_up(&barrier_cbs_wq[i]); + wait_event(barrier_wq, + atomic_read(&barrier_cbs_count) == 0 || + kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP); + if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) + break; + n_barrier_attempts++; + cur_ops->cb_barrier(); + if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { + n_rcu_torture_barrier_error++; + WARN_ON_ONCE(1); + } + n_barrier_successes++; + schedule_timeout_interruptible(HZ / 10); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); + rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); + while (!kthread_should_stop()) + schedule_timeout_interruptible(1); + return 0; +} + +/* Initialize RCU barrier testing. */ +static int rcu_torture_barrier_init(void) +{ + int i; + int ret; + + if (n_barrier_cbs == 0) + return 0; + if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) { + printk(KERN_ALERT "%s" TORTURE_FLAG + " Call or barrier ops missing for %s,\n", + torture_type, cur_ops->name); + printk(KERN_ALERT "%s" TORTURE_FLAG + " RCU barrier testing omitted from run.\n", + torture_type); + return 0; + } + atomic_set(&barrier_cbs_count, 0); + atomic_set(&barrier_cbs_invoked, 0); + barrier_cbs_tasks = + kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]), + GFP_KERNEL); + barrier_cbs_wq = + kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]), + GFP_KERNEL); + if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0) + return -ENOMEM; + for (i = 0; i < n_barrier_cbs; i++) { + init_waitqueue_head(&barrier_cbs_wq[i]); + barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs, + (void *)(long)i, + "rcu_torture_barrier_cbs"); + if (IS_ERR(barrier_cbs_tasks[i])) { + ret = PTR_ERR(barrier_cbs_tasks[i]); + VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs"); + barrier_cbs_tasks[i] = NULL; + return ret; + } + } + barrier_task = kthread_run(rcu_torture_barrier, NULL, + "rcu_torture_barrier"); + if (IS_ERR(barrier_task)) { + ret = PTR_ERR(barrier_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier"); + barrier_task = NULL; + } + return 0; +} + +/* Clean up after RCU barrier testing. */ +static void rcu_torture_barrier_cleanup(void) +{ + int i; + + if (barrier_task != NULL) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task"); + kthread_stop(barrier_task); + barrier_task = NULL; + } + if (barrier_cbs_tasks != NULL) { + for (i = 0; i < n_barrier_cbs; i++) { + if (barrier_cbs_tasks[i] != NULL) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task"); + kthread_stop(barrier_cbs_tasks[i]); + barrier_cbs_tasks[i] = NULL; + } + } + kfree(barrier_cbs_tasks); + barrier_cbs_tasks = NULL; + } + if (barrier_cbs_wq != NULL) { + kfree(barrier_cbs_wq); + barrier_cbs_wq = NULL; + } } static int rcutorture_cpu_notify(struct notifier_block *self, @@ -1598,6 +1808,7 @@ rcu_torture_cleanup(void) fullstop = FULLSTOP_RMMOD; mutex_unlock(&fullstop_mutex); unregister_reboot_notifier(&rcutorture_shutdown_nb); + rcu_torture_barrier_cleanup(); rcu_torture_stall_cleanup(); if (stutter_task) { VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); @@ -1665,6 +1876,7 @@ rcu_torture_cleanup(void) VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); kthread_stop(shutdown_task); } + shutdown_task = NULL; rcu_torture_onoff_cleanup(); /* Wait for all RCU callbacks to fire. */ @@ -1676,7 +1888,7 @@ rcu_torture_cleanup(void) if (cur_ops->cleanup) cur_ops->cleanup(); - if (atomic_read(&n_rcu_torture_error)) + if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); else if (n_online_successes != n_online_attempts || n_offline_successes != n_offline_attempts) @@ -1692,10 +1904,12 @@ rcu_torture_init(void) int i; int cpu; int firsterr = 0; + int retval; static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, - &srcu_ops, &srcu_raw_ops, &srcu_expedited_ops, + &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, + &srcu_raw_sync_ops, &srcu_expedited_ops, &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; mutex_lock(&fullstop_mutex); @@ -1749,6 +1963,7 @@ rcu_torture_init(void) atomic_set(&n_rcu_torture_free, 0); atomic_set(&n_rcu_torture_mberror, 0); atomic_set(&n_rcu_torture_error, 0); + n_rcu_torture_barrier_error = 0; n_rcu_torture_boost_ktrerror = 0; n_rcu_torture_boost_rterror = 0; n_rcu_torture_boost_failure = 0; @@ -1872,7 +2087,6 @@ rcu_torture_init(void) test_boost_duration = 2; if ((test_boost == 1 && cur_ops->can_boost) || test_boost == 2) { - int retval; boost_starttime = jiffies + test_boost_interval * HZ; register_cpu_notifier(&rcutorture_cpu_nb); @@ -1897,9 +2111,22 @@ rcu_torture_init(void) goto unwind; } } - rcu_torture_onoff_init(); + i = rcu_torture_onoff_init(); + if (i != 0) { + firsterr = i; + goto unwind; + } register_reboot_notifier(&rcutorture_shutdown_nb); - rcu_torture_stall_init(); + i = rcu_torture_stall_init(); + if (i != 0) { + firsterr = i; + goto unwind; + } + retval = rcu_torture_barrier_init(); + if (retval != 0) { + firsterr = retval; + goto unwind; + } rcutorture_record_test_transition(); mutex_unlock(&fullstop_mutex); return 0; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d0c5baf1ab18..0da7b88d92d0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -75,6 +75,8 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; .gpnum = -300, \ .completed = -300, \ .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ + .orphan_nxttail = &structname##_state.orphan_nxtlist, \ + .orphan_donetail = &structname##_state.orphan_donelist, \ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ @@ -145,6 +147,13 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); unsigned long rcutorture_testseq; unsigned long rcutorture_vernum; +/* State information for rcu_barrier() and friends. */ + +static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; +static atomic_t rcu_barrier_cpu_count; +static DEFINE_MUTEX(rcu_barrier_mutex); +static struct completion rcu_barrier_completion; + /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -192,7 +201,6 @@ void rcu_note_context_switch(int cpu) { trace_rcu_utilization("Start context switch"); rcu_sched_qs(cpu); - rcu_preempt_note_context_switch(cpu); trace_rcu_utilization("End context switch"); } EXPORT_SYMBOL_GPL(rcu_note_context_switch); @@ -1311,95 +1319,133 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) #ifdef CONFIG_HOTPLUG_CPU /* - * Move a dying CPU's RCU callbacks to online CPU's callback list. - * Also record a quiescent state for this CPU for the current grace period. - * Synchronization and interrupt disabling are not required because - * this function executes in stop_machine() context. Therefore, cleanup - * operations that might block must be done later from the CPU_DEAD - * notifier. - * - * Note that the outgoing CPU's bit has already been cleared in the - * cpu_online_mask. This allows us to randomly pick a callback - * destination from the bits set in that mask. + * Send the specified CPU's RCU callbacks to the orphanage. The + * specified CPU must be offline, and the caller must hold the + * ->onofflock. */ -static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) +static void +rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, + struct rcu_node *rnp, struct rcu_data *rdp) { int i; - unsigned long mask; - int receive_cpu = cpumask_any(cpu_online_mask); - struct rcu_data *rdp = this_cpu_ptr(rsp->rda); - struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); - RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */ - /* First, adjust the counts. */ + /* + * Orphan the callbacks. First adjust the counts. This is safe + * because ->onofflock excludes _rcu_barrier()'s adoption of + * the callbacks, thus no memory barrier is required. + */ if (rdp->nxtlist != NULL) { - receive_rdp->qlen_lazy += rdp->qlen_lazy; - receive_rdp->qlen += rdp->qlen; + rsp->qlen_lazy += rdp->qlen_lazy; + rsp->qlen += rdp->qlen; + rdp->n_cbs_orphaned += rdp->qlen; rdp->qlen_lazy = 0; rdp->qlen = 0; } /* - * Next, move ready-to-invoke callbacks to be invoked on some - * other CPU. These will not be required to pass through another - * grace period: They are done, regardless of CPU. + * Next, move those callbacks still needing a grace period to + * the orphanage, where some other CPU will pick them up. + * Some of the callbacks might have gone partway through a grace + * period, but that is too bad. They get to start over because we + * cannot assume that grace periods are synchronized across CPUs. + * We don't bother updating the ->nxttail[] array yet, instead + * we just reset the whole thing later on. */ - if (rdp->nxtlist != NULL && - rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) { - struct rcu_head *oldhead; - struct rcu_head **oldtail; - struct rcu_head **newtail; - - oldhead = rdp->nxtlist; - oldtail = receive_rdp->nxttail[RCU_DONE_TAIL]; - rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; - *rdp->nxttail[RCU_DONE_TAIL] = *oldtail; - *receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead; - newtail = rdp->nxttail[RCU_DONE_TAIL]; - for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) { - if (receive_rdp->nxttail[i] == oldtail) - receive_rdp->nxttail[i] = newtail; - if (rdp->nxttail[i] == newtail) - rdp->nxttail[i] = &rdp->nxtlist; - } + if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) { + *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL]; + rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL]; + *rdp->nxttail[RCU_DONE_TAIL] = NULL; } /* - * Finally, put the rest of the callbacks at the end of the list. - * The ones that made it partway through get to start over: We - * cannot assume that grace periods are synchronized across CPUs. - * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but - * this does not seem compelling. Not yet, anyway.) + * Then move the ready-to-invoke callbacks to the orphanage, + * where some other CPU will pick them up. These will not be + * required to pass though another grace period: They are done. */ if (rdp->nxtlist != NULL) { - *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; - receive_rdp->nxttail[RCU_NEXT_TAIL] = - rdp->nxttail[RCU_NEXT_TAIL]; - receive_rdp->n_cbs_adopted += rdp->qlen; - rdp->n_cbs_orphaned += rdp->qlen; - - rdp->nxtlist = NULL; - for (i = 0; i < RCU_NEXT_SIZE; i++) - rdp->nxttail[i] = &rdp->nxtlist; + *rsp->orphan_donetail = rdp->nxtlist; + rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL]; } + /* Finally, initialize the rcu_data structure's list to empty. */ + rdp->nxtlist = NULL; + for (i = 0; i < RCU_NEXT_SIZE; i++) + rdp->nxttail[i] = &rdp->nxtlist; +} + +/* + * Adopt the RCU callbacks from the specified rcu_state structure's + * orphanage. The caller must hold the ->onofflock. + */ +static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) +{ + int i; + struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); + /* - * Record a quiescent state for the dying CPU. This is safe - * only because we have already cleared out the callbacks. - * (Otherwise, the RCU core might try to schedule the invocation - * of callbacks on this now-offline CPU, which would be bad.) + * If there is an rcu_barrier() operation in progress, then + * only the task doing that operation is permitted to adopt + * callbacks. To do otherwise breaks rcu_barrier() and friends + * by causing them to fail to wait for the callbacks in the + * orphanage. */ - mask = rdp->grpmask; /* rnp->grplo is constant. */ + if (rsp->rcu_barrier_in_progress && + rsp->rcu_barrier_in_progress != current) + return; + + /* Do the accounting first. */ + rdp->qlen_lazy += rsp->qlen_lazy; + rdp->qlen += rsp->qlen; + rdp->n_cbs_adopted += rsp->qlen; + rsp->qlen_lazy = 0; + rsp->qlen = 0; + + /* + * We do not need a memory barrier here because the only way we + * can get here if there is an rcu_barrier() in flight is if + * we are the task doing the rcu_barrier(). + */ + + /* First adopt the ready-to-invoke callbacks. */ + if (rsp->orphan_donelist != NULL) { + *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL]; + *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist; + for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--) + if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) + rdp->nxttail[i] = rsp->orphan_donetail; + rsp->orphan_donelist = NULL; + rsp->orphan_donetail = &rsp->orphan_donelist; + } + + /* And then adopt the callbacks that still need a grace period. */ + if (rsp->orphan_nxtlist != NULL) { + *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist; + rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail; + rsp->orphan_nxtlist = NULL; + rsp->orphan_nxttail = &rsp->orphan_nxtlist; + } +} + +/* + * Trace the fact that this CPU is going offline. + */ +static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) +{ + RCU_TRACE(unsigned long mask); + RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); + RCU_TRACE(struct rcu_node *rnp = rdp->mynode); + + RCU_TRACE(mask = rdp->grpmask); trace_rcu_grace_period(rsp->name, rnp->gpnum + 1 - !!(rnp->qsmask & mask), "cpuofl"); - rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum); - /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */ } /* * The CPU has been completely removed, and some other CPU is reporting |