summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/rcupdate.c 44
-rw-r--r-- kernel/rcutiny.c 4
-rw-r--r-- kernel/rcutiny_plugin.h 56
-rw-r--r-- kernel/rcutorture.c 72
-rw-r--r-- kernel/rcutree.c 478
-rw-r--r-- kernel/rcutree.h 46
-rw-r--r-- kernel/rcutree_plugin.h 223
-rw-r--r-- kernel/rcutree_trace.c 148
-rw-r--r-- kernel/time/tick-sched.c 2
9 files changed, 556 insertions, 517 deletions
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 95cba41ce1e9..4e6a61b15e86 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -54,6 +54,50 @@
#ifdef CONFIG_PREEMPT_RCU
/*
+ * Preemptible RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting, shared state will be updated
+ * if we block.
+ */
+void __rcu_read_lock(void)
+{
+ current->rcu_read_lock_nesting++; /* per-task nesting count; the matching decrement is in __rcu_read_unlock() */
+ barrier(); /* critical section after entry code. */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+
+/*
+ * Preemptible RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ */
+void __rcu_read_unlock(void)
+{
+ struct task_struct *t = current;
+
+ if (t->rcu_read_lock_nesting != 1) { /* count would not reach zero: just decrement */
+ --t->rcu_read_lock_nesting;
+ } else { /* count is exactly 1: this unlock takes it to zero */
+ barrier(); /* critical section before exit code. */
+ t->rcu_read_lock_nesting = INT_MIN; /* placeholder held until the final store of 0 below; NOTE(review): presumably marks "outermost unlock in progress" for code running meanwhile -- confirm */
+ barrier(); /* assign before ->rcu_read_unlock_special load */
+ if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+ rcu_read_unlock_special(t);
+ barrier(); /* ->rcu_read_unlock_special load before assign */
+ t->rcu_read_lock_nesting = 0;
+ }
+#ifdef CONFIG_PROVE_LOCKING
+ {
+ int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+ WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); /* warns on negative counts in the upper half of the negative range; values at or below INT_MIN / 2 are not flagged */
+ }
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+
+/*
* Check for a task exiting while in a preemptible-RCU read-side
* critical section, clean up if so. No need to issue warnings,
* as debug_check_no_locks_held() already does this if lockdep
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 37a5444204d2..547b1fe5b052 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -172,7 +172,7 @@ void rcu_irq_enter(void)
local_irq_restore(flags);
}
-#ifdef CONFIG_PROVE_RCU
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Test whether RCU thinks that the current CPU is idle.
@@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void)
}
EXPORT_SYMBOL(rcu_is_cpu_idle);
-#endif /* #ifdef CONFIG_PROVE_RCU */
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/*
* Test whether the current CPU was interrupted from idle. Nested
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index fc31a2d65100..918fd1e8509c 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
RCU_TRACE(.rcb.name = "rcu_preempt")
};
-static void rcu_read_unlock_special(struct task_struct *t);
static int rcu_preempted_readers_exp(void);
static void rcu_report_exp_done(void);
@@ -351,8 +350,9 @@ static int rcu_initiate_boost(void)
rcu_preempt_ctrlblk.boost_tasks =
rcu_preempt_ctrlblk.gp_tasks;
invoke_rcu_callbacks();
- } else
+ } else {
RCU_TRACE(rcu_initiate_boost_trace());
+ }
return 1;
}
@@ -527,23 +527,11 @@ void rcu_preempt_note_context_switch(void)
}
/*
- * Tiny-preemptible RCU implementation for rcu_read_lock().
- * Just increment ->rcu_read_lock_nesting, shared state will be updated
- * if we block.
- */
-void __rcu_read_lock(void)
-{
- current->rcu_read_lock_nesting++;
- barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_lock);
-
-/*
* Handle special cases during rcu_read_unlock(), such as needing to
* notify RCU core processing or task having blocked during the RCU
* read-side critical section.
*/
-static noinline void rcu_read_unlock_special(struct task_struct *t)
+void rcu_read_unlock_special(struct task_struct *t)
{
int empty;
int empty_exp;
@@ -627,38 +615,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
}
/*
- * Tiny-preemptible RCU implementation for rcu_read_unlock().
- * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
- * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
- * invoke rcu_read_unlock_special() to clean up after a context switch
- * in an RCU read-side critical section and other special cases.
- */
-void __rcu_read_unlock(void)
-{
- struct task_struct *t = current;
-
- barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
- if (t->rcu_read_lock_nesting != 1)
- --t->rcu_read_lock_nesting;
- else {
- t->rcu_read_lock_nesting = INT_MIN;
- barrier(); /* assign before ->rcu_read_unlock_special load */
- if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
- rcu_read_unlock_special(t);
- barrier(); /* ->rcu_read_unlock_special load before assign */
- t->rcu_read_lock_nesting = 0;
- }
-#ifdef CONFIG_PROVE_LOCKING
- {
- int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
-
- WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
- }
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_unlock);
-
-/*
* Check for a quiescent state from the current CPU. When a task blocks,
* the task is recorded in the rcu_preempt_ctrlblk structure, which is
* checked elsewhere. This is called from the scheduling-clock interrupt.
@@ -823,9 +779,9 @@ void synchronize_rcu_expedited(void)
rpcp->exp_tasks = NULL;
/* Wait for tail of ->blkd_tasks list to drain. */
- if (!rcu_preempted_readers_exp())
+ if (!rcu_preempted_readers_exp()) {
local_irq_restore(flags);
- else {
+ } else {
rcu_initiate_boost();
local_irq_restore(flags);
wait_event(sync_rcu_preempt_exp_wq,
@@ -846,8 +802,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
*/
int rcu_preempt_needs_cpu(void)
{
- if (!rcu_preempt_running_reader())
- rcu_preempt_cpu_qs();
return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
}
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index e66b34ab7555..25b15033c61f 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -49,8 +49,7 @@
#include <asm/byteorder.h>
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
- "Josh Triplett <josh@freedesktop.org>");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");
static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */
static int nfakewriters = 4; /* # fake writer threads */
@@ -206,6 +205,7 @@ static unsigned long boost_starttime; /* jiffies of next boost test start. */
DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
/* and boost task create/destroy. */
static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
+static bool barrier_phase; /* Test phase. */
static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
@@ -407,8 +407,9 @@ rcu_torture_cb(struct rcu_head *p)
if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
rp->rtort_mbtest = 0;
rcu_torture_free(rp);
- } else
+ } else {
cur_ops->deferred_free(rp);
+ }
}
static int rcu_no_completed(void)
@@ -635,6 +636,17 @@ static void srcu_torture_synchronize(void)
synchronize_srcu(&srcu_ctl);
}
+static void srcu_torture_call(struct rcu_head *head,
+ void (*func)(struct rcu_head *head))
+{ /* installed as srcu_ops.call; invoked through cur_ops->call(head, func) */
+ call_srcu(&srcu_ctl, head, func); /* srcu_ctl is the same object passed to synchronize_srcu() above */
+}
+
+static void srcu_torture_barrier(void)
+{ /* installed as srcu_ops.cb_barrier; invoked through cur_ops->cb_barrier() */
+ srcu_barrier(&srcu_ctl); /* operates on the same srcu_ctl that srcu_torture_call() posts callbacks to */
+}
+
static int srcu_torture_stats(char *page)
{
int cnt = 0;
@@ -661,8 +673,8 @@ static struct rcu_torture_ops srcu_ops = {
.completed = srcu_torture_completed,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
- .call = NULL,
- .cb_barrier = NULL,
+ .call = srcu_torture_call,
+ .cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.name = "srcu"
};
@@ -1013,7 +1025,11 @@ rcu_torture_fakewriter(void *arg)
do {
schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
udelay(rcu_random(&rand) & 0x3ff);
- cur_ops->sync();
+ if (cur_ops->cb_barrier != NULL &&
+ rcu_random(&rand) % (nfakewriters * 8) == 0)
+ cur_ops->cb_barrier();
+ else
+ cur_ops->sync();
rcu_stutter_wait("rcu_torture_fakewriter");
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
@@ -1183,27 +1199,27 @@ rcu_torture_printk(char *page)
}
cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
cnt += sprintf(&page[cnt],
- "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
- "rtmbe: %d rtbke: %ld rtbre: %ld "
- "rtbf: %ld rtb: %ld nt: %ld "
- "onoff: %ld/%ld:%ld/%ld "
- "barrier: %ld/%ld:%ld",
+ "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
atomic_read(&n_rcu_torture_alloc_fail),
- atomic_read(&n_rcu_torture_free),
+ atomic_read(&n_rcu_torture_free));
+ cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ",
atomic_read(&n_rcu_torture_mberror),
n_rcu_torture_boost_ktrerror,
- n_rcu_torture_boost_rterror,
+ n_rcu_torture_boost_rterror);
+ cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ",
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
- n_rcu_torture_timers,
+ n_rcu_torture_timers);
+ cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ",
n_online_successes,
n_online_attempts,
n_offline_successes,
- n_offline_attempts,
+ n_offline_attempts);
+ cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",
n_barrier_successes,
n_barrier_attempts,
n_rcu_torture_barrier_error);
@@ -1445,8 +1461,7 @@ rcu_torture_shutdown(void *arg)
delta = shutdown_time - jiffies_snap;
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_shutdown task: %lu "
- "jiffies remaining\n",
+ "rcu_torture_shutdown task: %lu jiffies remaining\n",
torture_type, delta);
schedule_timeout_interruptible(delta);
jiffies_snap = ACCESS_ONCE(jiffies);
@@ -1498,8 +1513,7 @@ rcu_torture_onoff(void *arg)
if (cpu_down(cpu) == 0) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: "
- "offlined %d\n",
+ "rcu_torture_onoff task: offlined %d\n",
torture_type, cpu);
n_offline_successes++;
}
@@ -1512,8 +1526,7 @@ rcu_torture_onoff(void *arg)
if (cpu_up(cpu) == 0) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: "
- "onlined %d\n",
+ "rcu_torture_onoff task: onlined %d\n",
torture_type, cpu);
n_online_successes++;
}
@@ -1631,6 +1644,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu)
static int rcu_torture_barrier_cbs(void *arg)
{
long myid = (long)arg;
+ bool lastphase = 0;
struct rcu_head rcu;
init_rcu_head_on_stack(&rcu);
@@ -1638,9 +1652,11 @@ static int rcu_torture_barrier_cbs(void *arg)
set_user_nice(current, 19);
do {
wait_event(barrier_cbs_wq[myid],
- atomic_read(&barrier_cbs_count) == n_barrier_cbs ||
+ barrier_phase != lastphase ||
kthread_should_stop() ||
fullstop != FULLSTOP_DONTSTOP);
+ lastphase = barrier_phase;
+ smp_mb(); /* ensure barrier_phase load before ->call(). */
if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
break;
cur_ops->call(&rcu, rcu_torture_barrier_cbf);
@@ -1665,7 +1681,8 @@ static int rcu_torture_barrier(void *arg)
do {
atomic_set(&barrier_cbs_invoked, 0);
atomic_set(&barrier_cbs_count, n_barrier_cbs);
- /* wake_up() path contains the required barriers. */
+ smp_mb(); /* Ensure barrier_phase after prior assignments. */
+ barrier_phase = !barrier_phase;
for (i = 0; i < n_barrier_cbs; i++)
wake_up(&barrier_cbs_wq[i]);
wait_event(barrier_wq,
@@ -1684,7 +1701,7 @@ static int rcu_torture_barrier(void *arg)
schedule_timeout_interruptible(HZ / 10);
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
- rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
+ rcutorture_shutdown_absorb("rcu_torture_barrier");
while (!kthread_should_stop())
schedule_timeout_interruptible(1);
return 0;
@@ -1908,8 +1925,8 @@ rcu_torture_init(void)
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
&rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
- &srcu_ops, &srcu_sync_ops, &srcu_raw_ops,
- &srcu_raw_sync_ops, &srcu_expedited_ops,
+ &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops,
+ &srcu_raw_ops, &srcu_raw_sync_ops,
&sched_ops, &sched_sync_ops, &sched_expedited_ops, };
mutex_lock(&fullstop_mutex);
@@ -1931,8 +1948,7 @@ rcu_torture_init(void)
return -EINVAL;
}
if (cur_ops->fqs == NULL && fqs_duration != 0) {
- printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero "
- "fqs_duration, fqs disabled.\n");
+ printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
fqs_duration = 0;
}
if (cur_ops->init)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4b97bba7396e..f280e542e3e9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -60,36 +60,44 @@
/* Data structures. */
-static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
-
-#define RCU_STATE_INITIALIZER(structname) { \
- .level = { &structname##_state.node[0] }, \
- .levelcnt = { \
- NUM_RCU_LVL_0, /* root of hierarchy. */ \
- NUM_RCU_LVL_1, \
- NUM_RCU_LVL_2, \
- NUM_RCU_LVL_3, \
- NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
- }, \
+static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
+
+#define RCU_STATE_INITIALIZER(sname, cr) { \
+ .level = { &sname##_state.node[0] }, \
+ .call = cr, \
.fqs_state = RCU_GP_IDLE, \
.gpnum = -300, \
.completed = -300, \
- .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
- .orphan_nxttail = &structname##_state.orphan_nxtlist, \
- .orphan_donetail = &structname##_state.orphan_donelist, \
- .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
- .n_force_qs = 0, \
- .n_force_qs_ngp = 0, \
- .name = #structname, \
+ .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+ .orphan_nxttail = &sname##_state.orphan_nxtlist, \
+ .orphan_donetail = &sname##_state.orphan_donelist, \
+ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
+ .name = #sname, \
}
-struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
+struct rcu_state rcu_sched_state =
+ RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
static struct rcu_state *rcu_state;
+LIST_HEAD(rcu_struct_flavors);
+
+/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
+static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
+module_param(rcu_fanout_leaf, int, 0);
+int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
+static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
+ NUM_RCU_LVL_0,
+ NUM_RCU_LVL_1,
+ NUM_RCU_LVL_2,
+ NUM_RCU_LVL_3,
+ NUM_RCU_LVL_4,
+};
+int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
/*
* The rcu_scheduler_active variable transitions from zero to one just
@@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
unsigned long rcutorture_testseq;
unsigned long rcutorture_vernum;
-/* State information for rcu_barrier() and friends. */
-
-static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
-static atomic_t rcu_barrier_cpu_count;
-static DEFINE_MUTEX(rcu_barrier_mutex);
-static struct completion rcu_barrier_completion;
-
/*
* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
* permit this function to be invoked without holding the root rcu_node
@@ -358,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
struct task_struct *idle = idle_task(smp_processor_id());
trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
- ftrace_dump(DUMP_ALL);
+ ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
@@ -468,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
trace_rcu_dyntick("Error on exit: not idle task",
oldval, rdtp->dynticks_nesting);
- ftrace_dump(DUMP_ALL);
+ ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
@@ -585,8 +586,6 @@ void rcu_nmi_exit(void)
WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
}
-#ifdef CONFIG_PROVE_RCU
-
/**
* rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
*
@@ -604,7 +603,7 @@ int rcu_is_cpu_idle(void)
}
EXPORT_SYMBOL(rcu_is_cpu_idle);
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
/*
* Is the current CPU online? Disable preemption to avoid false positives
@@ -645,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void)
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-#endif /* #ifdef CONFIG_PROVE_RCU */
+#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
/**
* rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
@@ -733,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
int cpu;
long delta;
unsigned long flags;
- int ndetected;
+ int ndetected = 0;
struct rcu_node *rnp = rcu_get_root(rsp);
/* Only let one CPU complain about others per time interval. */
@@ -774,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
*/
rnp = rcu_get_root(rsp);
raw_spin_lock_irqsave(&rnp->lock, flags);
- ndetected = rcu_print_task_stall(rnp);
+ ndetected += rcu_print_task_stall(rnp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
print_cpu_stall_info_end();
@@ -860,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
*/
void rcu_cpu_stall_reset(void)
{
- rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
- rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
- rcu_preempt_stall_reset();
+ struct rcu_state *rsp;
+
+ for_each_rcu_flavor(rsp)
+ rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
}
static struct notifier_block rcu_panic_block = {
@@ -894,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
if (rnp->qsmask & rdp->grpmask) {
rdp->qs_pending = 1;
rdp->passed_quiesce = 0;
- } else
+ } else {
rdp->qs_pending = 0;
+ }
zero_cpu_stall_ticks(rdp);
}
}
@@ -937,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
}
/*
+ * Initialize the specified rcu_data structure's callback list to empty.
+ */
+static void init_callback_list(struct rcu_data *rdp)
+{
+ int i;
+
+ rdp->nxtlist = NULL; /* list head: nothing queued */
+ for (i = 0; i < RCU_NEXT_SIZE; i++) /* all RCU_NEXT_SIZE entries of ->nxttail[] */
+ rdp->nxttail[i] = &rdp->nxtlist; /* each tail pointer refers to the list head pointer itself */
+}
+
+/*
* Advance this CPU's callbacks, but only if the current grace period
* has ended. This may be called only from the CPU to whom the rdp
* belongs. In addition, the corresponding leaf rcu_node structure's
@@ -1328,8 +1339,6 @@ static void
rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
struct rcu_node *rnp, struct rcu_data *rdp)
{
- int i;
-
/*
* Orphan the callbacks. First adjust the counts. This is safe
* because ->onofflock excludes _rcu_barrier()'s adoption of
@@ -1340,7 +1349,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
rsp->qlen += rdp->qlen;
rdp->n_cbs_orphaned += rdp->qlen;
rdp->qlen_lazy = 0;
- rdp->qlen = 0;
+ ACCESS_ONCE(rdp->qlen) = 0;
}
/*
@@ -1369,9 +1378,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
}
/* Finally, initialize the rcu_data structure's list to empty. */
- rdp->nxtlist = NULL;
- for (i = 0; i < RCU_NEXT_SIZE; i++)
- rdp->nxttail[i] = &rdp->nxtlist;
+ init_callback_list(rdp);
}
/*
@@ -1505,6 +1512,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (need_report & RCU_OFL_TASKS_EXP_GP)
rcu_report_exp_rnp(rsp, rnp, true);
+ WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
+ "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
+ cpu, rdp->qlen, rdp->nxtlist);
}
#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1592,7 +1602,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
}
smp_mb(); /* List handling before counting for rcu_barrier(). */
rdp->qlen_lazy -= count_lazy;
- rdp->qlen -= count;
+ ACCESS_ONCE(rdp->qlen) -= count;
rdp->n_cbs_invoked += count;
/* Reinstate batch limit if we have worked down the excess. */
@@ -1605,6 +1615,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
rdp->n_force_qs_snap = rsp->n_force_qs;
} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
rdp->qlen_last_fqs_check = rdp->qlen;
+ WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
local_irq_restore(flags);
@@ -1745,8 +1756,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
break; /* grace period idle or initializing, ignore. */
case RCU_SAVE_DYNTICK:
- if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
- break; /* So gcc recognizes the dead code. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
@@ -1788,9 +1797,10 @@ unlock_fqs_ret:
* whom the rdp belongs.
*/
static void
-__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
+__rcu_process_callbacks(struct rcu_state *rsp)
{
unsigned long flags;
+ struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
WARN_ON_ONCE(rdp->beenonline == 0);
@@ -1826,11 +1836,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
*/
static void rcu_process_callbacks(struct softirq_action *unused)
{
+ struct rcu_state *rsp;
+
trace_rcu_utilization("Start RCU core");
- __rcu_process_callbacks(&rcu_sched_state,
- &__get_cpu_var(rcu_sched_data));
- __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
- rcu_preempt_process_callbacks();
+ for_each_rcu_flavor(rsp)
+ __rcu_process_callbacks(rsp);
trace_rcu_utilization("End RCU core");
}
@@ -1857,6 +1867,56 @@ static void invoke_rcu_core(void)
raise_softirq(RCU_SOFTIRQ);
}
+/*
+ * Handle any core-RCU processing required by a call_rcu() invocation.
+ */
+static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
+ struct rcu_head *head, unsigned long flags)
+{ /* head: the callback just enqueued by __call_rcu(); flags: the caller's saved irq flags (only inspected here, not restored) */
+ /*
+ * If called from an extended quiescent state, invoke the RCU
+ * core in order to force a re-evaluation of RCU's idleness.
+ */
+ if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
+ invoke_rcu_core();
+
+ /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
+ if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
+ return; /* the caller, __call_rcu(), does local_irq_restore(flags) itself after this returns */
+
+ /*
+ * Force the grace period if too many callbacks or too long waiting.
+ * Enforce hysteresis, and don't invoke force_quiescent_state()
+ * if some other CPU has recently done so. Also, don't bother
+ * invoking force_quiescent_state() if the newly enqueued callback
+ * is the only one waiting for a grace period to complete.
+ */
+ if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { /* the "too many callbacks" case */
+
+ /* Are we ignoring a completed grace period? */
+ rcu_process_gp_end(rsp, rdp);
+ check_for_new_grace_period(rsp, rdp);
+
+ /* Start a new grace period if one not already started. */
+ if (!rcu_gp_in_progress(rsp)) {
+ unsigned long nestflag;
+ struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+ raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
+ rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */
+ } else {
+ /* Give the grace period a kick. */
+ rdp->blimit = LONG_MAX; /* NOTE(review): presumably lifts the per-batch callback limit that rcu_do_batch() later reinstates -- confirm */
+ if (rsp->n_force_qs == rdp->n_force_qs_snap &&
+ *rdp->nxttail[RCU_DONE_TAIL] != head)
+ force_quiescent_state(rsp, 0);
+ rdp->n_force_qs_snap = rsp->n_force_qs; /* re-snapshot both values for the next hysteresis check */
+ rdp->qlen_last_fqs_check = rdp->qlen;
+ }
+ } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) /* the "too long waiting" case */
+ force_quiescent_state(rsp, 1);
+}
+
static void
__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
struct rcu_state *rsp, bool lazy)
@@ -1881,7 +1941,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
rdp = this_cpu_ptr(rsp->rda);
/* Add the callback to our list. */
- rdp->qlen++;
+ ACCESS_ONCE(rdp->qlen)++;
if (lazy)
rdp->qlen_lazy++;
else
@@ -1896,43 +1956,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
else
trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
- /* If interrupts were disabled, don't dive into RCU core. */
- if (irqs_disabled_flags(flags)) {
- local_irq_restore(flags);
- return;
- }
-
- /*
- * Force the grace period if too many callbacks or too long waiting.
- * Enforce hysteresis, and don't invoke force_quiescent_state()
- * if some other CPU has recently done so. Also, don't bother
- * invoking force_quiescent_state() if the newly enqueued callback
- * is the only one waiting for a grace period to complete.
- */
- if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
-
- /* Are we ignoring a completed grace period? */
- rcu_process_gp_end(rsp, rdp);
- check_for_new_grace_period(rsp, rdp);
-
- /* Start a new grace period if one not already started. */
- if (!rcu_gp_in_progress(rsp)) {
- unsigned long nestflag;
- struct rcu_node *rnp_root = rcu_get_root(rsp);
-
- raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
- rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
- } else {
- /* Give the grace period a kick. */
- rdp->blimit = LONG_MAX;
- if (rsp->n_force_qs == rdp->n_force_qs_snap &&
- *rdp->nxttail[RCU_DONE_TAIL] != head)
- force_quiescent_state(rsp, 0);
- rdp->n_force_qs_snap = rsp->n_force_qs;
- rdp->qlen_last_fqs_check = rdp->qlen;
- }
- } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
- force_quiescent_state(rsp, 1);
+ /* Go handle any RCU core processing required. */
+ __call_rcu_core(rsp, rdp, head, flags);
local_irq_restore(flags);
}
@@ -1962,28 +1987,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
* occasionally incorrectly indicate that there are multiple CPUs online
* when there was in fact only one the whole time, as this just adds
* some overhead: RCU still operates correctly.
- *
- * Of course, sampling num_online_cpus() with preemption enabled can
- * give erroneous results if there are concurrent CPU-hotplug operations.
- * For example, given a demonic sequence of preemptions in num_online_cpus()
- * and CPU-hotplug operations, there could be two or more CPUs online at
- * all times, but num_online_cpus() might well return one (or even zero).
- *
- * However, all such demonic sequences require at least one CPU-offline
- * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer
- * is only a problem if there is an RCU read-side critical section executing
- * throughout. But RCU-sched and RCU-bh read-side critical sections
- * disable either preemption or bh, which prevents a CPU from going offline.
- * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
- * that there is only one CPU when in fact there was more than one throughout
- * is when there were no RCU readers in the system. If there are no
- * RCU readers, the grace period by definition can be of zero length,
- * regardless of the number of online CPUs.
*/
static inline int rcu_blocking_is_gp(void)
{
+ int ret;
+
might_sleep(); /* Check for RCU read-side critical section. */
- return num_online_cpus() <= 1;
+ preempt_disable();
+ ret = num_online_cpus() <= 1;
+ preempt_enable();
+ return ret;
}
/**
@@ -2118,9 +2131,9 @@ void synchronize_sched_expedited(void)
put_online_cpus();
/* No joy, try again later. Or just synchronize_sched(). */
- if (trycount++ < 10)
+ if (trycount++ < 10) {
udelay(trycount * num_online_cpus());
- else {
+ } else {
synchronize_sched();
return;
}
@@ -2241,9 +2254,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
*/
static int rcu_pending(int cpu)
{
- return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
- __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
- rcu_preempt_pending(cpu);
+ struct rcu_state *rsp;
+
+ for_each_rcu_flavor(rsp)
+ if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
+ return 1;
+ return 0;
}
/*
@@ -2253,20 +2269,41 @@ static int rcu_pending(int cpu)
*/
static int rcu_cpu_has_callbacks(int cpu)
{
+ struct rcu_state *rsp;
+
/* RCU callbacks either ready or pending? */
- return per_cpu(rcu_sched_data, cpu).nxtlist ||
- per_cpu(rcu_bh_data, cpu).nxtlist ||
- rcu_preempt_cpu_has_callbacks(cpu);
+ for_each_rcu_flavor(rsp)
+ if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
+ return 1;
+ return 0;
+}
+
+/*
+ * Helper function for _rcu_barrier() tracing. If tracing is disabled,
+ * the compiler is expected to optimize this away.
+ */
+static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
+ int cpu, unsigned long done)
+{ /* s: event label (callers pass e.g. "Begin", "Check", "IRQ", "CB"); cpu: callers in view pass -1; done: a ->n_barrier_done value or snapshot */
+ trace_rcu_barrier(rsp->name, s, cpu,
+ atomic_read(&rsp->barrier_cpu_count), done); /* also records the current ->barrier_cpu_count */
+}
/*
* RCU callback function for _rcu_barrier(). If we are last, wake
* up the task executing _rcu_barrier().
*/
-static void rcu_barrier_callback(struct rcu_head *notused)
+static void rcu_barrier_callback(struct rcu_head *rhp)
{
- if (atomic_dec_and_test(&rcu_barrier_cpu_count))
- complete(&rcu_barrier_completion);
+ struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
+ struct rcu_state *rsp = rdp->rsp;
+
+ if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
+ _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
+ complete(&rsp->barrier_completion);
+ } else {
+ _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
+ }
}
/*
@@ -2274,35 +2311,63 @@ static void rcu_barrier_callback(struct rcu_head *notused)
*/
static void rcu_barrier_func(void *type)
{
- int cpu = smp_processor_id();
- struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
- void (*call_rcu_func)(struct rcu_head *head,
- void (*func)(struct rcu_head *head));
+ struct rcu_state *rsp = type;
+ struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
- atomic_inc(&rcu_barrier_cpu_count);
- call_rcu_func = type;
- call_rcu_func(head, rcu_barrier_callback);
+ _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
+ atomic_inc(&rsp->barrier_cpu_count);
+ rsp->call(&rdp->barrier_head, rcu_barrier_callback);
}
/*
* Orchestrate the specified type of RCU barrier, waiting for all
* RCU callbacks of the specified type to complete.
*/
-static void _rcu_barrier(struct rcu_state *rsp,
- void (*call_rcu_func)(struct rcu_head *head,
- void (*func)(struct rcu_head *head)))
+static void _rcu_barrier(struct rcu_state *rsp)
{
int cpu;
unsigned long flags;
struct rcu_data *rdp;
- struct rcu_head rh;
+ struct rcu_data rd;
+ unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
+ unsigned long snap_done;
- init_rcu_head_on_stack(&rh);
+ init_rcu_head_on_stack(&rd.barrier_head);
+ _rcu_barrier_trace(rsp, "Begin", -1, snap);
/* Take mutex to serialize concurrent rcu_barrier() requests. */
- mutex_lock(&rcu_barrier_mutex);
+ mutex_lock(&rsp->barrier_mutex);
+
+ /*
+ * Ensure that all prior references, including to ->n_barrier_done,
+ * are ordered before the _rcu_barrier() machinery.
+ */
+ smp_mb(); /* See above block comment. */
+
+ /*
+ * Recheck ->n_barrier_done to see if others did our work for us.
+ * This means checking ->n_barrier_done for an even-to-odd-to-even
+ * transition. The "if" expression below therefore rounds the old
+ * value up to the next even number and adds two before comparing.
+ */
+ snap_done = ACCESS_ONCE(rsp->n_barrier_done);
+ _rcu_barrier_trace(rsp, "Check", -1, snap_done);
+ if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
+ _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
+ smp_mb(); /* caller's subsequent code after above check. */
+ mutex_unlock(&rsp->barrier_mutex);
+ return;
+ }
- smp_mb(); /* Prevent any prior operations from leaking in. */
+ /*
+ * Increment ->n_barrier_done to avoid duplicate work. Use
+ * ACCESS_ONCE() to prevent the compiler from speculating
+ * the increment to precede the early-exit check.
+ */
+ ACCESS_ONCE(rsp->n_barrier_done)++;
+ WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
+ _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
+ smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */