summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/whatisRCU.txt2
-rw-r--r--drivers/nvme/host/core.c2
-rw-r--r--include/linux/rcupdate.h4
-rw-r--r--include/linux/rcutree.h1
-rw-r--r--include/linux/sched.h8
-rw-r--r--include/linux/srcu.h36
-rw-r--r--include/trace/events/rcu.h13
-rw-r--r--kernel/rcu/rcu.h11
-rw-r--r--kernel/rcu/rcu_segcblist.c18
-rw-r--r--kernel/rcu/rcu_segcblist.h2
-rw-r--r--kernel/rcu/rcuperf.c2
-rw-r--r--kernel/rcu/rcutorture.c15
-rw-r--r--kernel/rcu/srcutiny.c9
-rw-r--r--kernel/rcu/srcutree.c30
-rw-r--r--kernel/rcu/tree.c352
-rw-r--r--kernel/rcu/tree.h26
-rw-r--r--kernel/rcu/tree_exp.h13
-rw-r--r--kernel/rcu/tree_plugin.h98
-rw-r--r--kernel/rcu/update.c50
-rw-r--r--kernel/sched/core.c14
-rw-r--r--kernel/softirq.c3
-rw-r--r--kernel/torture.c2
-rw-r--r--kernel/trace/trace_benchmark.c4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh12
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh115
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-torture.sh105
29 files changed, 468 insertions, 540 deletions
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index a27fbfb0efb8..65eb856526b7 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -1,3 +1,5 @@
+What is RCU? -- "Read, Copy, Update"
+
Please note that the "What is RCU?" LWN series is an excellent place
to start learning about RCU:
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9df4f71e58ca..c3cea8a29843 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -349,7 +349,7 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
list_del_init(&head->entry);
- cleanup_srcu_struct(&head->srcu);
+ cleanup_srcu_struct_quiesced(&head->srcu);
kfree(head);
}
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 36360d07f25b..19d235fefdb9 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -188,13 +188,13 @@ static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
/**
- * cond_resched_rcu_qs - Report potential quiescent states to RCU
+ * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
*
* This macro resembles cond_resched(), except that it is defined to
* report potential quiescent states to RCU-tasks even if the cond_resched()
* machinery were to be shut off, as some advocate for PREEMPT kernels.
*/
-#define cond_resched_rcu_qs() \
+#define cond_resched_tasks_rcu_qs() \
do { \
if (!cond_resched()) \
rcu_note_voluntary_context_switch_lite(current); \
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index fd996cdf1833..448f20f27396 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -74,6 +74,7 @@ static inline void synchronize_rcu_bh_expedited(void)
void rcu_barrier(void);
void rcu_barrier_bh(void);
void rcu_barrier_sched(void);
+bool rcu_eqs_special_set(int cpu);
unsigned long get_state_synchronize_rcu(void);
void cond_synchronize_rcu(unsigned long oldstate);
unsigned long get_state_synchronize_sched(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b3d697f3b573..6fc99045658a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1613,7 +1613,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
* explicit rescheduling in places that are safe. The return
* value indicates whether a reschedule was done in fact.
* cond_resched_lock() will drop the spinlock before scheduling,
- * cond_resched_softirq() will enable bhs before scheduling.
*/
#ifndef CONFIG_PREEMPT
extern int _cond_resched(void);
@@ -1633,13 +1632,6 @@ extern int __cond_resched_lock(spinlock_t *lock);
__cond_resched_lock(lock); \
})
-extern int __cond_resched_softirq(void);
-
-#define cond_resched_softirq() ({ \
- ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
- __cond_resched_softirq(); \
-})
-
static inline void cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 33c1c698df09..91494d7e8e41 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -69,11 +69,45 @@ struct srcu_struct { };
void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
void (*func)(struct rcu_head *head));
-void cleanup_srcu_struct(struct srcu_struct *sp);
+void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced);
int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
void synchronize_srcu(struct srcu_struct *sp);
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+static inline void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+ _cleanup_srcu_struct(sp, false);
+}
+
+/**
+ * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory. Also,
+ * all grace-period processing must have completed.
+ *
+ * "Completed" means that the last synchronize_srcu() and
+ * synchronize_srcu_expedited() calls must have returned before the call
+ * to cleanup_srcu_struct_quiesced(). It also means that the callback
+ * from the last call_srcu() must have been invoked before the call to
+ * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help
+ * with this last. Violating these rules will get you a WARN_ON() splat
+ * (with high probability, anyway), and will also cause the srcu_struct
+ * to be leaked.
+ */
+static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp)
+{
+ _cleanup_srcu_struct(sp, true);
+}
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d8c33298c153..5936aac357ab 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -84,20 +84,21 @@ TRACE_EVENT(rcu_grace_period,
);
/*
- * Tracepoint for future grace-period events, including those for no-callbacks
- * CPUs. The caller should pull the data from the rcu_node structure,
- * other than rcuname, which comes from the rcu_state structure, and event,
- * which is one of the following:
+ * Tracepoint for future grace-period events. The caller should pull
+ * the data from the rcu_node structure, other than rcuname, which comes
+ * from the rcu_state structure, and event, which is one of the following:
*
- * "Startleaf": Request a nocb grace period based on leaf-node data.
+ * "Startleaf": Request a grace period based on leaf-node data.
+ * "Prestarted": Someone beat us to the request
* "Startedleaf": Leaf-node start proved sufficient.
* "Startedleafroot": Leaf-node start proved sufficient after checking root.
* "Startedroot": Requested a nocb grace period based on root-node data.
+ * "NoGPkthread": The RCU grace-period kthread has not yet started.
* "StartWait": Start waiting for the requested grace period.
* "ResumeWait": Resume waiting after signal.
* "EndWait": Complete wait.
* "Cleanup": Clean up rcu_node structure after previous GP.
- * "CleanupMore": Clean up, and another no-CB GP is needed.
+ * "CleanupMore": Clean up, and another GP is needed.
*/
TRACE_EVENT(rcu_future_grace_period,
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 976019d6fa06..40cea6735c2d 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -270,6 +270,12 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
}
}
+/* Returns first leaf rcu_node of the specified RCU flavor. */
+#define rcu_first_leaf_node(rsp) ((rsp)->level[rcu_num_lvls - 1])
+
+/* Is this rcu_node a leaf? */
+#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
+
/*
* Do a full breadth-first scan of the rcu_node structures for the
* specified rcu_state structure.
@@ -284,8 +290,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
* rcu_node tree with but one rcu_node structure, this loop is a no-op.
*/
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
- for ((rnp) = &(rsp)->node[0]; \
- (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
+ for ((rnp) = &(rsp)->node[0]; !rcu_is_leaf_node(rsp, rnp); (rnp)++)
/*
* Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -294,7 +299,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
* It is still a leaf node, even if it is also the root node.
*/
#define rcu_for_each_leaf_node(rsp, rnp) \
- for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+ for ((rnp) = rcu_first_leaf_node(rsp); \
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
/*
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 88cba7c2956c..5aff271adf1e 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -404,24 +404,6 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
}
/*
- * Scan the specified rcu_segcblist structure for callbacks that need
- * a grace period later than the one specified by "seq". We don't look
- * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
- * have a grace-period sequence number.
- */
-bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
- unsigned long seq)
-{
- int i;
-
- for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
- if (rsclp->tails[i - 1] != rsclp->tails[i] &&
- ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
- return true;
- return false;
-}
-
-/*
* Merge the source rcu_segcblist structure into the destination
* rcu_segcblist structure, then initialize the source. Any pending
* callbacks from the source get to start over. It is best to
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 581c12b63544..948470cef385 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -134,7 +134,5 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
struct rcu_cblist *rclp);
void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
-bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
- unsigned long seq);
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
struct rcu_segcblist *src_rsclp);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 777e7a6a0292..e232846516b3 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -369,7 +369,7 @@ static bool __maybe_unused torturing_tasks(void)
*/
static void rcu_perf_wait_shutdown(void)
{
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
return;
while (!torture_must_stop())
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 680c96d8c00f..e628fcfd1bde 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -593,7 +593,12 @@ static void srcu_torture_init(void)
static void srcu_torture_cleanup(void)
{
- cleanup_srcu_struct(&srcu_ctld);
+ static DEFINE_TORTURE_RANDOM(rand);
+
+ if (torture_random(&rand) & 0x800)
+ cleanup_srcu_struct(&srcu_ctld);
+ else
+ cleanup_srcu_struct_quiesced(&srcu_ctld);
srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
}
@@ -1609,6 +1614,9 @@ static enum cpuhp_state rcutor_hp;
static void
rcu_torture_cleanup(void)
{
+ int flags = 0;
+ unsigned long gpnum = 0;
+ unsigned long completed = 0;
int i;
rcutorture_record_test_transition();
@@ -1639,6 +1647,11 @@ rcu_torture_cleanup(void)
fakewriter_tasks = NULL;
}
+ rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed);
+ srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
+ &flags, &gpnum, &completed);
+ pr_alert("%s: End-test grace-period state: g%lu c%lu f%#x\n",
+ cur_ops->name, gpnum, completed, flags);
torture_stop_kthread(rcu_torture_stats, stats_task);
torture_stop_kthread(rcu_torture_fqs, fqs_task);
for (i = 0; i < ncbflooders; i++)
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 76ac5f50b2c7..622792abe41a 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -86,16 +86,19 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
* Must invoke this after you are finished using a given srcu_struct that
* was initialized via init_srcu_struct(), else you leak memory.
*/
-void cleanup_srcu_struct(struct srcu_struct *sp)
+void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
{
WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
- flush_work(&sp->srcu_work);
+ if (quiesced)
+ WARN_ON(work_pending(&sp->srcu_work));
+ else
+ flush_work(&sp->srcu_work);
WARN_ON(sp->srcu_gp_running);
WARN_ON(sp->srcu_gp_waiting);
WARN_ON(sp->srcu_cb_head);
WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
}
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
/*
* Removes the count for the old reader from the appropriate element of
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index fb560fca9ef4..b4123d7a2cec 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -366,24 +366,28 @@ static unsigned long srcu_get_delay(struct srcu_struct *sp)
return SRCU_INTERVAL;
}
-/**
- * cleanup_srcu_struct - deconstruct a sleep-RCU structure
- * @sp: structure to clean up.
- *
- * Must invoke this after you are finished using a given srcu_struct that
- * was initialized via init_srcu_struct(), else you leak memory.
- */
-void cleanup_srcu_struct(struct srcu_struct *sp)
+/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */
+void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
{
int cpu;
if (WARN_ON(!srcu_get_delay(sp)))
- return; /* Leakage unless caller handles error. */
+ return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(sp)))
- return; /* Leakage unless caller handles error. */
- flush_delayed_work(&sp->work);
+ return; /* Just leak it! */
+ if (quiesced) {
+ if (WARN_ON(delayed_work_pending(&sp->work)))
+ return; /* Just leak it! */
+ } else {
+ flush_delayed_work(&sp->work);
+ }
for_each_possible_cpu(cpu)
- flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+ if (quiesced) {
+ if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work)))
+ return; /* Just leak it! */
+ } else {
+ flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+ }
if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
WARN_ON(srcu_readers_active(sp))) {
pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
@@ -392,7 +396,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
free_percpu(sp->sda);
sp->sda = NULL;
}
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
/*
* Counts the new reader in the appropriate per-CPU element of the
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 23781fc90830..4fccdfa25716 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -524,8 +524,6 @@ module_param(rcu_kick_kthreads, bool, 0644);
static ulong jiffies_till_sched_qs = HZ / 10;
module_param(jiffies_till_sched_qs, ulong, 0444);
-static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
- struct rcu_data *rdp);
static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
static void force_quiescent_state(struct rcu_state *rsp);
static int rcu_pending(void);
@@ -711,44 +709,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
}
/*
- * Is there any need for future grace periods?
- * Interrupts must be disabled. If the caller does not hold the root
- * rnp_node structure's ->lock, the results are advisory only.
- */
-static int rcu_future_needs_gp(struct rcu_state *rsp)
-{
- struct rcu_node *rnp = rcu_get_root(rsp);
- int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
- int *fp = &rnp->need_future_gp[idx];
-
- lockdep_assert_irqs_disabled();
- return READ_ONCE(*fp);
-}
-
-/*
- * Does the current CPU require a not-yet-started grace period?
- * The caller must have disabled interrupts to prevent races with
- * normal callback registry.
- */
-static bool
-cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
-{
- lockdep_assert_irqs_disabled();
- if (rcu_gp_in_progress(rsp))
- return false; /* No, a grace period is already in progress. */
- if (rcu_future_needs_gp(rsp))
- return true; /* Yes, a no-CBs CPU needs one. */
- if (!rcu_segcblist_is_enabled(&rdp->cblist))
- return false; /* No, this is a no-CBs (or offline) CPU. */
- if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
- return true; /* Yes, CPU has newly registered callbacks. */
- if (rcu_segcblist_future_gp_needed(&rdp->cblist,
- READ_ONCE(rsp->completed)))
- return true; /* Yes, CBs for future grace period. */
- return false; /* No grace period needed. */
-}
-
-/*
* Enter an RCU extended quiescent state, which can be either the
* idle loop or adaptive-tickless usermode execution.
*
@@ -1234,10 +1194,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
}
/*
- * Has this CPU encountered a cond_resched_rcu_qs() since the
- * beginning of the grace period? For this to be the case,
- * the CPU has to have noticed the current grace period. This
- * might not be the case for nohz_full CPUs looping in the kernel.
+ * Has this CPU encountered a cond_resched() since the beginning
+ * of the grace period? For this to be the case, the CPU has to
+ * have noticed the current grace period. This might not be the
+ * case for nohz_full CPUs looping in the kernel.
*/
jtsq = jiffies_till_sched_qs;
ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
@@ -1642,18 +1602,30 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
return rnp->completed + 1;
/*
+ * If the current rcu_node structure believes that RCU is
+ * idle, and if the rcu_state structure does not yet reflect
+ * the start of a new grace period, then the next grace period
+ * will suffice. The memory barrier is needed to accurately
+ * sample the rsp->gpnum, and pairs with the second lock
+ * acquisition in rcu_gp_init(), which is augmented with
+ * smp_mb__after_unlock_lock() for this purpose.
+ */
+ if (rnp->gpnum == rnp->completed) {
+ smp_mb(); /* See above block comment. */
+ if (READ_ONCE(rsp->gpnum) == rnp->completed)
+ return rnp->completed + 1;
+ }
+
+ /*
* Otherwise, wait for a possible partial grace period and
* then the subsequent full grace period.
*/
return rnp->completed + 2;
}
-/*
- * Trace-event helper function for rcu_start_future_gp() and
- * rcu_nocb_wait_gp().
- */
-static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
- unsigned long c, const char *s)
+/* Trace-event wrapper function for trace_rcu_future_grace_period. */
+static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+ unsigned long c, const char *s)
{
trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
rnp->completed, c, rnp->level,
@@ -1661,96 +1633,67 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
}
/*
- * Start some future grace period, as needed to handle newly arrived
+ * Start the specified grace period, as needed to handle newly arrived
* callbacks. The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp field. Returns true if there
+ * rcu_node structure's ->need_future_gp[] field. Returns true if there
* is reason to awaken the grace-period kthread.
*
- * The caller must hold the specified rcu_node structure's ->lock.
+ * The caller must hold the specified rcu_node structure's ->lock, which
+ * is why the caller is responsible for waking the grace-period kthread.
*/
-static bool __maybe_unused
-rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
- unsigned long *c_out)
+static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+ unsigned long c)
{
- unsigned long c;
bool ret = false;
- struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
-
- raw_lockdep_assert_held_rcu_node(rnp);
-
- /*
- * Pick up grace-period number for new callbacks. If this
- * grace period is already marked as needed, return to the caller.
- */
- c = rcu_cbs_completed(rdp->rsp, rnp);
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
- if (rnp->need_future_gp[c & 0x1]) {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
- goto out;
- }
+ struct rcu_state *rsp = rdp->rsp;
+ struct rcu_node *rnp_root;
/*
- * If either this rcu_node structure or the root rcu_node structure
- * believe that a grace period is in progress, then we must wait
- * for the one following, which is in "c". Because our request
- * will be noticed at the end of the current grace period, we don't
- * need to explicitly start one. We only do the lockless check
- * of rnp_root's fields if the current rcu_node structure thinks
- * there is no grace period in flight, and because we hold rnp->lock,
- * the only possible change is when rnp_root's two fields are
- * equal, in which case rnp_root->gpnum might be concurrently
- * incremented. But that is OK, as it will just result in our
- * doing some extra useless work.
+ * Use funnel locking to either acquire the root rcu_node
+ * structure's lock or bail out if the need for this grace period
+ * has already been recorded -- or has already started. If there
+ * is already a grace period in progress in a non-leaf node, no
+ * recording is needed because the end of the grace period will
+ * scan the leaf rcu_node structures. Note that rnp->lock must
+ * not be released.
*/
- if (rnp->gpnum != rnp->completed ||
- READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) {
- rnp->need_future_gp[c & 0x1]++;
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
- goto out;
+ raw_lockdep_assert_held_rcu_node(rnp);
+ trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf"));
+ for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
+ if (rnp_root != rnp)
+ raw_spin_lock_rcu_node(rnp_root);
+ WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum +
+ need_future_gp_mask(), c));
+ if (need_future_gp_element(rnp_root, c) ||
+ ULONG_CMP_GE(rnp_root->gpnum, c) ||
+ (rnp != rnp_root &&
+ rnp_root->gpnum != rnp_root->completed)) {
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
+ goto unlock_out;
+ }
+ need_future_gp_element(rnp_root, c) = true;
+ if (rnp_root != rnp && rnp_root->parent != NULL)
+ raw_spin_unlock_rcu_node(rnp_root);
+ if (!rnp_root->parent)
+ break; /* At root, and perhaps also leaf. */
}
- /*
- * There might be no grace period in progress. If we don't already
- * hold it, acquire the root rcu_node structure's lock in order to
- * start one (if needed).
- */
- if (rnp != rnp_root)
- raw_spin_lock_rcu_node(rnp_root);
-
- /*
- * Get a new grace-period number. If there really is no grace
- * period in progress, it will be smaller than the one we obtained
- * earlier. Adjust callbacks as needed.
- */
- c = rcu_cbs_completed(rdp->rsp, rnp_root);
- if (!rcu_is_nocb_cpu(rdp->cpu))
- (void)rcu_segcblist_accelerate(&rdp->cblist, c);
-
- /*
- * If the needed for the required grace period is already
- * recorded, trace and leave.
- */
- if (rnp_root->need_future_gp[c & 0x1]) {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
+ /* If GP already in progress, just leave, otherwise start one. */
+ if (rnp_root->gpnum != rnp_root->completed) {
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));
goto unlock_out;
}
-
- /* Record the need for the future grace period. */
- rnp_root->need_future_gp[c & 0x1]++;
-
- /* If a grace period is not already in progress, start one. */
- if (rnp_root->gpnum != rnp_root->completed) {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
- } else {
- trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
- ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
+ WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
+ if (!rsp->gp_kthread) {
+ trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
+ goto unlock_out;
}
+ trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
+ ret = true; /* Caller must wake GP kthread. */
unlock_out:
if (rnp != rnp_root)
raw_spin_unlock_rcu_node(rnp_root);
-out:
- if (c_out != NULL)
- *c_out = c;
return ret;
}
@@ -1758,16 +1701,16 @@ out:
* Clean up any old requests for the just-ended grace period. Also return
* whether any additional grace periods have been requested.
*/
-static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
- int c = rnp->completed;
- int needmore;
+ unsigned long c = rnp->completed;
+ bool needmore;
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
- rnp->need_future_gp[c & 0x1] = 0;
- needmore = rnp->need_future_gp[(c + 1) & 0x1];
- trace_rcu_future_gp(rnp, rdp, c,
- needmore ? TPS("CleanupMore") : TPS("Cleanup"));
+ need_future_gp_element(rnp, c) = false;
+ needmore = need_any_future_gp(rnp);
+ trace_rcu_this_gp(rnp, rdp, c,
+ needmore ? TPS("CleanupMore") : TPS("Cleanup"));
return needmore;
}
@@ -1802,6 +1745,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
+ unsigned long c;
bool ret = false;
raw_lockdep_assert_held_rcu_node(rnp);
@@ -1820,8 +1764,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
* accelerating callback invocation to an earlier grace-period
* number.
*/
- if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp)))
- ret = rcu_start_future_gp(rnp, rdp, NULL);
+ c = rcu_cbs_completed(rsp, rnp);
+ if (rcu_segcblist_accelerate(&rdp->cblist, c))
+ ret = rcu_start_this_gp(rnp, rdp, c);
/* Trace depending on how much we were able to accelerate. */
if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
@@ -2049,7 +1994,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
rnp->level, rnp->grplo,
rnp->grphi, rnp->qsmask);
raw_spin_unlock_irq_rcu_node(rnp);
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
}
@@ -2108,7 +2053,6 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
{
unsigned long gp_duration;
bool needgp = false;
- int nocb = 0;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
struct swait_queue_head *sq;
@@ -2147,31 +2091,35 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
if (rnp == rdp->mynode)
needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
/* smp_mb() provided by prior unlock-lock pair. */
- nocb += rcu_future_gp_cleanup(rsp, rnp);
+ needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp;
sq = rcu_nocb_gp_get(rnp);
raw_spin_unlock_irq_rcu_node(rnp);
rcu_nocb_gp_cleanup(sq);
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
rcu_gp_slow(rsp, gp_cleanup_delay);
}
rnp = rcu_get_root(rsp);
raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
- rcu_nocb_gp_set(rnp, nocb);
/* Declare grace period done. */
WRITE_ONCE(rsp->completed, rsp->gpnum);
trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
rsp->gp_state = RCU_GP_IDLE;
+ /* Check for GP requests since above loop. */
rdp = this_cpu_ptr(rsp->rda);
+ if (need_any_future_gp(rnp)) {
+ trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
+ TPS("CleanupMore"));
+ needgp = true;
+ }
/* Advance CBs to reduce false positives below. */
- needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
- if (needgp || cpu_needs_another_gp(rsp, rdp)) {
+ if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
- trace_rcu_grace_period(rsp->name,
- READ_ONCE(rsp->gpnum),
+ trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
TPS("newreq"));
}
+ WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
raw_spin_unlock_irq_rcu_node(rnp);
}
@@ -2202,7 +2150,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
/* Locking provides needed memory barrier. */
if (rcu_gp_init(rsp))
break;
- cond_resched_rcu_qs();
+ cond_resched_tasks_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
WARN_ON(signal_pending(current));
trace_rcu_grace_period(rsp->name,
@@ -2247,7 +2195,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqsend"));
- cond_resched_rcu_qs();