path: root/kernel/rcu/tree_plugin.h
author    Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2018-06-21 12:50:01 -0700
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2018-08-30 16:02:34 -0700
commit    3e31009898699dfca823893054748d85048dc7b3 (patch)
tree      db39491a0402a8484e3fd8c71116cf80ef4ea275 /kernel/rcu/tree_plugin.h
parent    cf7614e13c8fcaf290c5ffaa04b2e1b4f704a52a (diff)
rcu: Defer reporting RCU-preempt quiescent states when disabled
This commit defers reporting of RCU-preempt quiescent states at rcu_read_unlock_special() time when any of interrupts, softirq, or preemption are disabled. These deferred quiescent states are reported at a later RCU_SOFTIRQ, context switch, idle entry, or CPU-hotplug offline operation. Of course, if another RCU read-side critical section has started in the meantime, the reporting of the quiescent state will be further deferred.

This also means that disabling preemption, interrupts, and/or softirqs will act as an RCU-preempt read-side critical section. This is enforced by checking preempt_count() as needed.

Some special cases must be handled on an ad-hoc basis, for example, context switch is a quiescent state even though both the scheduler and do_exit() disable preemption. In these cases, additional calls to rcu_preempt_deferred_qs() override the preemption disabling. Similar logic overrides disabled interrupts in rcu_preempt_check_callbacks() because in this case the quiescent state happened just before the corresponding scheduling-clock interrupt.

In theory, this change lifts a long-standing restriction that required that if interrupts were disabled across a call to rcu_read_unlock(), the matching rcu_read_lock() also had to be contained within that interrupts-disabled region of code. Because the reporting of the corresponding RCU-preempt quiescent state is now deferred until after interrupts have been enabled, it is no longer possible for this situation to result in deadlocks involving the scheduler's runqueue and priority-inheritance locks. This may allow some code simplification that might reduce interrupt latency a bit. Unfortunately, in practice this would also defer deboosting a low-priority task that had been subjected to RCU priority boosting, so real-time-response considerations might well force this restriction to remain in place.

Because RCU-preempt grace periods are now blocked not only by RCU read-side critical sections, but also by disabling of interrupts, preemption, and softirqs, it will be possible to eliminate RCU-bh and RCU-sched in favor of RCU-preempt in CONFIG_PREEMPT=y kernels. This may require some additional plumbing to provide the network denial-of-service guarantees that have traditionally been provided by RCU-bh. Once these are in place, CONFIG_PREEMPT=n kernels will be able to fold RCU-bh into RCU-sched. This would mean that all kernels would have but one flavor of RCU, which would open the door to significant code cleanup. Moving to a single flavor of RCU would also have the beneficial effect of reducing the NOCB kthreads by at least a factor of two.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Apply rcu_read_unlock_special() preempt_count() feedback from Joel Fernandes. ]
[ paulmck: Adjust rcu_eqs_enter() call to rcu_preempt_deferred_qs() in response to bug reports from kbuild test robot. ]
[ paulmck: Fix bug located by kbuild test robot involving recursion via rcu_preempt_deferred_qs(). ]
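[ Editorial note, not part of the patch: a minimal sketch of the reader pattern described in the fourth paragraph above. All names here (struct foo, my_lock, my_gp, do_something_with()) are made up for illustration. The rcu_read_lock() is taken with interrupts enabled, but the matching rcu_read_unlock() runs with them disabled; with this commit, the quiescent-state report is deferred until interrupts are re-enabled instead of risking a deadlock on the scheduler's runqueue or priority-inheritance locks. ]

	struct foo { int x; };			/* made-up payload type */

	static DEFINE_SPINLOCK(my_lock);	/* made-up example lock */
	static struct foo __rcu *my_gp;		/* made-up RCU-protected pointer */

	static void example_reader(void)	/* hypothetical caller */
	{
		struct foo *p;
		unsigned long flags;

		rcu_read_lock();			/* interrupts still enabled here */
		p = rcu_dereference(my_gp);
		spin_lock_irqsave(&my_lock, flags);	/* interrupts now disabled */
		do_something_with(p);			/* made-up helper */
		rcu_read_unlock();			/* QS report deferred, not taken here */
		spin_unlock_irqrestore(&my_lock, flags);
	}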
Diffstat (limited to 'kernel/rcu/tree_plugin.h')
-rw-r--r--  kernel/rcu/tree_plugin.h  144
1 file changed, 109 insertions(+), 35 deletions(-)
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a97c20ea9bce..542791361908 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -371,6 +371,9 @@ static void rcu_preempt_note_context_switch(bool preempt)
* behalf of preempted instance of __rcu_read_unlock().
*/
rcu_read_unlock_special(t);
+ rcu_preempt_deferred_qs(t);
+ } else {
+ rcu_preempt_deferred_qs(t);
}
/*
@@ -464,54 +467,51 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
}
/*
- * Handle special cases during rcu_read_unlock(), such as needing to
- * notify RCU core processing or task having blocked during the RCU
- * read-side critical section.
+ * Report deferred quiescent states. The deferral time can
+ * be quite short, for example, in the case of the call from
+ * rcu_read_unlock_special().
*/
-static void rcu_read_unlock_special(struct task_struct *t)
+static void
+rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
{
bool empty_exp;
bool empty_norm;
bool empty_exp_now;
- unsigned long flags;
struct list_head *np;
bool drop_boost_mutex = false;
struct rcu_data *rdp;
struct rcu_node *rnp;
union rcu_special special;
- /* NMI handlers cannot block and cannot safely manipulate state. */
- if (in_nmi())
- return;
-
- local_irq_save(flags);
-
/*
* If RCU core is waiting for this CPU to exit its critical section,
* report the fact that it has exited. Because irqs are disabled,
* t->rcu_read_unlock_special cannot change.
*/
special = t->rcu_read_unlock_special;
+ rdp = this_cpu_ptr(rcu_state_p->rda);
+ if (!special.s && !rdp->deferred_qs) {
+ local_irq_restore(flags);
+ return;
+ }
if (special.b.need_qs) {
rcu_preempt_qs();
t->rcu_read_unlock_special.b.need_qs = false;
- if (!t->rcu_read_unlock_special.s) {
+ if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
local_irq_restore(flags);
return;
}
}
/*
- * Respond to a request for an expedited grace period, but only if
- * we were not preempted, meaning that we were running on the same
- * CPU throughout. If we were preempted, the exp_need_qs flag
- * would have been cleared at the time of the first preemption,
- * and the quiescent state would be reported when we were dequeued.
+ * Respond to a request by an expedited grace period for a
+ * quiescent state from this CPU. Note that requests from
+ * tasks are handled when removing the task from the
+ * blocked-tasks list below.
*/
- if (special.b.exp_need_qs) {
- WARN_ON_ONCE(special.b.blocked);
+ if (special.b.exp_need_qs || rdp->deferred_qs) {
t->rcu_read_unlock_special.b.exp_need_qs = false;
- rdp = this_cpu_ptr(rcu_state_p->rda);
+ rdp->deferred_qs = false;
rcu_report_exp_rdp(rcu_state_p, rdp, true);
if (!t->rcu_read_unlock_special.s) {
local_irq_restore(flags);
@@ -519,19 +519,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
}
}
- /* Hardware IRQ handlers cannot block, complain if they get here. */
- if (in_irq() || in_serving_softirq()) {
- lockdep_rcu_suspicious(__FILE__, __LINE__,
- "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
- pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
- t->rcu_read_unlock_special.s,
- t->rcu_read_unlock_special.b.blocked,
- t->rcu_read_unlock_special.b.exp_need_qs,
- t->rcu_read_unlock_special.b.need_qs);
- local_irq_restore(flags);
- return;
- }
-
/* Clean up if blocked during RCU read-side critical section. */
if (special.b.blocked) {
t->rcu_read_unlock_special.b.blocked = false;
@@ -603,6 +590,72 @@ static void rcu_read_unlock_special(struct task_struct *t)
}
/*
+ * Is a deferred quiescent-state pending, and are we also not in
+ * an RCU read-side critical section? It is the caller's responsibility
+ * to ensure it is otherwise safe to report any deferred quiescent
+ * states. The reason for this is that it is safe to report a
+ * quiescent state during context switch even though preemption
+ * is disabled. This function cannot be expected to understand these
+ * nuances, so the caller must handle them.
+ */
+static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+{
+ return (this_cpu_ptr(&rcu_preempt_data)->deferred_qs ||
+ READ_ONCE(t->rcu_read_unlock_special.s)) &&
+ !t->rcu_read_lock_nesting;
+}
+
+/*
+ * Report a deferred quiescent state if needed and safe to do so.
+ * As with rcu_preempt_need_deferred_qs(), "safe" involves only
+ * not being in an RCU read-side critical section. The caller must
+ * evaluate safety in terms of interrupt, softirq, and preemption
+ * disabling.
+ */
+static void rcu_preempt_deferred_qs(struct task_struct *t)
+{
+ unsigned long flags;
+ bool couldrecurse = t->rcu_read_lock_nesting >= 0;
+
+ if (!rcu_preempt_need_deferred_qs(t))
+ return;
+ if (couldrecurse)
+ t->rcu_read_lock_nesting -= INT_MIN;
+ local_irq_save(flags);
+ rcu_preempt_deferred_qs_irqrestore(t, flags);
+ if (couldrecurse)
+ t->rcu_read_lock_nesting += INT_MIN;
+}
+
+/*
+ * Handle special cases during rcu_read_unlock(), such as needing to
+ * notify RCU core processing or task having blocked during the RCU
+ * read-side critical section.
+ */
+static void rcu_read_unlock_special(struct task_struct *t)
+{
+ unsigned long flags;
+ bool preempt_bh_were_disabled =
+ !!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));
+ bool irqs_were_disabled;
+
+ /* NMI handlers cannot block and cannot safely manipulate state. */
+ if (in_nmi())
+ return;
+
+ local_irq_save(flags);
+ irqs_were_disabled = irqs_disabled_flags(flags);
+ if ((preempt_bh_were_disabled || irqs_were_disabled) &&
+ t->rcu_read_unlock_special.b.blocked) {
+ /* Need to defer quiescent state until everything is enabled. */
+ raise_softirq_irqoff(RCU_SOFTIRQ);
+ local_irq_restore(flags);
+ return;
+ }
+ rcu_preempt_deferred_qs_irqrestore(t, flags);
+}
+
+/*
* Dump detailed information for all tasks blocking the current RCU
* grace period on the specified rcu_node structure.
*/
@@ -737,10 +790,20 @@ static void rcu_preempt_check_callbacks(void)
struct rcu_state *rsp = &rcu_preempt_state;
struct task_struct *t = current;
- if (t->rcu_read_lock_nesting == 0) {
- rcu_preempt_qs();
+ if (t->rcu_read_lock_nesting > 0 ||
+ (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
+ /* No QS, force context switch if deferred. */
+ if (rcu_preempt_need_deferred_qs(t))
+ resched_cpu(smp_processor_id());
+ } else if (rcu_preempt_need_deferred_qs(t)) {
+ rcu_preempt_deferred_qs(t); /* Report deferred QS. */
+ return;
+ } else if (!t->rcu_read_lock_nesting) {
+ rcu_preempt_qs(); /* Report immediate QS. */
return;
}
+
+ /* If GP is oldish, ask for help from rcu_read_unlock_special(). */
if (t->rcu_read_lock_nesting > 0 &&
__this_cpu_read(rcu_data_p->core_needs_qs) &&
__this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) &&
@@ -859,6 +922,7 @@ void exit_rcu(void)
barrier();
t->rcu_read_unlock_special.b.blocked = true;
__rcu_read_unlock();
+ rcu_preempt_deferred_qs(current);
}
/*
@@ -941,6 +1005,16 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
}
/*
+ * Because there is no preemptible RCU, there can be no deferred quiescent
+ * states.
+ */
+static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+{
+ return false;
+}
+static void rcu_preempt_deferred_qs(struct task_struct *t) { }
+
+/*
* Because preemptible RCU does not exist, we never have to check for
* tasks blocked within RCU read-side critical sections.
*/