diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2018-06-21 12:50:01 -0700 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2018-08-30 16:02:34 -0700 |
commit | 3e31009898699dfca823893054748d85048dc7b3 (patch) | |
tree | db39491a0402a8484e3fd8c71116cf80ef4ea275 /kernel/rcu/tree_plugin.h | |
parent | cf7614e13c8fcaf290c5ffaa04b2e1b4f704a52a (diff) |
rcu: Defer reporting RCU-preempt quiescent states when disabled
This commit defers reporting of RCU-preempt quiescent states at
rcu_read_unlock_special() time when any of interrupts, softirqs, or
preemption is disabled. These deferred quiescent states are reported
at a later RCU_SOFTIRQ, context switch, idle entry, or CPU-hotplug
offline operation. Of course, if another RCU read-side critical
section has started in the meantime, the reporting of the quiescent
state will be further deferred.
This also means that disabling preemption, interrupts, and/or
softirqs will act as an RCU-preempt read-side critical section.
This is enforced by checking preempt_count() as needed.
Some special cases must be handled on an ad-hoc basis, for example,
context switch is a quiescent state even though both the scheduler and
do_exit() disable preemption. In these cases, additional calls to
rcu_preempt_deferred_qs() override the preemption disabling. Similar
logic overrides disabled interrupts in rcu_preempt_check_callbacks()
because in this case the quiescent state happened just before the
corresponding scheduling-clock interrupt.
In theory, this change lifts a long-standing restriction: if interrupts
were disabled across a call to rcu_read_unlock(), the matching
rcu_read_lock() was required to also be contained within that
interrupts-disabled region of code. Because the reporting of the
corresponding RCU-preempt quiescent state is now deferred until
after interrupts have been enabled, it is no longer possible for this
situation to result in deadlocks involving the scheduler's runqueue and
priority-inheritance locks. This may allow some code simplification that
might reduce interrupt latency a bit. Unfortunately, in practice this
would also defer deboosting a low-priority task that had been subjected
to RCU priority boosting, so real-time-response considerations might
well force this restriction to remain in place.
Because RCU-preempt grace periods are now blocked not only by RCU
read-side critical sections, but also by disabling of interrupts,
preemption, and softirqs, it will be possible to eliminate RCU-bh and
RCU-sched in favor of RCU-preempt in CONFIG_PREEMPT=y kernels. This may
require some additional plumbing to provide the network denial-of-service
guarantees that have been traditionally provided by RCU-bh. Once these
are in place, CONFIG_PREEMPT=n kernels will be able to fold RCU-bh
into RCU-sched. This would mean that all kernels would have but
one flavor of RCU, which would open the door to significant code
cleanup.
Moving to a single flavor of RCU would also have the beneficial effect
of reducing the number of NOCB kthreads by at least a factor of two.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Apply rcu_read_unlock_special() preempt_count() feedback
from Joel Fernandes. ]
[ paulmck: Adjust rcu_eqs_enter() call to rcu_preempt_deferred_qs() in
response to bug reports from kbuild test robot. ]
[ paulmck: Fix bug located by kbuild test robot involving recursion
via rcu_preempt_deferred_qs(). ]
Diffstat (limited to 'kernel/rcu/tree_plugin.h')
-rw-r--r-- | kernel/rcu/tree_plugin.h | 144 |
1 files changed, 109 insertions, 35 deletions
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a97c20ea9bce..542791361908 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -371,6 +371,9 @@ static void rcu_preempt_note_context_switch(bool preempt) * behalf of preempted instance of __rcu_read_unlock(). */ rcu_read_unlock_special(t); + rcu_preempt_deferred_qs(t); + } else { + rcu_preempt_deferred_qs(t); } /* @@ -464,54 +467,51 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp) } /* - * Handle special cases during rcu_read_unlock(), such as needing to - * notify RCU core processing or task having blocked during the RCU - * read-side critical section. + * Report deferred quiescent states. The deferral time can + * be quite short, for example, in the case of the call from + * rcu_read_unlock_special(). */ -static void rcu_read_unlock_special(struct task_struct *t) +static void +rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) { bool empty_exp; bool empty_norm; bool empty_exp_now; - unsigned long flags; struct list_head *np; bool drop_boost_mutex = false; struct rcu_data *rdp; struct rcu_node *rnp; union rcu_special special; - /* NMI handlers cannot block and cannot safely manipulate state. */ - if (in_nmi()) - return; - - local_irq_save(flags); - /* * If RCU core is waiting for this CPU to exit its critical section, * report the fact that it has exited. Because irqs are disabled, * t->rcu_read_unlock_special cannot change. 
*/ special = t->rcu_read_unlock_special; + rdp = this_cpu_ptr(rcu_state_p->rda); + if (!special.s && !rdp->deferred_qs) { + local_irq_restore(flags); + return; + } if (special.b.need_qs) { rcu_preempt_qs(); t->rcu_read_unlock_special.b.need_qs = false; - if (!t->rcu_read_unlock_special.s) { + if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) { local_irq_restore(flags); return; } } /* - * Respond to a request for an expedited grace period, but only if - * we were not preempted, meaning that we were running on the same - * CPU throughout. If we were preempted, the exp_need_qs flag - * would have been cleared at the time of the first preemption, - * and the quiescent state would be reported when we were dequeued. + * Respond to a request by an expedited grace period for a + * quiescent state from this CPU. Note that requests from + * tasks are handled when removing the task from the + * blocked-tasks list below. */ - if (special.b.exp_need_qs) { - WARN_ON_ONCE(special.b.blocked); + if (special.b.exp_need_qs || rdp->deferred_qs) { t->rcu_read_unlock_special.b.exp_need_qs = false; - rdp = this_cpu_ptr(rcu_state_p->rda); + rdp->deferred_qs = false; rcu_report_exp_rdp(rcu_state_p, rdp, true); if (!t->rcu_read_unlock_special.s) { local_irq_restore(flags); @@ -519,19 +519,6 @@ static void rcu_read_unlock_special(struct task_struct *t) } } - /* Hardware IRQ handlers cannot block, complain if they get here. */ - if (in_irq() || in_serving_softirq()) { - lockdep_rcu_suspicious(__FILE__, __LINE__, - "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); - pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", - t->rcu_read_unlock_special.s, - t->rcu_read_unlock_special.b.blocked, - t->rcu_read_unlock_special.b.exp_need_qs, - t->rcu_read_unlock_special.b.need_qs); - local_irq_restore(flags); - return; - } - /* Clean up if blocked during RCU read-side critical section. 
*/ if (special.b.blocked) { t->rcu_read_unlock_special.b.blocked = false; @@ -603,6 +590,72 @@ static void rcu_read_unlock_special(struct task_struct *t) } /* + * Is a deferred quiescent-state pending, and are we also not in + * an RCU read-side critical section? It is the caller's responsibility + * to ensure it is otherwise safe to report any deferred quiescent + * states. The reason for this is that it is safe to report a + * quiescent state during context switch even though preemption + * is disabled. This function cannot be expected to understand these + * nuances, so the caller must handle them. + */ +static bool rcu_preempt_need_deferred_qs(struct task_struct *t) +{ + return (this_cpu_ptr(&rcu_preempt_data)->deferred_qs || + READ_ONCE(t->rcu_read_unlock_special.s)) && + !t->rcu_read_lock_nesting; +} + +/* + * Report a deferred quiescent state if needed and safe to do so. + * As with rcu_preempt_need_deferred_qs(), "safe" involves only + * not being in an RCU read-side critical section. The caller must + * evaluate safety in terms of interrupt, softirq, and preemption + * disabling. + */ +static void rcu_preempt_deferred_qs(struct task_struct *t) +{ + unsigned long flags; + bool couldrecurse = t->rcu_read_lock_nesting >= 0; + + if (!rcu_preempt_need_deferred_qs(t)) + return; + if (couldrecurse) + t->rcu_read_lock_nesting -= INT_MIN; + local_irq_save(flags); + rcu_preempt_deferred_qs_irqrestore(t, flags); + if (couldrecurse) + t->rcu_read_lock_nesting += INT_MIN; +} + +/* + * Handle special cases during rcu_read_unlock(), such as needing to + * notify RCU core processing or task having blocked during the RCU + * read-side critical section. + */ +static void rcu_read_unlock_special(struct task_struct *t) +{ + unsigned long flags; + bool preempt_bh_were_disabled = + !!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)); + bool irqs_were_disabled; + + /* NMI handlers cannot block and cannot safely manipulate state. 
*/ + if (in_nmi()) + return; + + local_irq_save(flags); + irqs_were_disabled = irqs_disabled_flags(flags); + if ((preempt_bh_were_disabled || irqs_were_disabled) && + t->rcu_read_unlock_special.b.blocked) { + /* Need to defer quiescent state until everything is enabled. */ + raise_softirq_irqoff(RCU_SOFTIRQ); + local_irq_restore(flags); + return; + } + rcu_preempt_deferred_qs_irqrestore(t, flags); +} + +/* * Dump detailed information for all tasks blocking the current RCU * grace period on the specified rcu_node structure. */ @@ -737,10 +790,20 @@ static void rcu_preempt_check_callbacks(void) struct rcu_state *rsp = &rcu_preempt_state; struct task_struct *t = current; - if (t->rcu_read_lock_nesting == 0) { - rcu_preempt_qs(); + if (t->rcu_read_lock_nesting > 0 || + (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) { + /* No QS, force context switch if deferred. */ + if (rcu_preempt_need_deferred_qs(t)) + resched_cpu(smp_processor_id()); + } else if (rcu_preempt_need_deferred_qs(t)) { + rcu_preempt_deferred_qs(t); /* Report deferred QS. */ + return; + } else if (!t->rcu_read_lock_nesting) { + rcu_preempt_qs(); /* Report immediate QS. */ return; } + + /* If GP is oldish, ask for help from rcu_read_unlock_special(). */ if (t->rcu_read_lock_nesting > 0 && __this_cpu_read(rcu_data_p->core_needs_qs) && __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) && @@ -859,6 +922,7 @@ void exit_rcu(void) barrier(); t->rcu_read_unlock_special.b.blocked = true; __rcu_read_unlock(); + rcu_preempt_deferred_qs(current); } /* @@ -941,6 +1005,16 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp) } /* + * Because there is no preemptible RCU, there can be no deferred quiescent + * states. + */ +static bool rcu_preempt_need_deferred_qs(struct task_struct *t) +{ + return false; +} +static void rcu_preempt_deferred_qs(struct task_struct *t) { } + +/* * Because preemptible RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections. */ |