summaryrefslogtreecommitdiffstats
path: root/tokio/src/runtime/thread_pool
diff options
context:
space:
mode:
authorCarl Lerche <me@carllerche.com>2020-03-28 13:55:12 -0700
committerGitHub <noreply@github.com>2020-03-28 13:55:12 -0700
commitcaa7e180e43fdf914774de86f01f88e6b41f4a32 (patch)
treeacd63c2a01e11f2c728f2d7527efafbc99c66132 /tokio/src/runtime/thread_pool
parent7b2438e7441e98b2a3f72eb239b1c51489b7d9b8 (diff)
rt: cap lifo scheduler slot to avoid starvation (#2349)
The work-stealing scheduler includes an optimization where each worker includes a single slot to store the **last** scheduled task. Tasks in the scheduler's LIFO slot are executed next. This speeds up message passing patterns and reduces latency. Previously, this optimization was susceptible to starving other tasks in certain cases. If two tasks ping-pong between each other without ever yielding, the worker would never execute other tasks. An earlier PR (#2160) introduced a form of pre-emption. Each task is allocated a per-poll operation budget. Tokio resources will return ready until the budget is depleted, at which point, Tokio resources will always return `Pending`. This patch leverages the operation budget to limit the LIFO scheduler optimization. When executing tasks from the LIFO slot, the budget is **not** reset. Once the budget goes to zero, the task in the LIFO slot is pushed to the back of the queue.
Diffstat (limited to 'tokio/src/runtime/thread_pool')
-rw-r--r--tokio/src/runtime/thread_pool/worker.rs72
1 file changed, 59 insertions, 13 deletions
diff --git a/tokio/src/runtime/thread_pool/worker.rs b/tokio/src/runtime/thread_pool/worker.rs
index c07aa054..400e2a93 100644
--- a/tokio/src/runtime/thread_pool/worker.rs
+++ b/tokio/src/runtime/thread_pool/worker.rs
@@ -34,6 +34,13 @@ struct Core {
/// Used to schedule bookkeeping tasks every so often.
tick: u8,
+ /// When a task is scheduled from a worker, it is stored in this slot. The
+ /// worker will check this slot for a task **before** checking the run
+ /// queue. This effectively results in the **last** scheduled task to be run
+ /// next (LIFO). This is an optimization for message passing patterns and
+ /// helps to reduce latency.
+ lifo_slot: Option<Notified>,
+
/// The worker-local run queue.
run_queue: queue::Local<Arc<Worker>>,
@@ -128,6 +135,7 @@ pub(super) fn create(size: usize, park: Parker) -> (Arc<Shared>, Launch) {
cores.push(Box::new(Core {
tick: 0,
+ lifo_slot: None,
run_queue,
is_searching: false,
is_shutdown: false,
@@ -296,13 +304,37 @@ impl Context {
*self.core.borrow_mut() = Some(core);
// Run the task
- task.run();
-
- // Try to take the core back
- match self.core.borrow_mut().take() {
- Some(core) => Ok(core),
- None => Err(()),
- }
+ crate::coop::budget(|| {
+ task.run();
+
+ // As long as there is budget remaining and a task exists in the
+ // `lifo_slot`, then keep running.
+ loop {
+ // Check if we still have the core. If not, the core was stolen
+ // by another worker.
+ let mut core = match self.core.borrow_mut().take() {
+ Some(core) => core,
+ None => return Err(()),
+ };
+
+ // Check for a task in the LIFO slot
+ let task = match core.lifo_slot.take() {
+ Some(task) => task,
+ None => return Ok(core),
+ };
+
+ if crate::coop::has_budget_remaining() {
+ // Run the LIFO task, then loop
+ *self.core.borrow_mut() = Some(core);
+ task.run();
+ } else {
+ // Not enough budget left to run the LIFO task, push it to
+ // the back of the queue and return.
+ core.run_queue.push_back(task, self.worker.inject());
+ return Ok(core);
+ }
+ }
+ })
}
fn maintenance(&self, mut core: Box<Core>) -> Box<Core> {
@@ -373,12 +405,16 @@ impl Core {
/// Return the next notified task available to this worker.
fn next_task(&mut self, worker: &Worker) -> Option<Notified> {
if self.tick % GLOBAL_POLL_INTERVAL == 0 {
- worker.inject().pop().or_else(|| self.run_queue.pop())
+ worker.inject().pop().or_else(|| self.next_local_task())
} else {
- self.run_queue.pop().or_else(|| worker.inject().pop())
+ self.next_local_task().or_else(|| worker.inject().pop())
}
}
+ fn next_local_task(&mut self) -> Option<Notified> {
+ self.lifo_slot.take().or_else(|| self.run_queue.pop())
+ }
+
fn steal_work(&mut self, worker: &Worker) -> Option<Notified> {
if !self.transition_to_searching(worker) {
return None;
@@ -444,9 +480,9 @@ impl Core {
/// Returns `true` if the transition happened.
fn transition_from_parked(&mut self, worker: &Worker) -> bool {
- // If there is a non-stealable task, then we must unpark regardless of
+ // If a task is in the lifo slot, then we must unpark regardless of
// being notified
- if self.run_queue.has_unstealable() {
+ if self.lifo_slot.is_some() {
worker.shared.idle.unpark_worker_by_id(worker.index);
self.is_searching = true;
return true;
@@ -494,7 +530,7 @@ impl Core {
}
// Drain the queue
- while let Some(_) = self.run_queue.pop() {}
+ while let Some(_) = self.next_local_task() {}
}
fn drain_pending_drop(&mut self, worker: &Worker) {
@@ -639,7 +675,17 @@ impl Shared {
core.run_queue.push_back(task, &self.inject);
true
} else {
- core.run_queue.push(task, &self.inject)
+ // Push to the LIFO slot
+ let prev = core.lifo_slot.take();
+ let ret = prev.is_some();
+
+ if let Some(prev) = prev {
+ core.run_queue.push_back(prev, &self.inject);
+ }
+
+ core.lifo_slot = Some(task);
+
+ ret
};
// Only notify if not currently parked. If `park` is `None`, then the