-rw-r--r--  Documentation/ABI/testing/sysfs-kernel-uids  2
-rw-r--r--  Documentation/scheduler/completion.rst (renamed from Documentation/scheduler/completion.txt)  38
-rw-r--r--  Documentation/scheduler/index.rst  29
-rw-r--r--  Documentation/scheduler/sched-arch.rst (renamed from Documentation/scheduler/sched-arch.txt)  18
-rw-r--r--  Documentation/scheduler/sched-bwc.rst (renamed from Documentation/scheduler/sched-bwc.txt)  30
-rw-r--r--  Documentation/scheduler/sched-deadline.rst (renamed from Documentation/scheduler/sched-deadline.txt)  311
-rw-r--r--  Documentation/scheduler/sched-design-CFS.rst (renamed from Documentation/scheduler/sched-design-CFS.txt)  15
-rw-r--r--  Documentation/scheduler/sched-domains.rst (renamed from Documentation/scheduler/sched-domains.txt)  8
-rw-r--r--  Documentation/scheduler/sched-energy.rst (renamed from Documentation/scheduler/sched-energy.txt)  47
-rw-r--r--  Documentation/scheduler/sched-nice-design.rst (renamed from Documentation/scheduler/sched-nice-design.txt)  6
-rw-r--r--  Documentation/scheduler/sched-rt-group.rst (renamed from Documentation/scheduler/sched-rt-group.txt)  28
-rw-r--r--  Documentation/scheduler/sched-stats.rst (renamed from Documentation/scheduler/sched-stats.txt)  35
-rw-r--r--  Documentation/scheduler/text_files.rst  5
-rw-r--r--  Documentation/vm/numa.rst  2
-rw-r--r--  init/Kconfig  6
-rw-r--r--  kernel/sched/deadline.c  2
16 files changed, 340 insertions, 242 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/testing/sysfs-kernel-uids
index 28f14695a852..4182b7061816 100644
--- a/Documentation/ABI/testing/sysfs-kernel-uids
+++ b/Documentation/ABI/testing/sysfs-kernel-uids
@@ -11,4 +11,4 @@ Description:
example would be, if User A has shares = 1024 and user
B has shares = 2048, User B will get twice the CPU
bandwidth user A will. For more details refer
- Documentation/scheduler/sched-design-CFS.txt
+ Documentation/scheduler/sched-design-CFS.rst
diff --git a/Documentation/scheduler/completion.txt b/Documentation/scheduler/completion.rst
index e5b9df4d8078..9f039b4f4b09 100644
--- a/Documentation/scheduler/completion.txt
+++ b/Documentation/scheduler/completion.rst
@@ -1,3 +1,4 @@
+================================================
Completions - "wait for completion" barrier APIs
================================================
@@ -46,7 +47,7 @@ it has to wait for it.
To use completions you need to #include <linux/completion.h> and
create a static or dynamic variable of type 'struct completion',
-which has only two fields:
+which has only two fields::
struct completion {
unsigned int done;
@@ -57,7 +58,7 @@ This provides the ->wait waitqueue to place tasks on for waiting (if any), and
the ->done completion flag for indicating whether it's completed or not.
Completions should be named to refer to the event that is being synchronized on.
-A good example is:
+A good example is::
wait_for_completion(&early_console_added);
@@ -81,7 +82,7 @@ have taken place, even if these wait functions return prematurely due to a timeo
or a signal triggering.
Initializing of dynamically allocated completion objects is done via a call to
-init_completion():
+init_completion()::
init_completion(&dynamic_object->done);
@@ -100,7 +101,8 @@ but be aware of other races.
For static declaration and initialization, macros are available.
-For static (or global) declarations in file scope you can use DECLARE_COMPLETION():
+For static (or global) declarations in file scope you can use
+DECLARE_COMPLETION()::
static DECLARE_COMPLETION(setup_done);
DECLARE_COMPLETION(setup_done);
@@ -111,7 +113,7 @@ initialized to 'not done' and doesn't require an init_completion() call.
When a completion is declared as a local variable within a function,
then the initialization should always use DECLARE_COMPLETION_ONSTACK()
explicitly, not just to make lockdep happy, but also to make it clear
-that limited scope had been considered and is intentional:
+that limited scope had been considered and is intentional::
DECLARE_COMPLETION_ONSTACK(setup_done)
@@ -140,11 +142,11 @@ Waiting for completions:
------------------------
For a thread to wait for some concurrent activity to finish, it
-calls wait_for_completion() on the initialized completion structure:
+calls wait_for_completion() on the initialized completion structure::
void wait_for_completion(struct completion *done)
-A typical usage scenario is:
+A typical usage scenario is::
CPU#1 CPU#2
@@ -192,17 +194,17 @@ A common problem that occurs is to have unclean assignment of return types,
so take care to assign return-values to variables of the proper type.
Checking for the specific meaning of return values also has been found
-to be quite inaccurate, e.g. constructs like:
+to be quite inaccurate, e.g. constructs like::
if (!wait_for_completion_interruptible_timeout(...))
... would execute the same code path for successful completion and for the
-interrupted case - which is probably not what you want.
+interrupted case - which is probably not what you want::
int wait_for_completion_interruptible(struct completion *done)
This function marks the task TASK_INTERRUPTIBLE while it is waiting.
-If a signal was received while waiting it will return -ERESTARTSYS; 0 otherwise.
+If a signal was received while waiting it will return -ERESTARTSYS; 0 otherwise::
unsigned long wait_for_completion_timeout(struct completion *done, unsigned long timeout)
@@ -214,7 +216,7 @@ Timeouts are preferably calculated with msecs_to_jiffies() or usecs_to_jiffies()
to make the code largely HZ-invariant.
If the returned timeout value is deliberately ignored a comment should probably explain
-why (e.g. see drivers/mfd/wm8350-core.c wm8350_read_auxadc()).
+why (e.g. see drivers/mfd/wm8350-core.c wm8350_read_auxadc())::
long wait_for_completion_interruptible_timeout(struct completion *done, unsigned long timeout)
@@ -225,14 +227,14 @@ jiffies if completion occurred.
Further variants include _killable which uses TASK_KILLABLE as the
designated tasks state and will return -ERESTARTSYS if it is interrupted,
-or 0 if completion was achieved. There is a _timeout variant as well:
+or 0 if completion was achieved. There is a _timeout variant as well::
long wait_for_completion_killable(struct completion *done)
long wait_for_completion_killable_timeout(struct completion *done, unsigned long timeout)
The _io variants wait_for_completion_io() behave the same as the non-_io
variants, except for accounting waiting time as 'waiting on IO', which has
-an impact on how the task is accounted in scheduling/IO stats:
+an impact on how the task is accounted in scheduling/IO stats::
void wait_for_completion_io(struct completion *done)
unsigned long wait_for_completion_io_timeout(struct completion *done, unsigned long timeout)
@@ -243,11 +245,11 @@ Signaling completions:
A thread that wants to signal that the conditions for continuation have been
achieved calls complete() to signal exactly one of the waiters that it can
-continue:
+continue::
void complete(struct completion *done)
-... or calls complete_all() to signal all current and future waiters:
+... or calls complete_all() to signal all current and future waiters::
void complete_all(struct completion *done)
@@ -268,7 +270,7 @@ probably are a design bug.
Signaling completion from IRQ context is fine as it will appropriately
lock with spin_lock_irqsave()/spin_unlock_irqrestore() and it will never
-sleep.
+sleep.
try_wait_for_completion()/completion_done():
@@ -276,14 +278,14 @@ try_wait_for_completion()/completion_done():
The try_wait_for_completion() function will not put the thread on the wait
queue but rather returns false if it would need to enqueue (block) the thread,
-else it consumes one posted completion and returns true.
+else it consumes one posted completion and returns true::
bool try_wait_for_completion(struct completion *done)
Finally, to check the state of a completion without changing it in any way,
call completion_done(), which returns false if there are no posted
completions that were not yet consumed by waiters (implying that there are
-waiters) and true otherwise;
+waiters) and true otherwise::
bool completion_done(struct completion *done)
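
As an illustrative aside (not taken from the patch itself): a minimal, self-contained
sketch of the completion API described above, in kernel-module style. The function
and variable names (worker_fn, example_init, the 1 second timeout) are made up for
the example::

  #include <linux/completion.h>
  #include <linux/err.h>
  #include <linux/jiffies.h>
  #include <linux/kthread.h>
  #include <linux/module.h>

  static DECLARE_COMPLETION(setup_done);	/* static initialization, see above */

  static int worker_fn(void *data)
  {
      /* ... do the setup the waiter depends on ... */
      complete(&setup_done);		/* wake exactly one waiter */
      return 0;
  }

  static int __init example_init(void)
  {
      struct task_struct *tsk;
      unsigned long left;

      tsk = kthread_run(worker_fn, NULL, "example-worker");
      if (IS_ERR(tsk))
          return PTR_ERR(tsk);

      /* Sleep until worker_fn() signals completion, or 1s elapses. */
      left = wait_for_completion_timeout(&setup_done, msecs_to_jiffies(1000));
      if (!left)
          pr_warn("example: setup timed out\n");

      return 0;
  }
  module_init(example_init);
  MODULE_LICENSE("GPL");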
diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst
new file mode 100644
index 000000000000..058be77a4c34
--- /dev/null
+++ b/Documentation/scheduler/index.rst
@@ -0,0 +1,29 @@
+:orphan:
+
+===============
+Linux Scheduler
+===============
+
+.. toctree::
+ :maxdepth: 1
+
+
+ completion
+ sched-arch
+ sched-bwc
+ sched-deadline
+ sched-design-CFS
+ sched-domains
+ sched-energy
+ sched-nice-design
+ sched-rt-group
+ sched-stats
+
+ text_files
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.rst
index a2f27bbf2cba..0eaec669790a 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.rst
@@ -1,4 +1,6 @@
- CPU Scheduler implementation hints for architecture specific code
+=================================================================
+CPU Scheduler implementation hints for architecture specific code
+=================================================================
Nick Piggin, 2005
@@ -35,9 +37,10 @@ Your cpu_idle routines need to obey the following rules:
4. The only time interrupts need to be disabled when checking
need_resched is if we are about to sleep the processor until
the next interrupt (this doesn't provide any protection of
- need_resched, it prevents losing an interrupt).
+ need_resched, it prevents losing an interrupt):
+
+ 4a. Common problem with this type of sleep appears to be::
- 4a. Common problem with this type of sleep appears to be:
local_irq_disable();
if (!need_resched()) {
local_irq_enable();
@@ -51,10 +54,10 @@ Your cpu_idle routines need to obey the following rules:
although it may be reasonable to do some background work or enter
a low CPU priority.
- 5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter
- an interrupt sleep, it needs to be cleared then a memory
- barrier issued (followed by a test of need_resched with
- interrupts disabled, as explained in 3).
+ - 5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter
+ an interrupt sleep, it needs to be cleared then a memory
+ barrier issued (followed by a test of need_resched with
+ interrupts disabled, as explained in 3).
arch/x86/kernel/process.c has examples of both polling and
sleeping idle functions.
@@ -71,4 +74,3 @@ sh64 - Is sleeping racy vs interrupts? (See #4a)
sparc - IRQs on at this point(?), change local_irq_save to _disable.
- TODO: needs secondary CPUs to disable preempt (See #1)
-
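
As an illustrative aside (not part of the patch): a sketch of the race-free sleep
described by rules 4/4a above, assuming an x86-style safe_halt() helper that
re-enables interrupts and halts in one step (STI immediately followed by HLT),
so a wakeup interrupt cannot slip in between the need_resched() test and the
halt. The function name example_idle_sleep() is made up::

  static void example_idle_sleep(void)
  {
      local_irq_disable();
      if (!need_resched())
          safe_halt();		/* atomically enable IRQs and halt */
      else
          local_irq_enable();
  }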
diff --git a/Documentation/scheduler/sched-bwc.txt b/Documentation/scheduler/sched-bwc.rst
index f6b1873f68ab..3a9064219656 100644
--- a/Documentation/scheduler/sched-bwc.txt
+++ b/Documentation/scheduler/sched-bwc.rst
@@ -1,8 +1,9 @@
+=====================
CFS Bandwidth Control
=====================
[ This document only discusses CPU bandwidth control for SCHED_NORMAL.
- The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.txt ]
+ The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.rst ]
CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the
specification of the maximum CPU bandwidth available to a group or hierarchy.
@@ -27,7 +28,8 @@ cpu.cfs_quota_us: the total available run-time within a period (in microseconds)
cpu.cfs_period_us: the length of a period (in microseconds)
cpu.stat: exports throttling statistics [explained further below]
-The default values are:
+The default values are::
+
cpu.cfs_period_us=100ms
cpu.cfs_quota=-1
@@ -55,7 +57,8 @@ For efficiency run-time is transferred between the global pool and CPU local
on large systems. The amount transferred each time such an update is required
is described as the "slice".
-This is tunable via procfs:
+This is tunable via procfs::
+
/proc/sys/kernel/sched_cfs_bandwidth_slice_us (default=5ms)
Larger slice values will reduce transfer overheads, while smaller values allow
@@ -66,6 +69,7 @@ Statistics
A group's bandwidth statistics are exported via 3 fields in cpu.stat.
cpu.stat:
+
- nr_periods: Number of enforcement intervals that have elapsed.
- nr_throttled: Number of times the group has been throttled/limited.
- throttled_time: The total time duration (in nanoseconds) for which entities
@@ -78,12 +82,15 @@ Hierarchical considerations
The interface enforces that an individual entity's bandwidth is always
attainable, that is: max(c_i) <= C. However, over-subscription in the
aggregate case is explicitly allowed to enable work-conserving semantics
-within a hierarchy.
+within a hierarchy:
+
e.g. \Sum (c_i) may exceed C
+
[ Where C is the parent's bandwidth, and c_i its children ]
There are two ways in which a group may become throttled:
+
a. it fully consumes its own quota within a period
b. a parent's quota is fully consumed within its period
@@ -92,7 +99,7 @@ be allowed to until the parent's runtime is refreshed.
Examples
--------
-1. Limit a group to 1 CPU worth of runtime.
+1. Limit a group to 1 CPU worth of runtime::
If period is 250ms and quota is also 250ms, the group will get
1 CPU worth of runtime every 250ms.
@@ -100,10 +107,10 @@ Examples
# echo 250000 > cpu.cfs_quota_us /* quota = 250ms */
# echo 250000 > cpu.cfs_period_us /* period = 250ms */
-2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine.
+2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine
- With 500ms period and 1000ms quota, the group can get 2 CPUs worth of
- runtime every 500ms.
+ With 500ms period and 1000ms quota, the group can get 2 CPUs worth of
+ runtime every 500ms::
# echo 1000000 > cpu.cfs_quota_us /* quota = 1000ms */
# echo 500000 > cpu.cfs_period_us /* period = 500ms */
@@ -112,11 +119,10 @@ Examples
3. Limit a group to 20% of 1 CPU.
- With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU.
+ With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU::
# echo 10000 > cpu.cfs_quota_us /* quota = 10ms */
# echo 50000 > cpu.cfs_period_us /* period = 50ms */
- By using a small period here we are ensuring a consistent latency
- response at the expense of burst capacity.
-
+ By using a small period here we are ensuring a consistent latency
+ response at the expense of burst capacity.
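
As an illustrative aside (not part of the patch): a small userspace sketch that
applies the "20% of 1 CPU" example above and then dumps the cpu.stat counters.
It assumes a legacy (cgroup v1) cpu controller mounted at /sys/fs/cgroup/cpu and
an already-created group named "mygroup"; both the mount path and the group name
are illustrative::

  #include <stdio.h>
  #include <stdlib.h>

  static void write_val(const char *path, const char *val)
  {
      FILE *f = fopen(path, "w");

      if (!f) {
          perror(path);
          exit(1);
      }
      fputs(val, f);
      fclose(f);
  }

  int main(void)
  {
      char line[128];
      FILE *f;

      /* 20% of one CPU: 10ms quota every 50ms period. */
      write_val("/sys/fs/cgroup/cpu/mygroup/cpu.cfs_quota_us", "10000");
      write_val("/sys/fs/cgroup/cpu/mygroup/cpu.cfs_period_us", "50000");

      /* Print nr_periods, nr_throttled and throttled_time. */
      f = fopen("/sys/fs/cgroup/cpu/mygroup/cpu.stat", "r");
      if (!f) {
          perror("cpu.stat");
          exit(1);
      }
      while (fgets(line, sizeof(line), f))
          fputs(line, stdout);
      fclose(f);
      return 0;
  }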
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.rst
index b14e03ff3528..873fb2775ca6 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.rst
@@ -1,29 +1,29 @@
- Deadline Task Scheduling
- ------------------------
-
-CONTENTS
-========
-
- 0. WARNING
- 1. Overview
- 2. Scheduling algorithm
- 2.1 Main algorithm
- 2.2 Bandwidth reclaiming
- 3. Scheduling Real-Time Tasks
- 3.1 Definitions
- 3.2 Schedulability Analysis for Uniprocessor Systems
- 3.3 Schedulability Analysis for Multiprocessor Systems
- 3.4 Relationship with SCHED_DEADLINE Parameters
- 4. Bandwidth management
- 4.1 System-wide settings
- 4.2 Task interface
- 4.3 Default behavior
- 4.4 Behavior of sched_yield()
- 5. Tasks CPU affinity
- 5.1 SCHED_DEADLINE and cpusets HOWTO
- 6. Future plans
- A. Test suite
- B. Minimal main()
+========================
+Deadline Task Scheduling
+========================
+
+.. CONTENTS
+
+ 0. WARNING
+ 1. Overview
+ 2. Scheduling algorithm
+ 2.1 Main algorithm
+ 2.2 Bandwidth reclaiming
+ 3. Scheduling Real-Time Tasks
+ 3.1 Definitions
+ 3.2 Schedulability Analysis for Uniprocessor Systems
+ 3.3 Schedulability Analysis for Multiprocessor Systems
+ 3.4 Relationship with SCHED_DEADLINE Parameters
+ 4. Bandwidth management
+ 4.1 System-wide settings
+ 4.2 Task interface
+ 4.3 Default behavior
+ 4.4 Behavior of sched_yield()
+ 5. Tasks CPU affinity
+ 5.1 SCHED_DEADLINE and cpusets HOWTO
+ 6. Future plans
+ A. Test suite
+ B. Minimal main()
0. WARNING
@@ -44,7 +44,7 @@ CONTENTS
2. Scheduling algorithm
-==================
+=======================
2.1 Main algorithm
------------------
@@ -80,7 +80,7 @@ CONTENTS
a "remaining runtime". These two parameters are initially set to 0;
- When a SCHED_DEADLINE task wakes up (becomes ready for execution),
- the scheduler checks if
+ the scheduler checks if::
remaining runtime runtime
---------------------------------- > ---------
@@ -97,7 +97,7 @@ CONTENTS
left unchanged;
- When a SCHED_DEADLINE task executes for an amount of time t, its
- remaining runtime is decreased as
+ remaining runtime is decreased as::
remaining runtime = remaining runtime - t
@@ -112,7 +112,7 @@ CONTENTS
- When the current time is equal to the replenishment time of a
throttled task, the scheduling deadline and the remaining runtime are
- updated as
+ updated as::
scheduling deadline = scheduling deadline + period
remaining runtime = remaining runtime + runtime
@@ -129,7 +129,7 @@ CONTENTS
Reclamation of Unused Bandwidth) algorithm [15, 16, 17] and it is enabled
when flag SCHED_FLAG_RECLAIM is set.
- The following diagram illustrates the state names for tasks handled by GRUB:
+ The following diagram illustrates the state names for tasks handled by GRUB::
------------
(d) | Active |
@@ -168,7 +168,7 @@ CONTENTS
breaking the real-time guarantees.
The 0-lag time for a task entering the ActiveNonContending state is
- computed as
+ computed as::
(runtime * dl_period)
deadline - ---------------------
@@ -183,7 +183,7 @@ CONTENTS
the task's utilization must be removed from the previous runqueue's active
utilization and must be added to the new runqueue's active utilization.
In order to avoid races between a task waking up on a runqueue while the
- "inactive timer" is running on a different CPU, the "dl_non_contending"
+ "inactive timer" is running on a different CPU, the "dl_non_contending"
flag is used to indicate that a task is not on a runqueue but is active
(so, the flag is set when the task blocks and is cleared when the
"inactive timer" fires or when the task wakes up).
@@ -222,36 +222,36 @@ CONTENTS
Let's now see a trivial example of two deadline tasks with runtime equal
- to 4 and period equal to 8 (i.e., bandwidth equal to 0.5):
-
- A Task T1
- |
- | |
- | |
- |-------- |----
- | | V
- |---|---|---|---|---|---|---|---|--------->t
- 0 1 2 3 4 5 6 7 8
-
-
- A Task T2
- |
- | |
- | |
- | ------------------------|
- | | V
- |---|---|---|---|---|---|---|---|--------->t
- 0 1 2 3 4 5 6 7 8
-
-
- A running_bw
- |
- 1 ----------------- ------
- | | |
- 0.5- -----------------
- | |
- |---|---|---|---|---|---|---|---|--------->t
- 0 1 2 3 4 5 6 7 8
+ to 4 and period equal to 8 (i.e., bandwidth equal to 0.5)::
+
+ A Task T1
+ |
+ | |
+ | |
+ |-------- |----
+ | | V
+ |---|---|---|---|---|---|---|---|--------->t
+ 0 1 2 3 4 5 6 7 8
+
+
+ A Task T2
+ |
+ | |
+ | |
+ | ------------------------|
+ | | V
+ |---|---|---|---|---|---|---|---|--------->t
+ 0 1 2 3 4 5 6 7 8
+
+
+ A running_bw
+ |
+ 1 ----------------- ------
+ | | |
+ 0.5- -----------------
+ | |
+ |---|---|---|---|---|---|---|---|--------->t
+ 0 1 2 3 4 5 6 7 8
- Time t = 0:
@@ -284,7 +284,7 @@ CONTENTS
2.3 Energy-aware scheduling
-------------------------
+---------------------------
When cpufreq's schedutil governor is selected, SCHED_DEADLINE implements the
GRUB-PA [19] algorithm, reducing the CPU operating frequency to the minimum
@@ -299,15 +299,20 @@ CONTENTS
3. Scheduling Real-Time Tasks
=============================
- * BIG FAT WARNING ******************************************************
- *
- * This section contains a (not-thorough) summary on classical deadline
- * scheduling theory, and how it applies to SCHED_DEADLINE.
- * The reader can "safely" skip to Section 4 if only interested in seeing
- * how the scheduling policy can be used. Anyway, we strongly recommend
- * to come back here and continue reading (once the urge for testing is
- * satisfied :P) to be sure of fully understanding all technical details.
- ************************************************************************
+
+
+ .. BIG FAT WARNING ******************************************************
+
+ .. warning::
+
+ This section contains a (not-thorough) summary on classical deadline
+ scheduling theory, and how it applies to SCHED_DEADLINE.
+ The reader can "safely" skip to Section 4 if only interested in seeing
+ how the scheduling policy can be used. Anyway, we strongly recommend
+ to come back here and continue reading (once the urge for testing is
+ satisfied :P) to be sure of fully understanding all technical details.
+
+ .. ************************************************************************
There are no limitations on what kind of task can exploit this new
scheduling discipline, even if it must be said that it is particularly
@@ -329,6 +334,7 @@ CONTENTS
sporadic with minimum inter-arrival time P is r_{j+1} >= r_j + P. Finally,
d_j = r_j + D, where D is the task's relative deadline.
Summing up, a real-time task can be described as
+
Task = (WCET, D, P)
The utilization of a real-time task is defined as the ratio between its
@@ -352,13 +358,15 @@ CONTENTS
between the finishing time of a job and its absolute deadline).
More precisely, it can be proven that using a global EDF scheduler the
maximum tardiness of each task is smaller or equal than
+
((M − 1) · WCET_max − WCET_min)/(M − (M − 2) · U_max) + WCET_max
+
where WCET_max = max{WCET_i} is the maximum WCET, WCET_min=min{WCET_i}
is the minimum WCET, and U_max = max{WCET_i/P_i} is the maximum
utilization[12].
3.2 Schedulability Analysis for Uniprocessor Systems
-------------------------
+----------------------------------------------------
If M=1 (uniprocessor system), or in case of partitioned scheduling (each
real-time task is statically assigned to one and only one CPU), it is
@@ -370,7 +378,9 @@ CONTENTS
a task as WCET_i/min{D_i,P_i}, and EDF is able to respect all the deadlines
of all the tasks running on a CPU if the sum of the densities of the tasks
running on such a CPU is smaller or equal than 1:
+
sum(WCET_i / min{D_i, P_i}) <= 1
+
It is important to notice that this condition is only sufficient, and not
necessary: there are task sets that are schedulable, but do not respect the
condition. For example, consider the task set {Task_1,Task_2} composed by
@@ -379,7 +389,9 @@ CONTENTS
(Task_1 is scheduled as soon as it is released, and finishes just in time
to respect its deadline; Task_2 is scheduled immediately after Task_1, hence
its response time cannot be larger than 50ms + 10ms = 60ms) even if
+
50 / min{50,100} + 10 / min{100, 100} = 50 / 50 + 10 / 100 = 1.1
+
Of course it is possible to test the exact schedulability of tasks with
D_i != P_i (checking a condition that is both sufficient and necessary),
but this cannot be done by comparing the total utilization or density with
@@ -399,7 +411,7 @@ CONTENTS
4 Linux uses an admission test based on the tasks' utilizations.
3.3 Schedulability Analysis for Multiprocessor Systems
-------------------------
+------------------------------------------------------
On multiprocessor systems with global EDF scheduling (non partitioned
systems), a sufficient test for schedulability can not be based on the
@@ -428,7 +440,9 @@ CONTENTS
between total utilization (or density) and a fixed constant. If all tasks
have D_i = P_i, a sufficient schedulability condition can be expressed in
a simple way:
+
sum(WCET_i / P_i) <= M - (M - 1) · U_max
+
where U_max = max{WCET_i / P_i}[10]. Notice that for U_max = 1,
M - (M - 1) · U_max becomes M - M + 1 = 1 and this schedulability condition
just confirms the Dhall's effect. A more complete survey of the literature
@@ -447,7 +461,7 @@ CONTENTS
the tasks are limited.
3.4 Relationship with SCHED_DEADLINE Parameters
-------------------------
+-----------------------------------------------
Finally, it is important to understand the relationship between the
SCHED_DEADLINE scheduling parameters described in Section 2 (runtime,
@@ -473,6 +487,7 @@ CONTENTS
this task, as it is not possible to respect its temporal constraints.
References:
+
1 - C. L. Liu and J. W. Layland. Scheduling algorithms for multiprogram-
ming in a hard-real-time environment. Journal of the Association for
Computing Machinery, 20(1), 1973.
@@ -550,7 +565,7 @@ CONTENTS
The interface used to control the CPU bandwidth that can be allocated
to -deadline tasks is similar to the one already used for -rt
tasks with real-time group scheduling (a.k.a. RT-throttling - see
- Documentation/scheduler/sched-rt-group.txt), and is based on readable/
+ Documentation/scheduler/sched-rt-group.rst), and is based on readable/
writable control files located in procfs (for system wide settings).
Notice that per-group settings (controlled through cgroupfs) are still not
defined for -deadline tasks, because more discussion is needed in order to
@@ -596,11 +611,13 @@ CONTENTS
Specifying a periodic/sporadic task that executes for a given amount of
runtime at each instance, and that is scheduled according to the urgency of
its own timing constraints needs, in general, a way of declaring:
+
- a (maximum/typical) instance execution time,
- a minimum interval between consecutive instances,
- a time constraint by which each instance must be completed.
Therefore:
+
* a new struct sched_attr, containing all the necessary fields is
provided;
* the new scheduling related syscalls that manipulate it, i.e.,
@@ -658,21 +675,21 @@ CONTENTS
------------------------------------
An example of a simple configuration (pin a -deadline task to CPU0)
- follows (rt-app is used to create a -deadline task).
-
- mkdir /dev/cpuset
- mount -t cgroup -o cpuset cpuset /dev/cpuset
- cd /dev/cpuset
- mkdir cpu0
- echo 0 > cpu0/cpuset.cpus
- echo 0 > cpu0/cpuset.mems
- echo 1 > cpuset.cpu_exclusive
- echo 0 > cpuset.sched_load_balance
- echo 1 > cpu0/cpuset.cpu_exclusive
- echo 1 > cpu0/cpuset.mem_exclusive
- echo $$ > cpu0/tasks
- rt-app -t 100000:10000:d:0 -D5 (it is now actually superfluous to specify
- task affinity)
+ follows (rt-app is used to create a -deadline task)::
+
+ mkdir /dev/cpuset
+ mount -t cgroup -o cpuset cpuset /dev/cpuset
+ cd /dev/cpuset
+ mkdir cpu0
+ echo 0 > cpu0/cpuset.cpus
+ echo 0 > cpu0/cpuset.mems
+ echo 1 > cpuset.cpu_exclusive
+ echo 0 > cpuset.sched_load_balance
+ echo 1 > cpu0/cpuset.cpu_exclusive
+ echo 1 > cpu0/cpuset.mem_exclusive
+ echo $$ > cpu0/tasks
+ rt-app -t 100000:10000:d:0 -D5 # it is now actually superfluous to specify
+ # task affinity
6. Future plans
===============
@@ -711,7 +728,7 @@ Appendix A. Test suite
rt-app is available at: https://github.com/scheduler-tools/rt-app.
Thread parameters can be specified from the command line, with something like
- this:
+ this::
# rt-app -t 100000:10000:d -t 150000:20000:f:10 -D5
@@ -721,27 +738,27 @@ Appendix A. Test suite
of 5 seconds.
More interestingly, configurations can be described with a json file that
- can be passed as input to rt-app with something like this:
+ can be passed as input to rt-app with something like this::
# rt-app my_config.json
The parameters that can be specified with the second method are a superset
of the command line options. Please refer to rt-app documentation for more
- details (<rt-app-sources>/doc/*.json).
+ details (`<rt-app-sources>/doc/*.json`).
The second testing application is a modification of schedtool, called
schedtool-dl, which can be used to setup SCHED_DEADLINE parameters for a
certain pid/application. schedtool-dl is available at:
https://github.com/scheduler-tools/schedtool-dl.git.
- The usage is straightforward:
+ The usage is straightforward::
# schedtool -E -t 10000000:100000000 -e ./my_cpuhog_app
With this, my_cpuhog_app is put to run inside a SCHED_DEADLINE reservation
of 10ms every 100ms (note that parameters are expressed in microseconds).
You can also use schedtool to create a reservation for an already running
- application, given that you know its pid:
+ application, given that you know its pid::
# schedtool -E -t 10000000:100000000 my_app_pid
@@ -750,43 +767,43 @@ Appendix B. Minimal main()
We provide in what follows a simple (ugly) self-contained code snippet
showing how SCHED_DEADLINE reservations can be created by a real-time
- application developer.
-
- #define _GNU_SOURCE
- #include <unistd.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <time.h>
- #include <linux/unistd.h>
- #include <linux/kernel.h>
- #include <linux/types.h>
- #include <sys/syscall.h>
- #include <pthread.h>
-
- #define gettid() syscall(__NR_gettid)
-
- #define SCHED_DEADLINE 6
-
- /* XXX use the proper syscall numbers */
- #ifdef __x86_64__
- #define __NR_sched_setattr 314
- #define __NR_sched_getattr 315
- #endif
-
- #ifdef __i386__
- #define __NR_sched_setattr 351
- #define __NR_sched_getattr 352
- #endif
-
- #ifdef __arm__
- #define __NR_sched_setattr 380
- #define __NR_sched_getattr 381
- #endif
-
- static volatile int done;
-
- struct sched_attr {
+ application developer::
+
+ #define _GNU_SOURCE
+ #include <unistd.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <time.h>
+ #include <linux/unistd.h>
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <sys/syscall.h>
+ #include <pthread.h>
+
+ #define gettid() syscall(__NR_gettid)
+
+ #define SCHED_DEADLINE 6
+
+ /* XXX use the proper syscall numbers */
+ #ifdef __x86_64__
+ #define __NR_sched_setattr 314
+ #define __NR_sched_getattr 315
+ #endif
+
+ #ifdef __i386__
+ #define __NR_sched_setattr 351
+ #define __NR_sched_getattr 352
+ #endif
+
+ #ifdef __arm__
+ #define __NR_sched_setattr 380
+ #define __NR_sched_getattr 381
+ #endif
+
+ static volatile int done;
+
+ struct sched_attr {
__u32 size;
__u32 sched_policy;
@@ -802,25 +819,25 @@ Appendix B. Minimal main()
__u64 sched_runtime;
__u64 sched_deadline;
__u64 sched_period;
- };
+ };
- int sched_setattr(pid_t pid,
+ int sched_setattr(pid_t pid,
const struct sched_attr *attr,
unsigned int flags)
- {
+ {
return syscall(__NR_sched_setattr, pid, attr, flags);
- }
+ }
- int sched_getattr(pid_t pid,
+ int sched_getattr(pid_t pid,
struct sched_attr *attr,
unsigned int size,
unsigned int flags)
- {
+ {
return syscall(__NR_sched_getattr, pid, attr, size, flags);
- }
+ }
- void *run_deadline(void *data)
- {
+ void *run_deadline(void *data)
+ {
struct sched_attr attr;
int x = 0;
int ret;
@@ -851,10 +868,10 @@ Appendix B. Minimal main()
printf("deadline thread dies [%ld]\n", gettid());
return NULL;
- }
+ }
- int main (int argc, char **argv)
- {
+ int main (int argc, char **argv)
+ {
pthread_t thread;
printf("main thread [%ld]\n", gettid());
@@ -868,4 +885,4 @@ Appendix B. Minimal main()
printf("main dies [%ld]\n", gettid());
return 0;
- }
+ }
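
As an illustrative aside (not part of the patch): a tiny helper that evaluates
the sufficient global-EDF schedulability test quoted in Section 3.3 above,
sum(WCET_i / P_i) <= M - (M - 1) * U_max, for implicit-deadline tasks
(D_i = P_i). The program framing and names are made up; the sample task set is
the two tasks with runtime 4 and period 8 used in Section 2.2::

  #include <stdbool.h>
  #include <stdio.h>

  static bool gedf_sufficient(const double *wcet, const double *period,
                              int n, int m)
  {
      double u_sum = 0.0, u_max = 0.0;
      int i;

      for (i = 0; i < n; i++) {
          double u = wcet[i] / period[i];

          u_sum += u;
          if (u > u_max)
              u_max = u;
      }
      /* Sufficient, not necessary: failing it does not prove unschedulability. */
      return u_sum <= m - (m - 1) * u_max;
  }

  int main(void)
  {
      double wcet[]   = { 4.0, 4.0 };	/* runtime 4 ...                       */
      double period[] = { 8.0, 8.0 };	/* ... every period 8 (U = 0.5 each)   */

      printf("passes the sufficient test on 2 CPUs: %s\n",
             gedf_sufficient(wcet, period, 2, 2) ? "yes" : "no");
      return 0;
  }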
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.rst
index edd861c94c1b..82406685365a 100644
--- a/Documentation/scheduler/sched-design-CFS.txt
+++ b/Documentation/scheduler/sched-design-CFS.rst
@@ -1,9 +1,10 @@
- =============
- CFS Scheduler
- =============
+=============
+CFS Scheduler
+=============
1. OVERVIEW
+============
CFS stands for "Completely Fair Scheduler," and is the new "desktop" process
scheduler implemented by Ingo Molnar and merged in Linux 2.6.23. It is the
@@ -27,6 +28,7 @@ is its actual runtime normalized to the total number of running tasks.
2. FEW IMPLEMENTATION DETAILS
+============