diff options
author | Mauro Carvalho Chehab <mchehab+samsung@kernel.org> | 2019-06-12 14:53:03 -0300 |
---|---|---|
committer | Jonathan Corbet <corbet@lwn.net> | 2019-06-14 14:32:18 -0600 |
commit | d6a3b247627a3bc0551504eb305d624cc6fb5453 (patch) | |
tree | d2605772889c71e02d4aeda5a616567153d68b6a | |
parent | d223884089734cc637c4e5458870d69f6ded9f89 (diff) |
docs: scheduler: convert docs to ReST and rename to *.rst
In order to prepare to add them to the Kernel API book,
convert the files to ReST format.
The conversion is actually:
- add blank lines and indentation in order to identify paragraphs;
- fix tables markups;
- add some lists markups;
- mark literal blocks;
- adjust title markups.
At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
-rw-r--r-- | Documentation/ABI/testing/sysfs-kernel-uids | 2 | ||||
-rw-r--r-- | Documentation/scheduler/completion.rst (renamed from Documentation/scheduler/completion.txt) | 38 | ||||
-rw-r--r-- | Documentation/scheduler/index.rst | 29 | ||||
-rw-r--r-- | Documentation/scheduler/sched-arch.rst (renamed from Documentation/scheduler/sched-arch.txt) | 18 | ||||
-rw-r--r-- | Documentation/scheduler/sched-bwc.rst (renamed from Documentation/scheduler/sched-bwc.txt) | 30 | ||||
-rw-r--r-- | Documentation/scheduler/sched-deadline.rst (renamed from Documentation/scheduler/sched-deadline.txt) | 311 | ||||
-rw-r--r-- | Documentation/scheduler/sched-design-CFS.rst (renamed from Documentation/scheduler/sched-design-CFS.txt) | 15 | ||||
-rw-r--r-- | Documentation/scheduler/sched-domains.rst (renamed from Documentation/scheduler/sched-domains.txt) | 8 | ||||
-rw-r--r-- | Documentation/scheduler/sched-energy.rst (renamed from Documentation/scheduler/sched-energy.txt) | 47 | ||||
-rw-r--r-- | Documentation/scheduler/sched-nice-design.rst (renamed from Documentation/scheduler/sched-nice-design.txt) | 6 | ||||
-rw-r--r-- | Documentation/scheduler/sched-rt-group.rst (renamed from Documentation/scheduler/sched-rt-group.txt) | 28 | ||||
-rw-r--r-- | Documentation/scheduler/sched-stats.rst (renamed from Documentation/scheduler/sched-stats.txt) | 35 | ||||
-rw-r--r-- | Documentation/scheduler/text_files.rst | 5 | ||||
-rw-r--r-- | Documentation/vm/numa.rst | 2 | ||||
-rw-r--r-- | init/Kconfig | 6 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 2 |
16 files changed, 340 insertions, 242 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/testing/sysfs-kernel-uids index 28f14695a852..4182b7061816 100644 --- a/Documentation/ABI/testing/sysfs-kernel-uids +++ b/Documentation/ABI/testing/sysfs-kernel-uids @@ -11,4 +11,4 @@ Description: example would be, if User A has shares = 1024 and user B has shares = 2048, User B will get twice the CPU bandwidth user A will. For more details refer - Documentation/scheduler/sched-design-CFS.txt + Documentation/scheduler/sched-design-CFS.rst diff --git a/Documentation/scheduler/completion.txt b/Documentation/scheduler/completion.rst index e5b9df4d8078..9f039b4f4b09 100644 --- a/Documentation/scheduler/completion.txt +++ b/Documentation/scheduler/completion.rst @@ -1,3 +1,4 @@ +================================================ Completions - "wait for completion" barrier APIs ================================================ @@ -46,7 +47,7 @@ it has to wait for it. To use completions you need to #include <linux/completion.h> and create a static or dynamic variable of type 'struct completion', -which has only two fields: +which has only two fields:: struct completion { unsigned int done; @@ -57,7 +58,7 @@ This provides the ->wait waitqueue to place tasks on for waiting (if any), and the ->done completion flag for indicating whether it's completed or not. Completions should be named to refer to the event that is being synchronized on. -A good example is: +A good example is:: wait_for_completion(&early_console_added); @@ -81,7 +82,7 @@ have taken place, even if these wait functions return prematurely due to a timeo or a signal triggering. Initializing of dynamically allocated completion objects is done via a call to -init_completion(): +init_completion():: init_completion(&dynamic_object->done); @@ -100,7 +101,8 @@ but be aware of other races. For static declaration and initialization, macros are available. 
-For static (or global) declarations in file scope you can use DECLARE_COMPLETION(): +For static (or global) declarations in file scope you can use +DECLARE_COMPLETION():: static DECLARE_COMPLETION(setup_done); DECLARE_COMPLETION(setup_done); @@ -111,7 +113,7 @@ initialized to 'not done' and doesn't require an init_completion() call. When a completion is declared as a local variable within a function, then the initialization should always use DECLARE_COMPLETION_ONSTACK() explicitly, not just to make lockdep happy, but also to make it clear -that limited scope had been considered and is intentional: +that limited scope had been considered and is intentional:: DECLARE_COMPLETION_ONSTACK(setup_done) @@ -140,11 +142,11 @@ Waiting for completions: ------------------------ For a thread to wait for some concurrent activity to finish, it -calls wait_for_completion() on the initialized completion structure: +calls wait_for_completion() on the initialized completion structure:: void wait_for_completion(struct completion *done) -A typical usage scenario is: +A typical usage scenario is:: CPU#1 CPU#2 @@ -192,17 +194,17 @@ A common problem that occurs is to have unclean assignment of return types, so take care to assign return-values to variables of the proper type. Checking for the specific meaning of return values also has been found -to be quite inaccurate, e.g. constructs like: +to be quite inaccurate, e.g. constructs like:: if (!wait_for_completion_interruptible_timeout(...)) ... would execute the same code path for successful completion and for the -interrupted case - which is probably not what you want. +interrupted case - which is probably not what you want:: int wait_for_completion_interruptible(struct completion *done) This function marks the task TASK_INTERRUPTIBLE while it is waiting. -If a signal was received while waiting it will return -ERESTARTSYS; 0 otherwise. 
+If a signal was received while waiting it will return -ERESTARTSYS; 0 otherwise:: unsigned long wait_for_completion_timeout(struct completion *done, unsigned long timeout) @@ -214,7 +216,7 @@ Timeouts are preferably calculated with msecs_to_jiffies() or usecs_to_jiffies() to make the code largely HZ-invariant. If the returned timeout value is deliberately ignored a comment should probably explain -why (e.g. see drivers/mfd/wm8350-core.c wm8350_read_auxadc()). +why (e.g. see drivers/mfd/wm8350-core.c wm8350_read_auxadc()):: long wait_for_completion_interruptible_timeout(struct completion *done, unsigned long timeout) @@ -225,14 +227,14 @@ jiffies if completion occurred. Further variants include _killable which uses TASK_KILLABLE as the designated tasks state and will return -ERESTARTSYS if it is interrupted, -or 0 if completion was achieved. There is a _timeout variant as well: +or 0 if completion was achieved. There is a _timeout variant as well:: long wait_for_completion_killable(struct completion *done) long wait_for_completion_killable_timeout(struct completion *done, unsigned long timeout) The _io variants wait_for_completion_io() behave the same as the non-_io variants, except for accounting waiting time as 'waiting on IO', which has -an impact on how the task is accounted in scheduling/IO stats: +an impact on how the task is accounted in scheduling/IO stats:: void wait_for_completion_io(struct completion *done) unsigned long wait_for_completion_io_timeout(struct completion *done, unsigned long timeout) @@ -243,11 +245,11 @@ Signaling completions: A thread that wants to signal that the conditions for continuation have been achieved calls complete() to signal exactly one of the waiters that it can -continue: +continue:: void complete(struct completion *done) -... or calls complete_all() to signal all current and future waiters: +... 
or calls complete_all() to signal all current and future waiters:: void complete_all(struct completion *done) @@ -268,7 +270,7 @@ probably are a design bug. Signaling completion from IRQ context is fine as it will appropriately lock with spin_lock_irqsave()/spin_unlock_irqrestore() and it will never -sleep. +sleep. try_wait_for_completion()/completion_done(): @@ -276,14 +278,14 @@ try_wait_for_completion()/completion_done(): The try_wait_for_completion() function will not put the thread on the wait queue but rather returns false if it would need to enqueue (block) the thread, -else it consumes one posted completion and returns true. +else it consumes one posted completion and returns true:: bool try_wait_for_completion(struct completion *done) Finally, to check the state of a completion without changing it in any way, call completion_done(), which returns false if there are no posted completions that were not yet consumed by waiters (implying that there are -waiters) and true otherwise; +waiters) and true otherwise:: bool completion_done(struct completion *done) diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst new file mode 100644 index 000000000000..058be77a4c34 --- /dev/null +++ b/Documentation/scheduler/index.rst @@ -0,0 +1,29 @@ +:orphan: + +=============== +Linux Scheduler +=============== + +.. toctree:: + :maxdepth: 1 + + + completion + sched-arch + sched-bwc + sched-deadline + sched-design-CFS + sched-domains + sched-energy + sched-nice-design + sched-rt-group + sched-stats + + text_files + +.. 
only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.rst index a2f27bbf2cba..0eaec669790a 100644 --- a/Documentation/scheduler/sched-arch.txt +++ b/Documentation/scheduler/sched-arch.rst @@ -1,4 +1,6 @@ - CPU Scheduler implementation hints for architecture specific code +================================================================= +CPU Scheduler implementation hints for architecture specific code +================================================================= Nick Piggin, 2005 @@ -35,9 +37,10 @@ Your cpu_idle routines need to obey the following rules: 4. The only time interrupts need to be disabled when checking need_resched is if we are about to sleep the processor until the next interrupt (this doesn't provide any protection of - need_resched, it prevents losing an interrupt). + need_resched, it prevents losing an interrupt): + + 4a. Common problem with this type of sleep appears to be:: - 4a. Common problem with this type of sleep appears to be: local_irq_disable(); if (!need_resched()) { local_irq_enable(); @@ -51,10 +54,10 @@ Your cpu_idle routines need to obey the following rules: although it may be reasonable to do some background work or enter a low CPU priority. - 5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter - an interrupt sleep, it needs to be cleared then a memory - barrier issued (followed by a test of need_resched with - interrupts disabled, as explained in 3). + - 5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter + an interrupt sleep, it needs to be cleared then a memory + barrier issued (followed by a test of need_resched with + interrupts disabled, as explained in 3). arch/x86/kernel/process.c has examples of both polling and sleeping idle functions. @@ -71,4 +74,3 @@ sh64 - Is sleeping racy vs interrupts? (See #4a) sparc - IRQs on at this point(?), change local_irq_save to _disable. 
- TODO: needs secondary CPUs to disable preempt (See #1) - diff --git a/Documentation/scheduler/sched-bwc.txt b/Documentation/scheduler/sched-bwc.rst index f6b1873f68ab..3a9064219656 100644 --- a/Documentation/scheduler/sched-bwc.txt +++ b/Documentation/scheduler/sched-bwc.rst @@ -1,8 +1,9 @@ +===================== CFS Bandwidth Control ===================== [ This document only discusses CPU bandwidth control for SCHED_NORMAL. - The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.txt ] + The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.rst ] CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the specification of the maximum CPU bandwidth available to a group or hierarchy. @@ -27,7 +28,8 @@ cpu.cfs_quota_us: the total available run-time within a period (in microseconds) cpu.cfs_period_us: the length of a period (in microseconds) cpu.stat: exports throttling statistics [explained further below] -The default values are: +The default values are:: + cpu.cfs_period_us=100ms cpu.cfs_quota=-1 @@ -55,7 +57,8 @@ For efficiency run-time is transferred between the global pool and CPU local on large systems. The amount transferred each time such an update is required is described as the "slice". -This is tunable via procfs: +This is tunable via procfs:: + /proc/sys/kernel/sched_cfs_bandwidth_slice_us (default=5ms) Larger slice values will reduce transfer overheads, while smaller values allow @@ -66,6 +69,7 @@ Statistics A group's bandwidth statistics are exported via 3 fields in cpu.stat. cpu.stat: + - nr_periods: Number of enforcement intervals that have elapsed. - nr_throttled: Number of times the group has been throttled/limited. - throttled_time: The total time duration (in nanoseconds) for which entities @@ -78,12 +82,15 @@ Hierarchical considerations The interface enforces that an individual entity's bandwidth is always attainable, that is: max(c_i) <= C. 
However, over-subscription in the aggregate case is explicitly allowed to enable work-conserving semantics -within a hierarchy. +within a hierarchy: + e.g. \Sum (c_i) may exceed C + [ Where C is the parent's bandwidth, and c_i its children ] There are two ways in which a group may become throttled: + a. it fully consumes its own quota within a period b. a parent's quota is fully consumed within its period @@ -92,7 +99,7 @@ be allowed to until the parent's runtime is refreshed. Examples -------- -1. Limit a group to 1 CPU worth of runtime. +1. Limit a group to 1 CPU worth of runtime:: If period is 250ms and quota is also 250ms, the group will get 1 CPU worth of runtime every 250ms. @@ -100,10 +107,10 @@ Examples # echo 250000 > cpu.cfs_quota_us /* quota = 250ms */ # echo 250000 > cpu.cfs_period_us /* period = 250ms */ -2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine. +2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine - With 500ms period and 1000ms quota, the group can get 2 CPUs worth of - runtime every 500ms. + With 500ms period and 1000ms quota, the group can get 2 CPUs worth of + runtime every 500ms:: # echo 1000000 > cpu.cfs_quota_us /* quota = 1000ms */ # echo 500000 > cpu.cfs_period_us /* period = 500ms */ @@ -112,11 +119,10 @@ Examples 3. Limit a group to 20% of 1 CPU. - With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU. + With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU:: # echo 10000 > cpu.cfs_quota_us /* quota = 10ms */ # echo 50000 > cpu.cfs_period_us /* period = 50ms */ - By using a small period here we are ensuring a consistent latency - response at the expense of burst capacity. - + By using a small period here we are ensuring a consistent latency + response at the expense of burst capacity. 
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.rst index b14e03ff3528..873fb2775ca6 100644 --- a/Documentation/scheduler/sched-deadline.txt +++ b/Documentation/scheduler/sched-deadline.rst @@ -1,29 +1,29 @@ - Deadline Task Scheduling - ------------------------ - -CONTENTS -======== - - 0. WARNING - 1. Overview - 2. Scheduling algorithm - 2.1 Main algorithm - 2.2 Bandwidth reclaiming - 3. Scheduling Real-Time Tasks - 3.1 Definitions - 3.2 Schedulability Analysis for Uniprocessor Systems - 3.3 Schedulability Analysis for Multiprocessor Systems - 3.4 Relationship with SCHED_DEADLINE Parameters - 4. Bandwidth management - 4.1 System-wide settings - 4.2 Task interface - 4.3 Default behavior - 4.4 Behavior of sched_yield() - 5. Tasks CPU affinity - 5.1 SCHED_DEADLINE and cpusets HOWTO - 6. Future plans - A. Test suite - B. Minimal main() +======================== +Deadline Task Scheduling +======================== + +.. CONTENTS + + 0. WARNING + 1. Overview + 2. Scheduling algorithm + 2.1 Main algorithm + 2.2 Bandwidth reclaiming + 3. Scheduling Real-Time Tasks + 3.1 Definitions + 3.2 Schedulability Analysis for Uniprocessor Systems + 3.3 Schedulability Analysis for Multiprocessor Systems + 3.4 Relationship with SCHED_DEADLINE Parameters + 4. Bandwidth management + 4.1 System-wide settings + 4.2 Task interface + 4.3 Default behavior + 4.4 Behavior of sched_yield() + 5. Tasks CPU affinity + 5.1 SCHED_DEADLINE and cpusets HOWTO + 6. Future plans + A. Test suite + B. Minimal main() 0. WARNING @@ -44,7 +44,7 @@ CONTENTS 2. Scheduling algorithm -================== +======================= 2.1 Main algorithm ------------------ @@ -80,7 +80,7 @@ CONTENTS a "remaining runtime". 
These two parameters are initially set to 0; - When a SCHED_DEADLINE task wakes up (becomes ready for execution), - the scheduler checks if + the scheduler checks if:: remaining runtime runtime ---------------------------------- > --------- @@ -97,7 +97,7 @@ CONTENTS left unchanged; - When a SCHED_DEADLINE task executes for an amount of time t, its - remaining runtime is decreased as + remaining runtime is decreased as:: remaining runtime = remaining runtime - t @@ -112,7 +112,7 @@ CONTENTS - When the current time is equal to the replenishment time of a throttled task, the scheduling deadline and the remaining runtime are - updated as + updated as:: scheduling deadline = scheduling deadline + period remaining runtime = remaining runtime + runtime @@ -129,7 +129,7 @@ CONTENTS Reclamation of Unused Bandwidth) algorithm [15, 16, 17] and it is enabled when flag SCHED_FLAG_RECLAIM is set. - The following diagram illustrates the state names for tasks handled by GRUB: + The following diagram illustrates the state names for tasks handled by GRUB:: ------------ (d) | Active | @@ -168,7 +168,7 @@ CONTENTS breaking the real-time guarantees. The 0-lag time for a task entering the ActiveNonContending state is - computed as + computed as:: (runtime * dl_period) deadline - --------------------- @@ -183,7 +183,7 @@ CONTENTS the task's utilization must be removed from the previous runqueue's active utilization and must be added to the new runqueue's active utilization. In order to avoid races between a task waking up on a runqueue while the - "inactive timer" is running on a different CPU, the "dl_non_contending" + "inactive timer" is running on a different CPU, the "dl_non_contending" flag is used to indicate that a task is not on a runqueue but is active (so, the flag is set when the task blocks and is cleared when the "inactive timer" fires or when the task wakes up). 
@@ -222,36 +222,36 @@ CONTENTS Let's now see a trivial example of two deadline tasks with runtime equal - to 4 and period equal to 8 (i.e., bandwidth equal to 0.5): - - A Task T1 - | - | | - | | - |-------- |---- - | | V - |---|---|---|---|---|---|---|---|--------->t - 0 1 2 3 4 5 6 7 8 - - - A Task T2 - | - | | - | | - | ------------------------| - | | V - |---|---|---|---|---|---|---|---|--------->t - 0 1 2 3 4 5 6 7 8 - - - A running_bw - | - 1 ----------------- ------ - | | | - 0.5- ----------------- - | | - |---|---|---|---|---|---|---|---|--------->t - 0 1 2 3 4 5 6 7 8 + to 4 and period equal to 8 (i.e., bandwidth equal to 0.5):: + + A Task T1 + | + | | + | | + |-------- |---- + | | V + |---|---|---|---|---|---|---|---|--------->t + 0 1 2 3 4 5 6 7 8 + + + A Task T2 + | + | | + | | + | ------------------------| + | | V + |---|---|---|---|---|---|---|---|--------->t + 0 1 2 3 4 5 6 7 8 + + + A running_bw + | + 1 ----------------- ------ + | | | + 0.5- ----------------- + | | + |---|---|---|---|---|---|---|---|--------->t + 0 1 2 3 4 5 6 7 8 - Time t = 0: @@ -284,7 +284,7 @@ CONTENTS 2.3 Energy-aware scheduling ------------------------- +--------------------------- When cpufreq's schedutil governor is selected, SCHED_DEADLINE implements the GRUB-PA [19] algorithm, reducing the CPU operating frequency to the minimum @@ -299,15 +299,20 @@ CONTENTS 3. Scheduling Real-Time Tasks ============================= - * BIG FAT WARNING ****************************************************** - * - * This section contains a (not-thorough) summary on classical deadline - * scheduling theory, and how it applies to SCHED_DEADLINE. - * The reader can "safely" skip to Section 4 if only interested in seeing - * how the scheduling policy can be used. Anyway, we strongly recommend - * to come back here and continue reading (once the urge for testing is - * satisfied :P) to be sure of fully understanding all technical details. 
- ************************************************************************ + + + .. BIG FAT WARNING ****************************************************** + + .. warning:: + + This section contains a (not-thorough) summary on classical deadline + scheduling theory, and how it applies to SCHED_DEADLINE. + The reader can "safely" skip to Section 4 if only interested in seeing + how the scheduling policy can be used. Anyway, we strongly recommend + to come back here and continue reading (once the urge for testing is + satisfied :P) to be sure of fully understanding all technical details. + + .. ************************************************************************ There are no limitations on what kind of task can exploit this new scheduling discipline, even if it must be said that it is particularly @@ -329,6 +334,7 @@ CONTENTS sporadic with minimum inter-arrival time P is r_{j+1} >= r_j + P. Finally, d_j = r_j + D, where D is the task's relative deadline. Summing up, a real-time task can be described as + Task = (WCET, D, P) The utilization of a real-time task is defined as the ratio between its @@ -352,13 +358,15 @@ CONTENTS between the finishing time of a job and its absolute deadline). More precisely, it can be proven that using a global EDF scheduler the maximum tardiness of each task is smaller or equal than + ((M − 1) · WCET_max − WCET_min)/(M − (M − 2) · U_max) + WCET_max + where WCET_max = max{WCET_i} is the maximum WCET, WCET_min=min{WCET_i} is the minimum WCET, and U_max = max{WCET_i/P_i} is the maximum utilization[12]. 
3.2 Schedulability Analysis for Uniprocessor Systems ------------------------- +---------------------------------------------------- If M=1 (uniprocessor system), or in case of partitioned scheduling (each real-time task is statically assigned to one and only one CPU), it is @@ -370,7 +378,9 @@ CONTENTS a task as WCET_i/min{D_i,P_i}, and EDF is able to respect all the deadlines of all the tasks running on a CPU if the sum of the densities of the tasks running on such a CPU is smaller or equal than 1: + sum(WCET_i / min{D_i, P_i}) <= 1 + It is important to notice that this condition is only sufficient, and not necessary: there are task sets that are schedulable, but do not respect the condition. For example, consider the task set {Task_1,Task_2} composed by @@ -379,7 +389,9 @@ CONTENTS (Task_1 is scheduled as soon as it is released, and finishes just in time to respect its deadline; Task_2 is scheduled immediately after Task_1, hence its response time cannot be larger than 50ms + 10ms = 60ms) even if + 50 / min{50,100} + 10 / min{100, 100} = 50 / 50 + 10 / 100 = 1.1 + Of course it is possible to test the exact schedulability of tasks with D_i != P_i (checking a condition that is both sufficient and necessary), but this cannot be done by comparing the total utilization or density with @@ -399,7 +411,7 @@ CONTENTS 4 Linux uses an admission test based on the tasks' utilizations. 3.3 Schedulability Analysis for Multiprocessor Systems ------------------------- +------------------------------------------------------ On multiprocessor systems with global EDF scheduling (non partitioned systems), a sufficient test for schedulability can not be based on the @@ -428,7 +440,9 @@ CONTENTS between total utilization (or density) and a fixed constant. If all tasks have D_i = P_i, a sufficient schedulability condition can be expressed in a simple way: + sum(WCET_i / P_i) <= M - (M - 1) · U_max + where U_max = max{WCET_i / P_i}[10]. 
Notice that for U_max = 1, M - (M - 1) · U_max becomes M - M + 1 = 1 and this schedulability condition just confirms the Dhall's effect. A more complete survey of the literature @@ -447,7 +461,7 @@ CONTENTS the tasks are limited. 3.4 Relationship with SCHED_DEADLINE Parameters ------------------------- +----------------------------------------------- Finally, it is important to understand the relationship between the SCHED_DEADLINE scheduling parameters described in Section 2 (runtime, @@ -473,6 +487,7 @@ CONTENTS this task, as it is not possible to respect its temporal constraints. References: + 1 - C. L. Liu and J. W. Layland. Scheduling algorithms for multiprogram- ming in a hard-real-time environment. Journal of the Association for Computing Machinery, 20(1), 1973. @@ -550,7 +565,7 @@ CONTENTS The interface used to control the CPU bandwidth that can be allocated to -deadline tasks is similar to the one already used for -rt tasks with real-time group scheduling (a.k.a. RT-throttling - see - Documentation/scheduler/sched-rt-group.txt), and is based on readable/ + Documentation/scheduler/sched-rt-group.rst), and is based on readable/ writable control files located in procfs (for system wide settings). Notice that per-group settings (controlled through cgroupfs) are still not defined for -deadline tasks, because more discussion is needed in order to @@ -596,11 +611,13 @@ CONTENTS Specifying a periodic/sporadic task that executes for a given amount of runtime at each instance, and that is scheduled according to the urgency of its own timing constraints needs, in general, a way of declaring: + - a (maximum/typical) instance execution time, - a minimum interval between consecutive instances, - a time constraint by which each instance must be completed. 
Therefore: + * a new struct sched_attr, containing all the necessary fields is provided; * the new scheduling related syscalls that manipulate it, i.e., @@ -658,21 +675,21 @@ CONTENTS ------------------------------------ An example of a simple configuration (pin a -deadline task to CPU0) - follows (rt-app is used to create a -deadline task). - - mkdir /dev/cpuset - mount -t cgroup -o cpuset cpuset /dev/cpuset - cd /dev/cpuset - mkdir cpu0 - echo 0 > cpu0/cpuset.cpus - echo 0 > cpu0/cpuset.mems - echo 1 > cpuset.cpu_exclusive - echo 0 > cpuset.sched_load_balance - echo 1 > cpu0/cpuset.cpu_exclusive - echo 1 > cpu0/cpuset.mem_exclusive - echo $$ > cpu0/tasks - rt-app -t 100000:10000:d:0 -D5 (it is now actually superfluous to specify - task affinity) + follows (rt-app is used to create a -deadline task):: + + mkdir /dev/cpuset + mount -t cgroup -o cpuset cpuset /dev/cpuset + cd /dev/cpuset + mkdir cpu0 + echo 0 > cpu0/cpuset.cpus + echo 0 > cpu0/cpuset.mems + echo 1 > cpuset.cpu_exclusive + echo 0 > cpuset.sched_load_balance + echo 1 > cpu0/cpuset.cpu_exclusive + echo 1 > cpu0/cpuset.mem_exclusive + echo $$ > cpu0/tasks + rt-app -t 100000:10000:d:0 -D5 # it is now actually superfluous to specify + # task affinity 6. Future plans =============== @@ -711,7 +728,7 @@ Appendix A. Test suite rt-app is available at: https://github.com/scheduler-tools/rt-app. Thread parameters can be specified from the command line, with something like - this: + this:: # rt-app -t 100000:10000:d -t 150000:20000:f:10 -D5 @@ -721,27 +738,27 @@ Appendix A. Test suite of 5 seconds. More interestingly, configurations can be described with a json file that - can be passed as input to rt-app with something like this: + can be passed as input to rt-app with something like this:: # rt-app my_config.json The parameters that can be specified with the second method are a superset of the command line options. Please refer to rt-app documentation for more - details (<rt-app-sources>/doc/*.json). 
+ details (`<rt-app-sources>/doc/*.json`). The second testing application is a modification of schedtool, called schedtool-dl, which can be used to setup SCHED_DEADLINE parameters for a certain pid/application. schedtool-dl is available at: https://github.com/scheduler-tools/schedtool-dl.git. - The usage is straightforward: + The usage is straightforward:: # schedtool -E -t 10000000:100000000 -e ./my_cpuhog_app With this, my_cpuhog_app is put to run inside a SCHED_DEADLINE reservation of 10ms every 100ms (note that parameters are expressed in microseconds). You can also use schedtool to create a reservation for an already running - application, given that you know its pid: + application, given that you know its pid:: # schedtool -E -t 10000000:100000000 my_app_pid @@ -750,43 +767,43 @@ Appendix B. Minimal main() We provide in what follows a simple (ugly) self-contained code snippet showing how SCHED_DEADLINE reservations can be created by a real-time - application developer. - - #define _GNU_SOURCE - #include <unistd.h> - #include <stdio.h> - #include <stdlib.h> - #include <string.h> - #include <time.h> - #include <linux/unistd.h> - #include <linux/kernel.h> - #include <linux/types.h> - #include <sys/syscall.h> - #include <pthread.h> - - #define gettid() syscall(__NR_gettid) - - #define SCHED_DEADLINE 6 - - /* XXX use the proper syscall numbers */ - #ifdef __x86_64__ - #define __NR_sched_setattr 314 - #define __NR_sched_getattr 315 - #endif - - #ifdef __i386__ - #define __NR_sched_setattr 351 - #define __NR_sched_getattr 352 - #endif - - #ifdef __arm__ - #define __NR_sched_setattr 380 - #define __NR_sched_getattr 381 - #endif - - static volatile int done; - - struct sched_attr { + application developer:: + + #define _GNU_SOURCE + #include <unistd.h> + #include <stdio.h> + #include <stdlib.h> + #include <string.h> + #include <time.h> + #include <linux/unistd.h> + #include <linux/kernel.h> + #include <linux/types.h> + #include <sys/syscall.h> + #include <pthread.h> + 
+ #define gettid() syscall(__NR_gettid) + + #define SCHED_DEADLINE 6 + + /* XXX use the proper syscall numbers */ + #ifdef __x86_64__ + #define __NR_sched_setattr 314 + #define __NR_sched_getattr 315 + #endif + + #ifdef __i386__ + #define __NR_sched_setattr 351 + #define __NR_sched_getattr 352 + #endif + + #ifdef __arm__ + #define __NR_sched_setattr 380 + #define __NR_sched_getattr 381 + #endif + + static volatile int done; + + struct sched_attr { __u32 size; __u32 sched_policy; @@ -802,25 +819,25 @@ Appendix B. Minimal main() __u64 sched_runtime; __u64 sched_deadline; __u64 sched_period; - }; + }; - int sched_setattr(pid_t pid, + int sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags) - { + { return syscall(__NR_sched_setattr, pid, attr, flags); - } + } - int sched_getattr(pid_t pid, + int sched_getattr(pid_t pid, struct sched_attr *attr, unsigned int size, unsigned int flags) - { + { return syscall(__NR_sched_getattr, |