summaryrefslogtreecommitdiffstats
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/Kconfig50
-rw-r--r--kernel/time/timer_list.c41
2 files changed, 74 insertions, 17 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 747bbc70f53b..2b62fe86f9ec 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -133,6 +133,56 @@ config NO_HZ_FULL_ALL
Note the boot CPU will still be kept outside the range to
handle the timekeeping duty.
+config NO_HZ_FULL_SYSIDLE
+ bool "Detect full-system idle state for full dynticks system"
+ depends on NO_HZ_FULL
+ default n
+ help
+ At least one CPU must keep the scheduling-clock tick running for
+ timekeeping purposes whenever there is a non-idle CPU, where
+ "non-idle" also includes dynticks CPUs as long as they are
+ running non-idle tasks. Because the underlying adaptive-tick
+ support cannot distinguish between all CPUs being idle and
+ all CPUs each running a single task in dynticks mode, the
+ underlying support simply ensures that there is always a CPU
+ handling the scheduling-clock tick, whether or not all CPUs
+ are idle. This Kconfig option enables scalable detection of
+ the all-CPUs-idle state, thus allowing the scheduling-clock
+ tick to be disabled when all CPUs are idle. Note that scalable
+ detection of the all-CPUs-idle state means that larger systems
+ will be slower to declare the all-CPUs-idle state.
+
+ Say Y if you would like to help debug all-CPUs-idle detection.
+
+ Say N if you are unsure.
+
+config NO_HZ_FULL_SYSIDLE_SMALL
+ int "Number of CPUs above which large-system approach is used"
+ depends on NO_HZ_FULL_SYSIDLE
+ range 1 NR_CPUS
+ default 8
+ help
+ The full-system idle detection mechanism takes a lazy approach
+ on large systems, as is required to attain decent scalability.
+ However, on smaller systems, scalability is not anywhere near as
+ large a concern as is energy efficiency. The sysidle subsystem
+ therefore uses a fast but non-scalable algorithm for small
+ systems and a lazier but scalable algorithm for large systems.
+ This Kconfig parameter defines the number of CPUs in the largest
+ system that will be considered to be "small".
+
+ The default value will be fine in most cases. Battery-powered
+ systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
+ numbers of CPUs, and (3) are suffering from battery-lifetime
+ problems due to long sysidle latencies might wish to experiment
+ with larger values for this Kconfig parameter. On the other
+ hand, they might be even better served by disabling NO_HZ_FULL
+ entirely, given that NO_HZ_FULL is intended for HPC and
+ real-time workloads that at present do not tend to be run on
+ battery-powered systems.
+
+ Take the default if you are unsure.
+
config NO_HZ
bool "Old Idle dynticks config"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 3bdf28323012..61ed862cdd37 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -265,10 +265,9 @@ static inline void timer_list_header(struct seq_file *m, u64 now)
static int timer_list_show(struct seq_file *m, void *v)
{
struct timer_list_iter *iter = v;
- u64 now = ktime_to_ns(ktime_get());
if (iter->cpu == -1 && !iter->second_pass)
- timer_list_header(m, now);
+ timer_list_header(m, iter->now);
else if (!iter->second_pass)
print_cpu(m, iter->cpu, iter->now);
#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -298,33 +297,41 @@ void sysrq_timer_list_show(void)
return;
}
-static void *timer_list_start(struct seq_file *file, loff_t *offset)
+static void *move_iter(struct timer_list_iter *iter, loff_t offset)
{
- struct timer_list_iter *iter = file->private;
-
- if (!*offset) {
- iter->cpu = -1;
- iter->now = ktime_to_ns(ktime_get());
- } else if (iter->cpu >= nr_cpu_ids) {
+ for (; offset; offset--) {
+ iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
+ if (iter->cpu >= nr_cpu_ids) {
#ifdef CONFIG_GENERIC_CLOCKEVENTS
- if (!iter->second_pass) {
- iter->cpu = -1;
- iter->second_pass = true;
- } else
- return NULL;
+ if (!iter->second_pass) {
+ iter->cpu = -1;
+ iter->second_pass = true;
+ } else
+ return NULL;
#else
- return NULL;
+ return NULL;
#endif
+ }
}
return iter;
}
+static void *timer_list_start(struct seq_file *file, loff_t *offset)
+{
+ struct timer_list_iter *iter = file->private;
+
+ if (!*offset)
+ iter->now = ktime_to_ns(ktime_get());
+ iter->cpu = -1;
+ iter->second_pass = false;
+ return move_iter(iter, *offset);
+}
+
static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset)
{
struct timer_list_iter *iter = file->private;
- iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
++*offset;
- return timer_list_start(file, offset);
+ return move_iter(iter, 1);
}
static void timer_list_stop(struct seq_file *seq, void *v)