summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-04 09:36:54 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-04 09:36:54 -0700
commit6832d9652f395f7d13003e3884942c40f52ac1fa (patch)
tree40555ad5eda9700cb973dac4db136ad97f5e8b19
parent228abe73ad67665d71eacd6a8a347dd76b0115ae (diff)
parentc2e7fcf53c3cb02b4ada1c66a9bc8a4d97d58aba (diff)
Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timers/nohz changes from Ingo Molnar: "It mostly contains fixes and full dynticks off-case optimizations, by Frederic Weisbecker" * 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits) nohz: Include local CPU in full dynticks global kick nohz: Optimize full dynticks's sched hooks with static keys nohz: Optimize full dynticks state checks with static keys nohz: Rename a few state variables vtime: Always debug check snapshot source _before_ updating it vtime: Always scale generic vtime accounting results vtime: Optimize full dynticks accounting off case with static keys vtime: Describe overriden functions in dedicated arch headers m68k: hardirq_count() only need preempt_mask.h hardirq: Split preempt count mask definitions context_tracking: Split low level state headers vtime: Fix racy cputime delta update vtime: Remove a few unneeded generic vtime state checks context_tracking: User/kernel broundary cross trace events context_tracking: Optimize context switch off case with static keys context_tracking: Optimize guest APIs off case with static key context_tracking: Optimize main APIs off case with static key context_tracking: Ground setup for static key use context_tracking: Remove full dynticks' hacky dependency on wide context tracking nohz: Only enable context tracking on full dynticks CPUs ...
-rw-r--r--arch/ia64/include/asm/Kbuild1
-rw-r--r--arch/m68k/include/asm/irqflags.h2
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/cputime.h3
-rw-r--r--arch/s390/include/asm/vtime.h7
-rw-r--r--arch/s390/kernel/vtime.c1
-rw-r--r--include/asm-generic/vtime.h0
-rw-r--r--include/linux/context_tracking.h128
-rw-r--r--include/linux/context_tracking_state.h39
-rw-r--r--include/linux/hardirq.h117
-rw-r--r--include/linux/preempt_mask.h122
-rw-r--r--include/linux/tick.h45
-rw-r--r--include/linux/vtime.h74
-rw-r--r--include/trace/events/context_tracking.h58
-rw-r--r--init/Kconfig28
-rw-r--r--init/main.c2
-rw-r--r--kernel/context_tracking.c125
-rw-r--r--kernel/sched/core.c4
-rw-r--r--kernel/sched/cputime.c53
-rw-r--r--kernel/time/Kconfig1
-rw-r--r--kernel/time/tick-sched.c61
21 files changed, 545 insertions, 327 deletions
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 05b03ecd7933..a3456f34f672 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
generic-y += exec.h
generic-y += kvm_para.h
generic-y += trace_clock.h
+generic-y += vtime.h \ No newline at end of file
diff --git a/arch/m68k/include/asm/irqflags.h b/arch/m68k/include/asm/irqflags.h
index 5053092b369f..a823cd73dc09 100644
--- a/arch/m68k/include/asm/irqflags.h
+++ b/arch/m68k/include/asm/irqflags.h
@@ -3,7 +3,7 @@
#include <linux/types.h>
#ifdef CONFIG_MMU
-#include <linux/hardirq.h>
+#include <linux/preempt_mask.h>
#endif
#include <linux/preempt.h>
#include <asm/thread_info.h>
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 650757c300db..704e6f10ae80 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -2,3 +2,4 @@
generic-y += clkdev.h
generic-y += rwsem.h
generic-y += trace_clock.h
+generic-y += vtime.h \ No newline at end of file
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d2ff41370c0c..f65bd3634519 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@
#include <asm/div64.h>
-#define __ARCH_HAS_VTIME_ACCOUNT
-#define __ARCH_HAS_VTIME_TASK_SWITCH
-
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
typedef unsigned long long __nocast cputime_t;
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 000000000000..af9896c53eb3
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
+#ifndef _S390_VTIME_H
+#define _S390_VTIME_H
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+#define __ARCH_HAS_VTIME_TASK_SWITCH
+
+#endif /* _S390_VTIME_H */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9b9c1b78ec67..abcfab55f99b 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
+#include <asm/vtime.h>
#include <asm/irq.h>
#include "entry.h"
diff --git a/include/asm-generic/vtime.h b/include/asm-generic/vtime.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/include/asm-generic/vtime.h
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index fc09d7b0dacf..158158704c30 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -2,100 +2,110 @@
#define _LINUX_CONTEXT_TRACKING_H
#include <linux/sched.h>
-#include <linux/percpu.h>
#include <linux/vtime.h>
+#include <linux/context_tracking_state.h>
#include <asm/ptrace.h>
-struct context_tracking {
- /*
- * When active is false, probes are unset in order
- * to minimize overhead: TIF flags are cleared
- * and calls to user_enter/exit are ignored. This
- * may be further optimized using static keys.
- */
- bool active;
- enum ctx_state {
- IN_KERNEL = 0,
- IN_USER,
- } state;
-};
-
-static inline void __guest_enter(void)
-{
- /*
- * This is running in ioctl context so we can avoid
- * the call to vtime_account() with its unnecessary idle check.
- */
- vtime_account_system(current);
- current->flags |= PF_VCPU;
-}
-
-static inline void __guest_exit(void)
-{
- /*
- * This is running in ioctl context so we can avoid
- * the call to vtime_account() with its unnecessary idle check.
- */
- vtime_account_system(current);
- current->flags &= ~PF_VCPU;
-}
#ifdef CONFIG_CONTEXT_TRACKING
-DECLARE_PER_CPU(struct context_tracking, context_tracking);
+extern void context_tracking_cpu_set(int cpu);
-static inline bool context_tracking_in_user(void)
+extern void context_tracking_user_enter(void);
+extern void context_tracking_user_exit(void);
+extern void __context_tracking_task_switch(struct task_struct *prev,
+ struct task_struct *next);
+
+static inline void user_enter(void)
{
- return __this_cpu_read(context_tracking.state) == IN_USER;
-}
+ if (static_key_false(&context_tracking_enabled))
+ context_tracking_user_enter();
-static inline bool context_tracking_active(void)
+}
+static inline void user_exit(void)
{
- return __this_cpu_read(context_tracking.active);
+ if (static_key_false(&context_tracking_enabled))
+ context_tracking_user_exit();
}
-extern void user_enter(void);
-extern void user_exit(void);
-
-extern void guest_enter(void);
-extern void guest_exit(void);
-
static inline enum ctx_state exception_enter(void)
{
enum ctx_state prev_ctx;
+ if (!static_key_false(&context_tracking_enabled))
+ return 0;
+
prev_ctx = this_cpu_read(context_tracking.state);
- user_exit();
+ context_tracking_user_exit();
return prev_ctx;
}
static inline void exception_exit(enum ctx_state prev_ctx)
{
- if (prev_ctx == IN_USER)
- user_enter();
+ if (static_key_false(&context_tracking_enabled)) {
+ if (prev_ctx == IN_USER)
+ context_tracking_user_enter();
+ }
}
-extern void context_tracking_task_switch(struct task_struct *prev,
- struct task_struct *next);
+static inline void context_tracking_task_switch(struct task_struct *prev,
+ struct task_struct *next)
+{
+ if (static_key_false(&context_tracking_enabled))
+ __context_tracking_task_switch(prev, next);
+}
#else
-static inline bool context_tracking_in_user(void) { return false; }
static inline void user_enter(void) { }
static inline void user_exit(void) { }
+static inline enum ctx_state exception_enter(void) { return 0; }
+static inline void exception_exit(enum ctx_state prev_ctx) { }
+static inline void context_tracking_task_switch(struct task_struct *prev,
+ struct task_struct *next) { }
+#endif /* !CONFIG_CONTEXT_TRACKING */
+
+
+#ifdef CONFIG_CONTEXT_TRACKING_FORCE
+extern void context_tracking_init(void);
+#else
+static inline void context_tracking_init(void) { }
+#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static inline void guest_enter(void)
{
- __guest_enter();
+ if (vtime_accounting_enabled())
+ vtime_guest_enter(current);
+ else
+ current->flags |= PF_VCPU;
}
static inline void guest_exit(void)
{
- __guest_exit();
+ if (vtime_accounting_enabled())
+ vtime_guest_exit(current);
+ else
+ current->flags &= ~PF_VCPU;
}
-static inline enum ctx_state exception_enter(void) { return 0; }
-static inline void exception_exit(enum ctx_state prev_ctx) { }
-static inline void context_tracking_task_switch(struct task_struct *prev,
- struct task_struct *next) { }
-#endif /* !CONFIG_CONTEXT_TRACKING */
+#else
+static inline void guest_enter(void)
+{
+ /*
+ * This is running in ioctl context so its safe
+ * to assume that it's the stime pending cputime
+ * to flush.
+ */
+ vtime_account_system(current);
+ current->flags |= PF_VCPU;
+}
+
+static inline void guest_exit(void)
+{
+ /* Flush the guest cputime we spent on the guest */
+ vtime_account_system(current);
+ current->flags &= ~PF_VCPU;
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
#endif
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
new file mode 100644
index 000000000000..0f1979d0674f
--- /dev/null
+++ b/include/linux/context_tracking_state.h
@@ -0,0 +1,39 @@
+#ifndef _LINUX_CONTEXT_TRACKING_STATE_H
+#define _LINUX_CONTEXT_TRACKING_STATE_H
+
+#include <linux/percpu.h>
+#include <linux/static_key.h>
+
+struct context_tracking {
+ /*
+ * When active is false, probes are unset in order
+ * to minimize overhead: TIF flags are cleared
+ * and calls to user_enter/exit are ignored. This
+ * may be further optimized using static keys.
+ */
+ bool active;
+ enum ctx_state {
+ IN_KERNEL = 0,
+ IN_USER,
+ } state;
+};
+
+#ifdef CONFIG_CONTEXT_TRACKING
+extern struct static_key context_tracking_enabled;
+DECLARE_PER_CPU(struct context_tracking, context_tracking);
+
+static inline bool context_tracking_in_user(void)
+{
+ return __this_cpu_read(context_tracking.state) == IN_USER;
+}
+
+static inline bool context_tracking_active(void)
+{
+ return __this_cpu_read(context_tracking.active);
+}
+#else
+static inline bool context_tracking_in_user(void) { return false; }
+static inline bool context_tracking_active(void) { return false; }
+#endif /* CONFIG_CONTEXT_TRACKING */
+
+#endif
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 05bcc0903766..ccfe17c5c8da 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -1,126 +1,11 @@
#ifndef LINUX_HARDIRQ_H
#define LINUX_HARDIRQ_H
-#include <linux/preempt.h>
+#include <linux/preempt_mask.h>
#include <linux/lockdep.h>
#include <linux/ftrace_irq.h>
#include <linux/vtime.h>
-#include <asm/hardirq.h>
-/*
- * We put the hardirq and softirq counter into the preemption
- * counter. The bitmask has the following meaning:
- *
- * - bits 0-7 are the preemption count (max preemption depth: 256)
- * - bits 8-15 are the softirq count (max # of softirqs: 256)
- *
- * The hardirq count can in theory reach the same as NR_IRQS.
- * In reality, the number of nested IRQS is limited to the stack
- * size as well. For archs with over 1000 IRQS it is not practical
- * to expect that they will all nest. We give a max of 10 bits for
- * hardirq nesting. An arch may choose to give less than 10 bits.
- * m68k expects it to be 8.
- *
- * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
- * - bit 26 is the NMI_MASK
- * - bit 27 is the PREEMPT_ACTIVE flag
- *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x03ff0000
- * NMI_MASK: 0x04000000
- */
-#define PREEMPT_BITS 8
-#define SOFTIRQ_BITS 8
-#define NMI_BITS 1
-
-#define MAX_HARDIRQ_BITS 10
-
-#ifndef HARDIRQ_BITS
-# define HARDIRQ_BITS MAX_HARDIRQ_BITS
-#endif
-
-#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
-#error HARDIRQ_BITS too high!
-#endif
-
-#define PREEMPT_SHIFT 0
-#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
-
-#define __IRQ_MASK(x) ((1UL << (x))-1)
-
-#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
-#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
-#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
-#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
-
-#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
-#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
-#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
-#define NMI_OFFSET (1UL << NMI_SHIFT)
-
-#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
-
-#ifndef PREEMPT_ACTIVE
-#define PREEMPT_ACTIVE_BITS 1
-#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
-#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
-#endif
-
-#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
-#error PREEMPT_ACTIVE is too low!
-#endif
-
-#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
-#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
-#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
- | NMI_MASK))
-
-/*
- * Are we doing bottom half or hardware interrupt processing?
- * Are we in a softirq context? Interrupt context?
- * in_softirq - Are we currently processing softirq or have bh disabled?
- * in_serving_softirq - Are we currently processing softirq?
- */
-#define in_irq() (hardirq_count())
-#define in_softirq() (softirq_count())
-#define in_interrupt() (irq_count())
-#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
-
-/*
- * Are we in NMI context?
- */
-#define in_nmi() (preempt_count() & NMI_MASK)
-
-#if defined(CONFIG_PREEMPT_COUNT)
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_CHECK_OFFSET 0
-#endif
-
-/*
- * Are we running in atomic context? WARNING: this macro cannot
- * always detect atomic context; in particular, it cannot know about
- * held spinlocks in non-preemptible kernels. Thus it should not be
- * used in the general case to determine whether sleeping is possible.
- * Do not use in_atomic() in driver code.
- */
-#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
-
-/*
- * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler, *after* releasing the kernel lock)
- */
-#define in_atomic_preempt_off() \
- ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
-
-#ifdef CONFIG_PREEMPT_COUNT
-# define preemptible() (preempt_count() == 0 && !irqs_disabled())
-#else
-# define preemptible() 0
-#endif
#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
extern void synchronize_irq(unsigned int irq);
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
new file mode 100644
index 000000000000..931bc616219f
--- /dev/null
+++ b/include/linux/preempt_mask.h
@@ -0,0 +1,122 @@
+#ifndef LINUX_PREEMPT_MASK_H
+#define LINUX_PREEMPT_MASK_H
+
+#include <linux/preempt.h>
+#include <asm/hardirq.h>
+
+/*
+ * We put the hardirq and softirq counter into the preemption
+ * counter. The bitmask has the following meaning:
+ *
+ * - bits 0-7 are the preemption count (max preemption depth: 256)
+ * - bits 8-15 are the softirq count (max # of softirqs: 256)
+ *
+ * The hardirq count can in theory reach the same as NR_IRQS.
+ * In reality, the number of nested IRQS is limited to the stack
+ * size as well. For archs with over 1000 IRQS it is not practical
+ * to expect that they will all nest. We give a max of 10 bits for
+ * hardirq nesting. An arch may choose to give less than 10 bits.
+ * m68k expects it to be 8.
+ *
+ * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
+ * - bit 26 is the NMI_MASK
+ * - bit 27 is the PREEMPT_ACTIVE flag
+ *
+ * PREEMPT_MASK: 0x000000ff
+ * SOFTIRQ_MASK: 0x0000ff00
+ * HARDIRQ_MASK: 0x03ff0000
+ * NMI_MASK: 0x04000000
+ */
+#define PREEMPT_BITS 8
+#define SOFTIRQ_BITS 8
+#define NMI_BITS 1
+
+#define MAX_HARDIRQ_BITS 10
+
+#ifndef HARDIRQ_BITS
+# define HARDIRQ_BITS MAX_HARDIRQ_BITS
+#endif
+
+#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
+#error HARDIRQ_BITS too high!
+#endif
+
+#define PREEMPT_SHIFT 0
+#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x) ((1UL << (x))-1)
+
+#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
+#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+
+#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
+#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
+#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
+#define NMI_OFFSET (1UL << NMI_SHIFT)
+
+#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
+
+#ifndef PREEMPT_ACTIVE
+#define PREEMPT_ACTIVE_BITS 1
+#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
+#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
+#endif
+
+#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
+#error PREEMPT_ACTIVE is too low!
+#endif
+
+#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
+#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
+#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
+ | NMI_MASK))
+
+/*
+ * Are we doing bottom half or hardware interrupt processing?
+ * Are we in a softirq context? Interrupt context?
+ * in_softirq - Are we currently processing softirq or have bh disabled?
+ * in_serving_softirq - Are we currently processing softirq?
+ */
+#define in_irq() (hardirq_count())
+#define in_softirq() (softirq_count())
+#define in_interrupt() (irq_count())
+#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
+
+/*
+ * Are we in NMI context?
+ */
+#define in_nmi() (preempt_count() & NMI_MASK)
+
+#if defined(CONFIG_PREEMPT_COUNT)
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_CHECK_OFFSET 0
+#endif
+
+/*
+ * Are we running in atomic context? WARNING: this macro cannot
+ * always detect atomic context; in particular, it cannot know about
+ * held spinlocks in non-preemptible kernels. Thus it should not be
+ * used in the general case to determine whether sleeping is possible.
+ * Do not use in_atomic() in driver code.
+ */
+#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+
+/*
+ * Check whether we were atomic before we did preempt_disable():
+ * (used by the scheduler, *after* releasing the kernel lock)
+ */
+#define in_atomic_preempt_off() \
+ ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
+
+#ifdef CONFIG_PREEMPT_COUNT
+# define preemptible() (preempt_count() == 0 && !irqs_disabled())
+#else
+# define preemptible() 0
+#endif
+
+#endif /* LINUX_PREEMPT_MASK_H */
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 62bd8b72873c..5128d33bbb39 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -10,6 +10,8 @@
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
+#include <linux/context_tracking_state.h>
+#include <linux/cpumask.h>
#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -158,20 +160,51 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
# endif /* !CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL
+extern bool tick_nohz_full_running;
+extern cpumask_var_t tick_nohz_full_mask;
+
+static inline bool tick_nohz_full_enabled(void)
+{
+ if (!static_key_false(&context_tracking_enabled))
+ return false;
+
+ return tick_nohz_full_running;
+}
+
+static inline bool tick_nohz_full_cpu(int cpu)
+{
+ if (!tick_nohz_full_enabled())
+ return false;
+
+ return cpumask_test_cpu(cpu, tick_nohz_full_mask);
+}
+
extern void tick_nohz_init(void);
-extern int tick_nohz_full_cpu(int cpu);
-extern void tick_nohz_full_check(void);
+extern void __tick_nohz_full_check(void);
extern void tick_nohz_full_kick(void);
extern void tick_nohz_full_kick_all(void);
-extern void tick_nohz_task_switch(struct task_struct *tsk);
+extern void __tick_nohz_task_switch(struct task_struct *tsk);
#else
static inline void tick_nohz_init(void) { }
-static inline int tick_nohz_full_cpu(int cpu) { return 0; }
-static inline void tick_nohz_full_check(void) { }
+static inline bool tick_nohz_full_enabled(void) { return false; }
+static inline bool tick_nohz_full_cpu(int cpu) { return false; }
+static inline void __tick_nohz_full_check(void) { }
static inline void tick_nohz_full_kick(void) { }
static inline void tick_nohz_full_kick_all(void) { }
-static inline void tick_nohz_task_switch(struct task_struct *tsk) { }
+static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
#endif
+static inline void tick_nohz_full_check(void)
+{
+ if (tick_nohz_full_enabled())
+ __tick_nohz_full_check();
+}
+
+static inline void tick_nohz_task_switch(struct task_struct *tsk)
+{
+ if (tick_nohz_full_enabled())
+ __tick_nohz_task_switch(tsk);
+}
+
#endif
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index b1dd2db80076..f5b72b364bda 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -1,18 +1,68 @@
#ifndef _LINUX_KERNEL_VTIME_H
#define _LINUX_KERNEL_VTIME_H
+#include <linux/context_tracking_state.h>
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#include <asm/vtime.h>
+#endif
+
+
struct task_struct;
+/*
+ * vtime_accounting_enabled() definitions/declarations
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+static inline bool vtime_accounting_enabled(void) { return true; }
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static inline bool vtime_accounting_enabled(void)
+{
+ if (static_key_false(&context_tracking_enabled)) {
+ if (context_tracking_active())
+ return true;
+ }
+
+ return false;
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+static inline bool vtime_accounting_enabled(void) { return false; }
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
+
+/*
+ * Common vtime APIs
+ */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+
+#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
extern void vtime_task_switch(struct task_struct *prev);
+#else
+extern void vtime_common_task_switch(struct task_struct *prev);
+static inline void vtime_task_switch(struct task_struct *prev)
+{
+ if (vtime_accounting_enabled())
+ vtime_common_task_switch(prev);
+}
+#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
+
extern void vtime_account_system(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
extern void vtime_account_user(struct task_struct *tsk);
-extern void vtime_account_irq_enter(struct task_struct *tsk);
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-static inline bool vtime_accounting_enabled(void) { return true; }
-#endif
+#ifdef __ARCH_HAS_VTIME_ACCOUNT
+extern void vtime_account_irq_enter(struct task_struct *tsk);
+#else
+extern void vtime_common_account_irq_enter(struct task_struct *tsk);
+static inline void vtime_account_irq_enter(struct task_struct *tsk)
+{
+ if (vtime_accounting_enabled())
+ vtime_common_account_irq_enter(tsk);
+}
+#endif /* __ARCH_HAS_VTIME_ACCOUNT */
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
@@ -20,14 +70,20 @@ static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_system(struct task_struct *tsk) { }
static inline void vtime_account_user(struct task_struct *tsk) { }
static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
-static inline bool vtime_accounting_enabled(void) { return false; }
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_account_irq_exit(struct task_struct *tsk);
-extern bool vtime_accounting_enabled(void);
+extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
+
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+ if (vtime_accounting_enabled())
+ vtime_gen_account_irq_exit(tsk);
+}
+
extern void vtime_user_enter(struct task_struct *tsk);
+
static inline void vtime_user_exit(struct task_struct *tsk)
{
vtime_account_user(tsk);
@@ -35,7 +91,7 @@ static inline void vtime_user_exit(struct task_struct *tsk)
extern void vtime_guest_enter(struct task_struct *tsk);
extern void vtime_guest_exit(struct task_struct *tsk);
extern void vtime_init_idle(struct task_struct *tsk, int cpu);
-#else
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
static inline void vtime_account_irq_exit(struct task_struct *tsk)
{
/* On hard|softirq exit we always account to hard|softirq cputime */
diff --git a/include/trace/events/context_tracking.h b/include/trace/events/context_tracking.h
new file mode 100644
index 000000000000..ce8007cf29cf
--- /dev/null
+++ b/include/trace/events/context_tracking.h
@@ -0,0 +1,58 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM context_tracking
+
+#if !defined(_TRACE_CONTEXT_TRACKING_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CONTEXT_TRACKING_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(context_tracking_user,
+
+ TP_PROTO(int dummy),
+
+ TP_ARGS(dummy),
+
+ TP_STRUCT__entry(
+ __field( int, dummy )
+ ),
+
+ TP_fast_assign(
+ __entry->dummy = dummy;
+ ),
+
+ TP_printk("%s", "")
+);
+
+/**
+ * user_enter - called when the kernel resumes to userspace
+ * @dummy: dummy arg to make trace event macro happy
+ *
+ * This event occurs when the kernel resumes to userspace after
+ * an exception or a syscall.
+ */
+DEFINE_EVENT(context_tracking_user, user_enter,
+
+ TP_PROTO(int dummy),
+
+ TP_ARGS(dummy)
+);
+
+/**
+ * user_exit - called when userspace enters the kernel
+ * @dummy: dummy arg to make trace event macro happy
+ *
+ * This event occurs when userspace enters the kernel through
+ * an exception or a syscall.
+ */
+DEFINE_EVENT(context_tracking_user, user_exit,
+
+ TP_PROTO(int dummy),
+
+ TP_ARGS(dummy)
+);
+
+
+#endif /* _TRACE_CONTEXT_TRACKING_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/Kconfig b/init/Kconfig
index cc917d3ec858..0a2c4bcf179e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -528,13 +528,29 @@ config RCU_USER_QS
config CONTEXT_TRACKING_FORCE
bool "Force context tracking"
depends on CONTEXT_TRACKING
- default CONTEXT_TRACKING
+ default y if !NO_HZ_FULL
help
- Probe on user/kernel boundaries by default in order to
- test the features that rely on it such as userspace RCU extended
- quiescent states.
- This test is there for debugging until we have a real user like the
- full dynticks mode.
+ The major pre-requirement for full dynticks to work is to
+ support the context tracking subsystem. But there are also
+ other dependencies to provide in order to make the full
+ dynticks working.
+
+ This option stands for testing when an arch implements the
+ context tracking backend but doesn't yet fullfill all the
+ requirements to make the full dynticks feature working.
+ Without the full dynticks, there is no way to test the support
+ for context tracking and the subsystems that rely on it: RCU
+ userspace extended quiescent state and tickless cputime
+ accounting. This option copes with the absence of the full
+ dynticks subsystem by forcing the context tracking on all
+ CPUs in the system.
+
+ Say Y only if you're working on the developpement of an
+ architecture backend for the context tracking.
+
+ Say N otherwise, this option brings an overhead that you
+ don't want in production.
+
config RCU_FANOUT
int "Tree-based hierarchical RCU fanout value"
diff --git a/init/main.c b/init/main.c
index d03d2ec2eacf..af310afbef28 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,6 +75,7 @@
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/sched_clock.h>
+#include <linux/context_tracking.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -545,6 +546,7 @@ asmlinkage void __init start_kernel(void)
idr_init_cache();
rcu_init();
tick_nohz_init();
+ context_tracking_init();
radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 383f8231e436..247091bf0587 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -20,22 +20,33 @@
#include <linux/hardirq.h>
#include <linux/export.h>
-DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
-#ifdef CONFIG_CONTEXT_TRACKING_FORCE
- .active = true,
-#endif
-};
+#define CREATE_TRACE_POINTS
+#include <trace/events/context_tracking.h>
+
+struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(context_tracking_enabled);
+
+DEFINE_PER_CPU(struct context_tracking, context_tracking);
+EXPORT_SYMBOL_GPL(context_tracking);
+
+void context_tracking_cpu_set(int cpu)
+{
+ if (!per_cpu(context_tracking.active, cpu)) {
+ per_cpu(context_tracking.active, cpu) = true;
+ static_key_slow_inc(&context_tracking_enabled);
+ }
+}
/**
- * user_enter - Inform the context tracking that the CPU is going to
- * enter userspace mode.
+ * context_tracking_user_enter - Inform the context tracking that the CPU is going to
+ * enter userspace mode.
*
* This function must be called right before we switch from the kernel
* to userspace, when it's guaranteed the remaining kernel instructions
* to execute won't use any RCU read side critical section because this
* function sets RCU in extended quiescent state.
*/
-void user_enter(void)
+void context_trackin