author	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-03 18:41:21 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-03 18:41:21 -0700
commit	4c174688ee92805aa5df6e06e5b625a3286e415c (patch)
tree	78e18b242b31a3a50eda41bfdd4705e07f13647a
parent	9c35baf6cee9a5745d55de6f9995916dde642517 (diff)
parent	73a757e63114dfd765f1c5d1ff7e994f123d0234 (diff)
Merge tag 'trace-v4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from Steven Rostedt:
 "New features for this release:

   - Pretty much a full rewrite of the processing of function plugins.
     i.e. echo do_IRQ:stacktrace > set_ftrace_filter

   - The rewrite was needed to allow plugins to be unique to tracing
     instances. i.e. mkdir instances/foo; cd instances/foo; echo
     do_IRQ:stacktrace > set_ftrace_filter

     The old way was written very hacky. This removes a lot of those
     hacks.

   - New "function-fork" tracing option. When set, pids listed in the
     set_ftrace_pid file will have their children added when those
     processes fork.

   - Exposure of "maxactive" for kretprobes in kprobe_events

   - Allow for builtin init functions to be traced by the function
     tracer (via the kernel command line). Module init function tracing
     will come in the next release.

   - Added more selftests, and have selftests also test in an instance"

* tag 'trace-v4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (60 commits)
  ring-buffer: Return reader page back into existing ring buffer
  selftests: ftrace: Allow some event trigger tests to run in an instance
  selftests: ftrace: Have some basic tests run in a tracing instance too
  selftests: ftrace: Have event tests also run in an tracing instance
  selftests: ftrace: Make func_event_triggers and func_traceonoff_triggers tests do instances
  selftests: ftrace: Allow some tests to be run in a tracing instance
  tracing/ftrace: Allow for instances to trigger their own stacktrace probes
  tracing/ftrace: Allow for the traceonoff probe be unique to instances
  tracing/ftrace: Enable snapshot function trigger to work with instances
  tracing/ftrace: Allow instances to have their own function probes
  tracing/ftrace: Add a better way to pass data via the probe functions
  ftrace: Dynamically create the probe ftrace_ops for the trace_array
  tracing: Pass the trace_array into ftrace_probe_ops functions
  tracing: Have the trace_array hold the list of registered func probes
  ftrace: If the hash for a probe fails to update then free what was initialized
  ftrace: Have the function probes call their own function
  ftrace: Have each function probe use its own ftrace_ops
  ftrace: Have unregister_ftrace_function_probe_func() return a value
  ftrace: Add helper function ftrace_hash_move_and_update_ops()
  ftrace: Remove data field from ftrace_func_probe structure
  ...
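The per-instance plugin flow in the message above amounts to a mkdir plus one
tracefs write. A minimal userspace sketch, assuming tracefs is mounted at
/sys/kernel/tracing (the instance name "foo" and the write_file() helper are
illustrative, not part of this merge):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

static int write_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* mkdir under instances/ creates a new tracing instance */
	if (mkdir("/sys/kernel/tracing/instances/foo", 0755) && errno != EEXIST) {
		perror("mkdir");
		return 1;
	}
	/* the stacktrace plugin is now local to the "foo" instance */
	if (write_file("/sys/kernel/tracing/instances/foo/set_ftrace_filter",
		       "do_IRQ:stacktrace") < 0) {
		perror("set_ftrace_filter");
		return 1;
	}
	return 0;
}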
-rw-r--r--  Documentation/trace/kprobetrace.txt  5
-rw-r--r--  arch/x86/kernel/ftrace.c  8
-rw-r--r--  include/linux/ftrace.h  92
-rw-r--r--  include/linux/init.h  4
-rw-r--r--  include/linux/rcupdate.h  5
-rw-r--r--  include/linux/ring_buffer.h  2
-rw-r--r--  include/linux/trace_events.h  11
-rw-r--r--  include/linux/tracepoint.h  19
-rw-r--r--  init/main.c  10
-rw-r--r--  kernel/rcu/tree.c  62
-rw-r--r--  kernel/trace/Kconfig  3
-rw-r--r--  kernel/trace/ftrace.c  996
-rw-r--r--  kernel/trace/ring_buffer.c  40
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c  2
-rw-r--r--  kernel/trace/trace.c  242
-rw-r--r--  kernel/trace/trace.h  77
-rw-r--r--  kernel/trace/trace_benchmark.c  14
-rw-r--r--  kernel/trace/trace_events.c  151
-rw-r--r--  kernel/trace/trace_functions.c  227
-rw-r--r--  kernel/trace/trace_kprobe.c  39
-rw-r--r--  kernel/trace/trace_stack.c  35
-rw-r--r--  mm/page_alloc.c  1
-rw-r--r--  scripts/recordmcount.c  1
-rwxr-xr-x  scripts/recordmcount.pl  1
-rwxr-xr-x  tools/testing/selftests/ftrace/ftracetest  23
-rw-r--r--  tools/testing/selftests/ftrace/test.d/00basic/basic2.tc  1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/00basic/basic3.tc  1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/event/event-enable.tc  1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/event/event-pid.tc  1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc  1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc  114
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc  132
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc  172
-rw-r--r--  tools/testing/selftests/ftrace/test.d/functions  21
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc  39
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc  1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc  1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc  1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc  1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc  1
40 files changed, 1902 insertions, 656 deletions
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 5ea85059db3b..1a3a3d6bc2a8 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -24,7 +24,7 @@ current_tracer. Instead of that, add probe points via
Synopsis of kprobe_events
-------------------------
p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
- r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
+ r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
-:[GRP/]EVENT : Clear a probe
GRP : Group name. If omitted, use "kprobes" for it.
@@ -33,6 +33,9 @@ Synopsis of kprobe_events
MOD : Module name which has given SYM.
SYM[+offs] : Symbol+offset where the probe is inserted.
MEMADDR : Address where the probe is inserted.
+ MAXACTIVE : Maximum number of instances of the specified function that
+ can be probed simultaneously, or 0 for the default value
+ as defined in Documentation/kprobes.txt section 1.3.1.
FETCHARGS : Arguments. Each probe can have up to 128 args.
%REG : Fetch register REG
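The new MAXACTIVE field slots in directly after the 'r'. A hedged userspace
sketch of registering a return probe with it; the event name "myretprobe" and
the value 100 are illustrative, and tracefs is assumed at /sys/kernel/tracing:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* allow up to 100 simultaneous do_sys_open return-probe instances */
	const char *cmd = "r100:myretprobe do_sys_open $retval\n";
	int fd = open("/sys/kernel/tracing/kprobe_events",
		      O_WRONLY | O_APPEND);

	if (fd < 0)
		return 1;
	if (write(fd, cmd, strlen(cmd)) < 0) {
		close(fd);
		return 1;
	}
	return close(fd) != 0;
}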
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5b7153540727..8ee76dce9140 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -533,7 +533,13 @@ static void do_sync_core(void *data)
static void run_sync(void)
{
- int enable_irqs = irqs_disabled();
+ int enable_irqs;
+
+ /* No need to sync if there's only one CPU */
+ if (num_online_cpus() == 1)
+ return;
+
+ enable_irqs = irqs_disabled();
/* We may be called with interrupts disabled (on bootup). */
if (enable_irqs)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3633e8beff39..6d2a63e4ea52 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -42,8 +42,10 @@
/* Main tracing buffer and events set up */
#ifdef CONFIG_TRACING
void trace_init(void);
+void early_trace_init(void);
#else
static inline void trace_init(void) { }
+static inline void early_trace_init(void) { }
#endif
struct module;
@@ -144,6 +146,10 @@ struct ftrace_ops_hash {
struct ftrace_hash *filter_hash;
struct mutex regex_lock;
};
+
+void ftrace_free_init_mem(void);
+#else
+static inline void ftrace_free_init_mem(void) { }
#endif
/*
@@ -260,6 +266,7 @@ static inline int ftrace_nr_registered_ops(void)
}
static inline void clear_ftrace_function(void) { }
static inline void ftrace_kill(void) { }
+static inline void ftrace_free_init_mem(void) { }
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_STACK_TRACER
@@ -279,15 +286,45 @@ int
stack_trace_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
-#endif
-struct ftrace_func_command {
- struct list_head list;
- char *name;
- int (*func)(struct ftrace_hash *hash,
- char *func, char *cmd,
- char *params, int enable);
-};
+/* DO NOT MODIFY THIS VARIABLE DIRECTLY! */
+DECLARE_PER_CPU(int, disable_stack_tracer);
+
+/**
+ * stack_tracer_disable - temporarily disable the stack tracer
+ *
+ * There are a few locations (namely in RCU) where stack tracing
+ * cannot be executed. This function is used to disable stack
+ * tracing during those critical sections.
+ *
+ * This function must be called with preemption or interrupts
+ * disabled and stack_tracer_enable() must be called shortly after
+ * while preemption or interrupts are still disabled.
+ */
+static inline void stack_tracer_disable(void)
+{
+ /* Preemption or interrupts must be disabled */
+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT))
+ WARN_ON_ONCE(!preempt_count() && !irqs_disabled());
+ this_cpu_inc(disable_stack_tracer);
+}
+
+/**
+ * stack_tracer_enable - re-enable the stack tracer
+ *
+ * After stack_tracer_disable() is called, stack_tracer_enable()
+ * must be called shortly afterward.
+ */
+static inline void stack_tracer_enable(void)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT))
+ WARN_ON_ONCE(!preempt_count() && !irqs_disabled());
+ this_cpu_dec(disable_stack_tracer);
+}
+#else
+static inline void stack_tracer_disable(void) { }
+static inline void stack_tracer_enable(void) { }
+#endif
#ifdef CONFIG_DYNAMIC_FTRACE
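For callers, the contract in the comments above pairs the two helpers inside a
region where preemption (or interrupts) stays off. A kernel-side sketch under
that assumption; the function name is hypothetical, not part of this diff:

#include <linux/ftrace.h>
#include <linux/preempt.h>

/* hypothetical caller honoring the documented contract */
static void example_critical_section(void)
{
	preempt_disable_notrace();
	stack_tracer_disable();

	/* ... work the stack tracer must not recurse into ... */

	stack_tracer_enable();
	preempt_enable_notrace();
}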
@@ -315,30 +352,6 @@ void ftrace_bug(int err, struct dyn_ftrace *rec);
struct seq_file;
-struct ftrace_probe_ops {
- void (*func)(unsigned long ip,
- unsigned long parent_ip,
- void **data);
- int (*init)(struct ftrace_probe_ops *ops,
- unsigned long ip, void **data);
- void (*free)(struct ftrace_probe_ops *ops,
- unsigned long ip, void **data);
- int (*print)(struct seq_file *m,
- unsigned long ip,
- struct ftrace_probe_ops *ops,
- void *data);
-};
-
-extern int
-register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
- void *data);
-extern void
-unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
- void *data);
-extern void
-unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops);
-extern void unregister_ftrace_function_probe_all(char *glob);
-
extern int ftrace_text_reserved(const void *start, const void *end);
extern int ftrace_nr_registered_ops(void);
@@ -400,9 +413,6 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
void ftrace_free_filter(struct ftrace_ops *ops);
void ftrace_ops_set_global_filter(struct ftrace_ops *ops);
-int register_ftrace_command(struct ftrace_func_command *cmd);
-int unregister_ftrace_command(struct ftrace_func_command *cmd);
-
enum {
FTRACE_UPDATE_CALLS = (1 << 0),
FTRACE_DISABLE_CALLS = (1 << 1),
@@ -433,8 +443,8 @@ enum {
FTRACE_ITER_FILTER = (1 << 0),
FTRACE_ITER_NOTRACE = (1 << 1),
FTRACE_ITER_PRINTALL = (1 << 2),
- FTRACE_ITER_DO_HASH = (1 << 3),
- FTRACE_ITER_HASH = (1 << 4),
+ FTRACE_ITER_DO_PROBES = (1 << 3),
+ FTRACE_ITER_PROBE = (1 << 4),
FTRACE_ITER_ENABLED = (1 << 5),
};
@@ -618,14 +628,6 @@ static inline void ftrace_enable_daemon(void) { }
static inline void ftrace_module_init(struct module *mod) { }
static inline void ftrace_module_enable(struct module *mod) { }
static inline void ftrace_release_mod(struct module *mod) { }
-static inline __init int register_ftrace_command(struct ftrace_func_command *cmd)
-{
- return -EINVAL;
-}
-static inline __init int unregister_ftrace_command(char *cmd_name)
-{
- return -EINVAL;
-}
static inline int ftrace_text_reserved(const void *start, const void *end)
{
return 0;
diff --git a/include/linux/init.h b/include/linux/init.h
index 79af0962fd52..94769d687cf0 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -39,7 +39,7 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
-#define __init __section(.init.text) __cold notrace __latent_entropy
+#define __init __section(.init.text) __cold __inittrace __latent_entropy
#define __initdata __section(.init.data)
#define __initconst __section(.init.rodata)
#define __exitdata __section(.exit.data)
@@ -68,8 +68,10 @@
#ifdef MODULE
#define __exitused
+#define __inittrace notrace
#else
#define __exitused __used
+#define __inittrace
#endif
#define __exit __section(.exit.text) __exitused __cold notrace
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index de88b33c0974..dea8f17b2fe3 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -97,6 +97,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
unsigned long secs,
unsigned long c_old,
unsigned long c);
+bool rcu_irq_enter_disabled(void);
#else
static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
int *flags,
@@ -113,6 +114,10 @@ static inline void rcutorture_record_test_transition(void)
static inline void rcutorture_record_progress(unsigned long vernum)
{
}
+static inline bool rcu_irq_enter_disabled(void)
+{
+ return false;
+}
#ifdef CONFIG_RCU_TRACE
void do_trace_rcu_torture_read(const char *rcutorturename,
struct rcu_head *rhp,
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index b6d4568795a7..ee9b461af095 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -185,7 +185,7 @@ size_t ring_buffer_page_len(void *page);
void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);
-void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
+void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data);
int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
size_t len, int cpu, int full);
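The extra cpu argument lets the freed page be handed back to the ring buffer
of the CPU it was allocated for, instead of always being freed outright. A
caller-side sketch of the updated signature (the function name is illustrative):

#include <linux/mm.h>
#include <linux/ring_buffer.h>

/* illustrative reader, updated for the extra cpu argument */
static void read_one_page(struct ring_buffer *buffer, int cpu)
{
	void *page = ring_buffer_alloc_read_page(buffer, cpu);

	if (!page)
		return;
	if (ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 0) >= 0) {
		/* ... consume the events in the page ... */
	}
	/* the cpu is needed so the page can go back into cpu's buffer */
	ring_buffer_free_read_page(buffer, cpu, page);
}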
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 0af63c4381b9..a556805eff8a 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -138,16 +138,7 @@ enum print_line_t {
TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
};
-/*
- * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
- * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
- * simplifies those functions and keeps them in sync.
- */
-static inline enum print_line_t trace_handle_return(struct trace_seq *s)
-{
- return trace_seq_has_overflowed(s) ?
- TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
-}
+enum print_line_t trace_handle_return(struct trace_seq *s);
void tracing_generic_entry_update(struct trace_entry *entry,
unsigned long flags,
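Only the helper's body moves out of line here; its contract is unchanged:
TRACE_TYPE_PARTIAL_LINE if the trace_seq overflowed, TRACE_TYPE_HANDLED
otherwise. A sketch of the usual call site in an event output routine (the
function name is illustrative):

#include <linux/trace_events.h>

/* illustrative event output callback using the helper */
static enum print_line_t example_output(struct trace_iterator *iter)
{
	trace_seq_printf(&iter->seq, "example: %d\n", iter->cpu);

	/* collapse the overflow check into the shared helper */
	return trace_handle_return(&iter->seq);
}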
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index f72fcfe0e66a..cc48cb2ce209 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -128,7 +128,7 @@ extern void syscall_unregfunc(void);
* as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
* "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
*/
-#define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \
+#define __DO_TRACE(tp, proto, args, cond, rcucheck) \
do { \
struct tracepoint_func *it_func_ptr; \
void *it_func; \
@@ -136,7 +136,11 @@ extern void syscall_unregfunc(void);
\
if (!(cond)) \
return; \
- prercu; \
+ if (rcucheck) { \
+ if (WARN_ON_ONCE(rcu_irq_enter_disabled())) \
+ return; \
+ rcu_irq_enter_irqson(); \
+ } \
rcu_read_lock_sched_notrace(); \
it_func_ptr = rcu_dereference_sched((tp)->funcs); \
if (it_func_ptr) { \
@@ -147,20 +151,19 @@ extern void syscall_unregfunc(void);
} while ((++it_func_ptr)->func); \
} \
rcu_read_unlock_sched_notrace(); \
- postrcu; \
+ if (rcucheck) \
+ rcu_irq_exit_irqson(); \
} while (0)
#ifndef MODULE
-#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \
+#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \
static inline void trace_##name##_rcuidle(proto) \
{ \
if (static_key_false(&__tracepoint_##name.key)) \
__DO_TRACE(&__tracepoint_##name, \
TP_PROTO(data_proto), \
TP_ARGS(data_args), \
- TP_CONDITION(cond), \
- rcu_irq_enter_irqson(), \
- rcu_irq_exit_irqson()); \
+ TP_CONDITION(cond), 1); \
}
#else
#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args)
@@ -186,7 +189,7 @@ extern void syscall_unregfunc(void);
__DO_TRACE(&__tracepoint_##name, \
TP_PROTO(data_proto), \
TP_ARGS(data_args), \
- TP_CONDITION(cond),,); \
+ TP_CONDITION(cond), 0); \
if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
rcu_read_lock_sched_notrace(); \
rcu_dereference_sched(__tracepoint_##name.funcs);\
diff --git a/init/main.c b/init/main.c
index b1b9dbf7622f..cc48053bb39f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -545,6 +545,11 @@ asmlinkage __visible void __init start_kernel(void)
trap_init();
mm_init();
+ ftrace_init();
+
+ /* trace_printk can be enabled here */
+ early_trace_init();
+
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
@@ -570,7 +575,7 @@ asmlinkage __visible void __init start_kernel(void)
rcu_init();
- /* trace_printk() and trace points may be used after this */
+ /* Trace events are available after this */
trace_init();
context_tracking_init();
@@ -670,8 +675,6 @@ asmlinkage __visible void __init start_kernel(void)
efi_free_boot_services();
}
- ftrace_init();
-
/* Do the rest non-__init'ed, we're now alive */
rest_init();
}
@@ -959,6 +962,7 @@ static int __ref kernel_init(void *unused)
kernel_init_freeable();
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
+ ftrace_free_init_mem();
free_initmem();
mark_readonly();
system_state = SYSTEM_RUNNING;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 50fee7689e71..a6dcf3bd244f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -57,6 +57,7 @@
#include <linux/random.h>
#include <linux/trace_events.h>
#include <linux/suspend.h>
+#include <linux/ftrace.h>
#include "tree.h"
#include "rcu.h"
@@ -284,6 +285,20 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
};
/*
+ * There are a few places, currently just in the tracing infrastructure,
+ * that use rcu_irq_enter() to make sure RCU is watching. But there is
+ * a small window where even that will not work. In those cases,
+ * rcu_irq_enter_disabled() needs to be checked to make sure rcu_irq_enter()
+ * can be called.
+ */
+static DEFINE_PER_CPU(bool, disable_rcu_irq_enter);
+
+bool rcu_irq_enter_disabled(void)
+{
+ return this_cpu_read(disable_rcu_irq_enter);
+}
+
+/*
* Record entry into an extended quiescent state. This is only to be
* called when not already in an extended quiescent state.
*/
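The intended consumer-side pattern (the tracepoint rework elsewhere in this
merge is the real user) looks roughly like this; the function name is
hypothetical:

#include <linux/bug.h>
#include <linux/rcupdate.h>

/* hypothetical caller that must not trip over the EQS transition */
static void example_trace_from_idle(void (*emit)(void))
{
	/* bail out in the window where rcu_irq_enter() is forbidden */
	if (WARN_ON_ONCE(rcu_irq_enter_disabled()))
		return;

	rcu_irq_enter_irqson();	/* make RCU watch this context */
	emit();
	rcu_irq_exit_irqson();
}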
@@ -771,25 +786,24 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
}
/*
- * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
+ * rcu_eqs_enter_common - current CPU is entering an extended quiescent state
*
- * If the new value of the ->dynticks_nesting counter now is zero,
- * we really have entered idle, and must do the appropriate accounting.
- * The caller must have disabled interrupts.
+ * Enter idle, doing appropriate accounting. The caller must have
+ * disabled interrupts.
*/
-static void rcu_eqs_enter_common(long long oldval, bool user)
+static void rcu_eqs_enter_common(bool user)
{
struct rcu_state *rsp;
struct rcu_data *rdp;
- RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
+ struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
- trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
+ trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0);
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
!user && !is_idle_task(current)) {
struct task_struct *idle __maybe_unused =
idle_task(smp_processor_id());
- trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
+ trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0);
rcu_ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
@@ -800,7 +814,10 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
do_nocb_deferred_wakeup(rdp);
}
rcu_prepare_for_idle();
- rcu_dynticks_eqs_enter();
+ __this_cpu_inc(disable_rcu_irq_enter);
+ rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */
+ rcu_dynticks_eqs_enter(); /* After this, tracing works again. */
+ __this_cpu_dec(disable_rcu_irq_enter);
rcu_dynticks_task_enter();
/*
@@ -821,19 +838,15 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
*/
static void rcu_eqs_enter(bool user)
{
- long long oldval;
struct rcu_dynticks *rdtp;
rdtp = this_cpu_ptr(&rcu_dynticks);
- oldval = rdtp->dynticks_nesting;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
- (oldval & DYNTICK_TASK_NEST_MASK) == 0);
- if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
- rdtp->dynticks_nesting = 0;
- rcu_eqs_enter_common(oldval, user);
- } else {
+ (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
+ if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+ rcu_eqs_enter_common(user);
+ else
rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
- }
}
/**
@@ -892,19 +905,18 @@ void rcu_user_enter(void)
*/
void rcu_irq_exit(void)
{
- long long oldval;
struct rcu_dynticks *rdtp;
RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
- oldval = rdtp->dynticks_nesting;
- rdtp->dynticks_nesting--;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
- rdtp->dynticks_nesting < 0);
- if (rdtp->dynticks_nesting)
- trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
- else
- rcu_eqs_enter_common(oldval, true);
+ rdtp->dynticks_nesting < 1);
+ if (rdtp->dynticks_nesting <= 1) {
+ rcu_eqs_enter_common(true);
+ } else {
+ trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1);
+ rdtp->dynticks_nesting--;
+ }
rcu_sysidle_enter(1);
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 9619b5768e4b..7e06f04e98fe 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -134,7 +134,8 @@ config FUNCTION_TRACER
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
- select GLOB
+ select GLOB
+ select TASKS_RCU if PREEMPT
help
Enable the kernel to trace every kernel function. This is done
by using a compiler feature to insert a small, 5-byte No-Operation
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index dd3e91d68dc7..00077a57b746 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -36,6 +36,7 @@
#include <trace/events/sched.h>
+#include <asm/sections.h>
#include <asm/setup.h>
#include "trace_output.h"
@@ -1095,22 +1096,20 @@ static bool update_all_ops;
# error Dynamic ftrace depends on MCOUNT_RECORD
#endif
-static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
-
-struct ftrace_func_probe {
- struct hlist_node node;
- struct ftrace_probe_ops *ops;
- unsigned long flags;
- unsigned long ip;
- void *data;
- struct list_head free_list;
-};
-
struct ftrace_func_entry {
struct hlist_node hlist;
unsigned long ip;
};
+struct ftrace_func_probe {
+ struct ftrace_probe_ops *probe_ops;
+ struct ftrace_ops ops;
+ struct trace_array *tr;
+ struct list_head list;
+ void *data;
+ int ref;
+};
+
/*
* We make these constant because no one should touch them,
* but they are used as the default "empty hash", to avoid allocating
@@ -1271,7 +1270,7 @@ static void
remove_hash_entry(struct ftrace_hash *hash,
struct ftrace_func_entry *entry)
{
- hlist_del(&entry->hlist);
+ hlist_del_rcu(&entry->hlist);
hash->count--;
}
@@ -2807,18 +2806,28 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
* callers are done before leaving this function.
* The same goes for freeing the per_cpu data of the per_cpu
* ops.
- *
- * Again, normal synchronize_sched() is not good enough.
- * We need to do a hard force of sched synchronization.
- * This is because we use preempt_disable() to do RCU, but
- * the function tracers can be called where RCU is not watching
- * (like before user_exit()). We can not rely on the RCU
- * infrastructure to do the synchronization, thus we must do it
- * ourselves.
*/
if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) {
+ /*
+ * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but
+ * the function tracers can be called where RCU is not watching
+ * (like before user_exit()). We can not rely on the RCU
+ * infrastructure to do the synchronization, thus we must do it
+ * ourselves.
+ */
schedule_on_each_cpu(ftrace_sync);
+ /*
+ * When the kernel is preemptible, tasks can be preempted
+ * while on an ftrace trampoline. Just scheduling a task on
+ * a CPU is not good enough to flush them. Calling
+ * synchronize_rcu_tasks() will wait for those tasks to
+ * execute and either schedule voluntarily or enter user space.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT))
+ synchronize_rcu_tasks();
+
arch_ftrace_trampoline_free(ops);
if (ops->flags & FTRACE_OPS_FL_PER_CPU)
@@ -3055,34 +3064,63 @@ struct ftrace_iterator {
struct ftrace_page *pg;
struct dyn_ftrace *func;
struct ftrace_func_probe *probe;
+ struct ftrace_func_entry *probe_entry;
struct trace_parser parser;
struct ftrace_hash *hash;
struct ftrace_ops *ops;
- int hidx;
+ int pidx;
int idx;
unsigned flags;
};
static void *
-t_hash_next(struct seq_file *m, loff_t *pos)
+t_probe_next(struct seq_file *m, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
+ struct trace_array *tr = iter->ops->private;
+ struct list_head *func_probes;
+ struct ftrace_hash *hash;
+ struct list_head *next;
struct hlist_node *hnd = NULL;
struct hlist_head *hhd;
+ int size;
(*pos)++;
iter->pos = *pos;
- if (iter->probe)
- hnd = &iter->probe->node;
- retry:
- if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
+ if (!tr)
return NULL;
- hhd = &ftrace_func_hash[iter->hidx];
+ func_probes = &tr->func_probes;
+ if (list_empty(func_probes))
+ return NULL;
+
+ if (!iter->probe) {
+ next = func_probes->next;
+ iter->probe = list_entry(next, struct ftrace_func_probe, list);
+ }
+
+ if (iter->probe_entry)
+ hnd = &iter->probe_entry->hlist;
+
+ hash = iter->probe->ops.func_hash->filter_hash;
+ size = 1 << hash->size_bits;
+
+ retry:
+ if (iter->pidx >= size) {
+ if (iter->probe->list.next == func_probes)
+ return NULL;
+ next = iter->probe->list.next;
+ iter->probe = list_entry(next, struct ftrace_func_probe, list);
+ hash = iter->probe->ops.func_hash->filter_hash;
+ size = 1 << hash->size_bits;
+ iter->pidx = 0;
+ }
+
+ hhd = &hash->buckets[iter->pidx];
if (hlist_empty(hhd)) {
- iter->hidx++;
+ iter->pidx++;
hnd = NULL;
goto retry;
}
@@ -3092,7 +3130,7 @@ t_hash_next(struct seq_file *m, loff_t *pos)
else {
hnd = hnd->next;
if (!hnd) {
- iter->hidx++;
+ iter->pidx++;
goto retry;
}
}
@@ -3100,26 +3138,28 @@ t_hash_next(struct seq_file *m, loff_t *pos)
if (WARN_ON_ONCE(!hnd))