summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-10-20 13:35:07 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-20 13:35:07 -0700
commit92b29b86fe2e183d44eb467e5e74a5f718ef2e43 (patch)
tree1bac8a1aa11d47322b66d10ec3a370016d843d06 /kernel
parentb9d7ccf56be1ac77b71a284a1c0e6337f9a7aff0 (diff)
parent98d9c66ab07471006fd7910cb16453581c41a3e7 (diff)
Merge branch 'tracing-v28-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-v28-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (131 commits) tracing/fastboot: improve help text tracing/stacktrace: improve help text tracing/fastboot: fix initcalls disposition in bootgraph.pl tracing/fastboot: fix bootgraph.pl initcall name regexp tracing/fastboot: fix issues and improve output of bootgraph.pl tracepoints: synchronize unregister static inline tracepoints: tracepoint_synchronize_unregister() ftrace: make ftrace_test_p6nop disassembler-friendly markers: fix synchronize marker unregister static inline tracing/fastboot: add better resolution to initcall debug/tracing trace: add build-time check to avoid overrunning hex buffer ftrace: fix hex output mode of ftrace tracing/fastboot: fix initcalls disposition in bootgraph.pl tracing/fastboot: fix printk format typo in boot tracer ftrace: return an error when setting a nonexistent tracer ftrace: make some tracers reentrant ring-buffer: make reentrant ring-buffer: move page indexes into page headers tracing/fastboot: only trace non-module initcalls ftrace: move pc counter in irqtrace ... Manually fix conflicts: - init/main.c: initcall tracing - kernel/module.c: verbose level vs tracepoints - scripts/bootgraph.pl: fallout from cherry-picking commits.
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/exit.c10
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/kthread.c5
-rw-r--r--kernel/marker.c36
-rw-r--r--kernel/module.c81
-rw-r--r--kernel/notifier.c2
-rw-r--r--kernel/sched.c17
-rw-r--r--kernel/signal.c3
-rw-r--r--kernel/trace/Kconfig64
-rw-r--r--kernel/trace/Makefile4
-rw-r--r--kernel/trace/ftrace.c275
-rw-r--r--kernel/trace/ring_buffer.c2014
-rw-r--r--kernel/trace/trace.c1845
-rw-r--r--kernel/trace/trace.h211
-rw-r--r--kernel/trace/trace_boot.c126
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_irqsoff.c19
-rw-r--r--kernel/trace/trace_mmiotrace.c116
-rw-r--r--kernel/trace/trace_nop.c64
-rw-r--r--kernel/trace/trace_sched_switch.c137
-rw-r--r--kernel/trace/trace_sched_wakeup.c148
-rw-r--r--kernel/trace/trace_selftest.c83
-rw-r--r--kernel/trace/trace_stack.c310
-rw-r--r--kernel/trace/trace_sysprof.c2
-rw-r--r--kernel/tracepoint.c477
26 files changed, 4711 insertions, 1344 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 066550aa61c5..305f11dbef21 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -85,6 +85,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FTRACE) += trace/
diff --git a/kernel/exit.c b/kernel/exit.c
index 059b38cae384..80137a5d9467 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -47,6 +47,7 @@
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/tracehook.h>
+#include <trace/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -146,7 +147,10 @@ static void __exit_signal(struct task_struct *tsk)
static void delayed_put_task_struct(struct rcu_head *rhp)
{
- put_task_struct(container_of(rhp, struct task_struct, rcu));
+ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+ trace_sched_process_free(tsk);
+ put_task_struct(tsk);
}
@@ -1070,6 +1074,8 @@ NORET_TYPE void do_exit(long code)
if (group_dead)
acct_process();
+ trace_sched_process_exit(tsk);
+
exit_sem(tsk);
exit_files(tsk);
exit_fs(tsk);
@@ -1675,6 +1681,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
struct task_struct *tsk;
int retval;
+ trace_sched_process_wait(pid);
+
add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 44e64d7ba29b..4d093552dd6e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
+#include <trace/sched.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -1372,6 +1373,8 @@ long do_fork(unsigned long clone_flags,
if (!IS_ERR(p)) {
struct completion vfork;
+ trace_sched_process_fork(current, p);
+
nr = task_pid_vnr(p);
if (clone_flags & CLONE_PARENT_SETTID)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 14ec64fe175a..8e7a7ce3ed0a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <trace/sched.h>
#define KTHREAD_NICE_LEVEL (-5)
@@ -205,6 +206,8 @@ int kthread_stop(struct task_struct *k)
/* It could exit after stop_info.k set, but before wake_up_process. */
get_task_struct(k);
+ trace_sched_kthread_stop(k);
+
/* Must init completion *before* thread sees kthread_stop_info.k */
init_completion(&kthread_stop_info.done);
smp_wmb();
@@ -220,6 +223,8 @@ int kthread_stop(struct task_struct *k)
ret = kthread_stop_info.err;
mutex_unlock(&kthread_stop_lock);
+ trace_sched_kthread_stop_ret(ret);
+
return ret;
}
EXPORT_SYMBOL(kthread_stop);
diff --git a/kernel/marker.c b/kernel/marker.c
index 7d1faecd7a51..e9c6b2bc9400 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -62,7 +62,7 @@ struct marker_entry {
int refcount; /* Number of times armed. 0 if disarmed. */
struct rcu_head rcu;
void *oldptr;
- unsigned char rcu_pending:1;
+ int rcu_pending;
unsigned char ptype:1;
char name[0]; /* Contains name'\0'format'\0' */
};
@@ -103,11 +103,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
char ptype;
/*
- * preempt_disable does two things : disabling preemption to make sure
- * the teardown of the callbacks can be done correctly when they are in
- * modules and they insure RCU read coherency.
+ * rcu_read_lock_sched does two things : disabling preemption to make
+ * sure the teardown of the callbacks can be done correctly when they
+ * are in modules and they insure RCU read coherency.
*/
- preempt_disable();
+ rcu_read_lock_sched();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
@@ -145,7 +145,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
va_end(args);
}
}
- preempt_enable();
+ rcu_read_unlock_sched();
}
EXPORT_SYMBOL_GPL(marker_probe_cb);
@@ -162,7 +162,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
va_list args; /* not initialized */
char ptype;
- preempt_disable();
+ rcu_read_lock_sched();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
@@ -195,7 +195,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
}
- preempt_enable();
+ rcu_read_unlock_sched();
}
EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
@@ -560,7 +560,7 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
* Disable a marker and its probe callback.
* Note: only waiting an RCU period after setting elem->call to the empty
* function insures that the original callback is not used anymore. This insured
- * by preempt_disable around the call site.
+ * by rcu_read_lock_sched around the call site.
*/
static void disable_marker(struct marker *elem)
{
@@ -653,11 +653,17 @@ int marker_probe_register(const char *name, const char *format,
entry = get_marker(name);
if (!entry) {
entry = add_marker(name, format);
- if (IS_ERR(entry)) {
+ if (IS_ERR(entry))
ret = PTR_ERR(entry);
- goto end;
- }
+ } else if (format) {
+ if (!entry->format)
+ ret = marker_set_format(&entry, format);
+ else if (strcmp(entry->format, format))
+ ret = -EPERM;
}
+ if (ret)
+ goto end;
+
/*
* If we detect that a call_rcu is pending for this marker,
* make sure it's executed now.
@@ -674,6 +680,8 @@ int marker_probe_register(const char *name, const char *format,
mutex_lock(&markers_mutex);
entry = get_marker(name);
WARN_ON(!entry);
+ if (entry->rcu_pending)
+ rcu_barrier_sched();
entry->oldptr = old;
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
@@ -717,6 +725,8 @@ int marker_probe_unregister(const char *name,
entry = get_marker(name);
if (!entry)
goto end;
+ if (entry->rcu_pending)
+ rcu_barrier_sched();
entry->oldptr = old;
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
@@ -795,6 +805,8 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
mutex_lock(&markers_mutex);
entry = get_marker_from_private_data(probe, probe_private);
WARN_ON(!entry);
+ if (entry->rcu_pending)
+ rcu_barrier_sched();
entry->oldptr = old;
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
diff --git a/kernel/module.c b/kernel/module.c
index 25bc9ac9e226..0d8d21ee792c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -46,6 +46,8 @@
#include <asm/cacheflush.h>
#include <linux/license.h>
#include <asm/sections.h>
+#include <linux/tracepoint.h>
+#include <linux/ftrace.h>
#if 0
#define DEBUGP printk
@@ -1430,6 +1432,9 @@ static void free_module(struct module *mod)
/* Module unload stuff */
module_unload_free(mod);
+ /* release any pointers to mcount in this module */
+ ftrace_release(mod->module_core, mod->core_size);
+
/* This may be NULL, but that's OK */
module_free(mod, mod->module_init);
kfree(mod->args);
@@ -1861,9 +1866,13 @@ static noinline struct module *load_module(void __user *umod,
unsigned int markersindex;
unsigned int markersstringsindex;
unsigned int verboseindex;
+ unsigned int tracepointsindex;
+ unsigned int tracepointsstringsindex;
+ unsigned int mcountindex;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+ void *mseg;
struct exception_table_entry *extable;
mm_segment_t old_fs;
@@ -2156,6 +2165,12 @@ static noinline struct module *load_module(void __user *umod,
markersstringsindex = find_sec(hdr, sechdrs, secstrings,
"__markers_strings");
verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
+ tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
+ tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
+ "__tracepoints_strings");
+
+ mcountindex = find_sec(hdr, sechdrs, secstrings,
+ "__mcount_loc");
/* Now do relocations. */
for (i = 1; i < hdr->e_shnum; i++) {
@@ -2183,6 +2198,12 @@ static noinline struct module *load_module(void __user *umod,
mod->num_markers =
sechdrs[markersindex].sh_size / sizeof(*mod->markers);
#endif
+#ifdef CONFIG_TRACEPOINTS
+ mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
+ mod->num_tracepoints =
+ sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
+#endif
+
/* Find duplicate symbols */
err = verify_export_symbols(mod);
@@ -2201,12 +2222,22 @@ static noinline struct module *load_module(void __user *umod,
add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+ if (!mod->taints) {
#ifdef CONFIG_MARKERS
- if (!mod->taints)
marker_update_probe_range(mod->markers,
mod->markers + mod->num_markers);
#endif
dynamic_printk_setup(sechdrs, verboseindex);
+#ifdef CONFIG_TRACEPOINTS
+ tracepoint_update_probe_range(mod->tracepoints,
+ mod->tracepoints + mod->num_tracepoints);
+#endif
+ }
+
+ /* sechdrs[0].sh_size is always zero */
+ mseg = (void *)sechdrs[mcountindex].sh_addr;
+ ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+
err = module_finalize(hdr, sechdrs, mod);
if (err < 0)
goto cleanup;
@@ -2276,6 +2307,7 @@ static noinline struct module *load_module(void __user *umod,
cleanup:
kobject_del(&mod->mkobj.kobj);
kobject_put(&mod->mkobj.kobj);
+ ftrace_release(mod->module_core, mod->core_size);
free_unload:
module_unload_free(mod);
module_free(mod, mod->module_init);
@@ -2759,3 +2791,50 @@ void module_update_markers(void)
mutex_unlock(&module_mutex);
}
#endif
+
+#ifdef CONFIG_TRACEPOINTS
+void module_update_tracepoints(void)
+{
+ struct module *mod;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(mod, &modules, list)
+ if (!mod->taints)
+ tracepoint_update_probe_range(mod->tracepoints,
+ mod->tracepoints + mod->num_tracepoints);
+ mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+ struct module *iter_mod;
+ int found = 0;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(iter_mod, &modules, list) {
+ if (!iter_mod->taints) {
+ /*
+ * Sorted module list
+ */
+ if (iter_mod < iter->module)
+ continue;
+ else if (iter_mod > iter->module)
+ iter->tracepoint = NULL;
+ found = tracepoint_get_iter_range(&iter->tracepoint,
+ iter_mod->tracepoints,
+ iter_mod->tracepoints
+ + iter_mod->num_tracepoints);
+ if (found) {
+ iter->module = iter_mod;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&module_mutex);
+ return found;
+}
+#endif
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 823be11584ef..4282c0a40a57 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -550,7 +550,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
static ATOMIC_NOTIFIER_HEAD(die_chain);
-int notify_die(enum die_val val, const char *str,
+int notrace notify_die(enum die_val val, const char *str,
struct pt_regs *regs, long err, int trap, int sig)
{
struct die_args args = {
diff --git a/kernel/sched.c b/kernel/sched.c
index 09a8c15748f1..d906f72b42d2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
+#include <trace/sched.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -1936,6 +1937,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* just go back and repeat.
*/
rq = task_rq_lock(p, &flags);
+ trace_sched_wait_task(rq, p);
running = task_running(rq, p);
on_rq = p->se.on_rq;
ncsw = 0;
@@ -2297,9 +2299,7 @@ out_activate:
success = 1;
out_running:
- trace_mark(kernel_sched_wakeup,
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- p->pid, p->state, rq, p, rq->curr);
+ trace_sched_wakeup(rq, p);
check_preempt_curr(rq, p, sync);
p->state = TASK_RUNNING;
@@ -2432,9 +2432,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
}
- trace_mark(kernel_sched_wakeup_new,
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- p->pid, p->state, rq, p, rq->curr);
+ trace_sched_wakeup_new(rq, p);
check_preempt_curr(rq, p, 0);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
@@ -2607,11 +2605,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
- trace_mark(kernel_sched_schedule,
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- prev->pid, next->pid, prev->state,
- rq, prev, next);
+ trace_sched_switch(rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
@@ -2851,6 +2845,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
|| unlikely(!cpu_active(dest_cpu)))
goto out;
+ trace_sched_migrate_task(rq, p, dest_cpu);
/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
diff --git a/kernel/signal.c b/kernel/signal.c
index 6eea5826d618..105217da5c82 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,6 +27,7 @@
#include <linux/freezer.h>
#include <linux/pid_namespace.h>
#include <linux/nsproxy.h>
+#include <trace/sched.h>
#include <asm/param.h>
#include <asm/uaccess.h>
@@ -803,6 +804,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
struct sigpending *pending;
struct sigqueue *q;
+ trace_sched_signal_send(sig, t);
+
assert_spin_locked(&t->sighand->siglock);
if (!prepare_signal(sig, t))
return 0;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd60..1cb3e1f616af 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,23 +1,37 @@
#
# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
#
+
+config NOP_TRACER
+ bool
+
config HAVE_FTRACE
bool
+ select NOP_TRACER
config HAVE_DYNAMIC_FTRACE
bool
+config HAVE_FTRACE_MCOUNT_RECORD
+ bool
+
config TRACER_MAX_TRACE
bool
+config RING_BUFFER
+ bool
+
config TRACING
bool
select DEBUG_FS
+ select RING_BUFFER
select STACKTRACE
+ select TRACEPOINTS
config FTRACE
bool "Kernel Function Tracer"
depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
select FRAME_POINTER
select TRACING
select CONTEXT_SWITCH_TRACER
@@ -36,6 +50,7 @@ config IRQSOFF_TRACER
depends on TRACE_IRQFLAGS_SUPPORT
depends on GENERIC_TIME
depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
select TRACE_IRQFLAGS
select TRACING
select TRACER_MAX_TRACE
@@ -59,6 +74,7 @@ config PREEMPT_TRACER
depends on GENERIC_TIME
depends on PREEMPT
depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
select TRACING
select TRACER_MAX_TRACE
help
@@ -86,6 +102,7 @@ config SYSPROF_TRACER
config SCHED_TRACER
bool "Scheduling Latency Tracer"
depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
select TRACING
select CONTEXT_SWITCH_TRACER
select TRACER_MAX_TRACE
@@ -96,16 +113,56 @@ config SCHED_TRACER
config CONTEXT_SWITCH_TRACER
bool "Trace process context switches"
depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
select TRACING
select MARKERS
help
This tracer gets called from the context switch and records
all switching of tasks.
+config BOOT_TRACER
+ bool "Trace boot initcalls"
+ depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
+ select TRACING
+ help
+ This tracer helps developers to optimize boot times: it records
+ the timings of the initcalls and traces key events and the identity
+ of tasks that can cause boot delays, such as context-switches.
+
+ Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+ produce pretty graphics about boot inefficiencies, giving a visual
+ representation of the delays during initcalls - but the raw
+ /debug/tracing/trace text output is readable too.
+
+ ( Note that tracing self tests can't be enabled if this tracer is
+ selected, because the self-tests are an initcall as well and that
+ would invalidate the boot trace. )
+
+config STACK_TRACER
+ bool "Trace max stack"
+ depends on HAVE_FTRACE
+ depends on DEBUG_KERNEL
+ select FTRACE
+ select STACKTRACE
+ help
+ This special tracer records the maximum stack footprint of the
+ kernel and displays it in debugfs/tracing/stack_trace.
+
+ This tracer works by hooking into every function call that the
+ kernel executes, and keeping a maximum stack depth value and
+ stack-trace saved. Because this logic has to execute in every
+ kernel function, all the time, this option can slow down the
+ kernel measurably and is generally intended for kernel
+ developers only.
+
+ Say N if unsure.
+
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FTRACE
depends on HAVE_DYNAMIC_FTRACE
+ depends on DEBUG_KERNEL
default y
help
This option will modify all the calls to ftrace dynamically
@@ -121,12 +178,17 @@ config DYNAMIC_FTRACE
were made. If so, it runs stop_machine (stops all CPUS)
and modifies the code to jump over the call to ftrace.
+config FTRACE_MCOUNT_RECORD
+ def_bool y
+ depends on DYNAMIC_FTRACE
+ depends on HAVE_FTRACE_MCOUNT_RECORD
+
config FTRACE_SELFTEST
bool
config FTRACE_STARTUP_TEST
bool "Perform a startup test on ftrace"
- depends on TRACING
+ depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
select FTRACE_SELFTEST
help
This option performs a series of startup tests on ftrace. On bootup
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de17288..a85dfba88ba0 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -11,6 +11,7 @@ obj-y += trace_selftest_dynamic.o
endif
obj-$(CONFIG_FTRACE) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
@@ -19,6 +20,9 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af31b403..4dda4f60a2a9 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -81,7 +81,7 @@ void clear_ftrace_function(void)
static int __register_ftrace_function(struct ftrace_ops *ops)
{
- /* Should never be called by interrupts */
+ /* should not be called from interrupt context */
spin_lock(&ftrace_lock);
ops->next = ftrace_list;
@@ -115,6 +115,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
struct ftrace_ops **p;
int ret = 0;
+ /* should not be called from interrupt context */
spin_lock(&ftrace_lock);
/*
@@ -153,6 +154,30 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
#ifdef CONFIG_DYNAMIC_FTRACE
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+/*
+ * The hash lock is only needed when the recording of the mcount
+ * callers are dynamic. That is, by the caller themselves and
+ * not recorded via the compilation.
+ */
+static DEFINE_SPINLOCK(ftrace_hash_lock);
+#define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) \
+ spin_unlock_irqrestore(&ftrace_hash_lock, flags)
+#else
+/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
+#define ftrace_hash_lock(flags) do { (void)(flags); } while (0)
+#define ftrace_hash_unlock(flags) do { } while(0)
+#endif
+
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want
+ * to get it confused by reading a reference in the code as we
+ * are parsing on objcopy output of text. Use a variable for
+ * it instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
+
static struct task_struct *ftraced_task;
enum {
@@ -171,7 +196,6 @@ static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
-static DEFINE_SPINLOCK(ftrace_shutdown_lock);
static DEFINE_MUTEX(ftraced_lock);
static DEFINE_MUTEX(ftrace_regex_lock);
@@ -294,13 +318,37 @@ static inline void ftrace_del_hash(struct dyn_ftrace *node)
static void ftrace_free_rec(struct dyn_ftrace *rec)
{
- /* no locking, only called from kstop_machine */
-
rec->ip = (unsigned long)ftrace_free_records;
ftrace_free_records = rec;
rec->flags |= FTRACE_FL_FREE;
}
+void ftrace_release(void *start, unsigned long size)
+{
+ struct dyn_ftrace *rec;
+ struct ftrace_page *pg;
+ unsigned long s = (unsigned long)start;
+ unsigned long e = s + size;
+ int i;
+
+ if (ftrace_disabled || !start)
+ return;
+
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ for (i = 0; i < pg->index; i++) {
+ rec = &pg->records[i];
+
+ if ((rec->ip >= s) && (rec->ip < e))
+ ftrace_free_rec(rec);
+ }
+ }
+ spin_unlock(&ftrace_lock);
+
+}
+
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
{
struct dyn_ftrace *rec;
@@ -338,7 +386,6 @@ ftrace_record_ip(unsigned long ip)
unsigned long flags;
unsigned long key;
int resched;
- int atomic;
int cpu;
if (!ftrace_enabled || ftrace_disabled)
@@ -368,9 +415,7 @@ ftrace_record_ip(unsigned long ip)
if (ftrace_ip_in_hash(ip, key))
goto out;
- atomic = irqs_disabled();
-
- spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+ ftrace_hash_lock(flags);
/* This ip may have hit the hash before the lock */
if (ftrace_ip_in_hash(ip, key))
@@ -387,7 +432,7 @@ ftrace_record_ip(unsigned long ip)
ftraced_trigger = 1;
out_unlock:
- spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
+ ftrace_hash_unlock(flags);
out:
per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
@@ -531,6 +576,16 @@ static void ftrace_shutdown_replenish(void)
ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
}
+static void print_ip_ins(const char *fmt, unsigned char *p)
+{
+ int i;
+
+ printk(KERN_CONT "%s", fmt);
+
+ for (i = 0; i < MCOUNT_INSN_SIZE; i++)
+ printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+}
+
static int
ftrace_code_disable(struct dyn_ftrace *rec)
{
@@ -541,10 +596,27 @@ ftrace_code_disable(struct dyn_ftrace *rec)
ip = rec->ip;
nop = ftrace_nop_replace();
- call = ftrace_call_replace(ip, MCOUNT_ADDR);
+ call = ftrace_call_replace(ip, mcount_addr);
failed = ftrace_modify_code(ip, call, nop);
if (failed) {
+ switch (failed) {
+ case 1:
+ WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on modifying ");
+ print_ip_sym(ip);
+ break;
+ case 2:
+ WARN_ON_ONCE(1);
+ pr_info("ftrace failed to modify ");
+ print_ip_sym(ip);
+ print_ip_ins(" expected: ", call);
+ print_ip_ins(" actual: ", (unsigned char *)ip);
+ print_ip_ins(" replace: ", nop);
+ printk(KERN_CONT "\n");
+ break;
+ }
+
rec->flags |= FTRACE_FL_FAILED;
return 0;
}
@@ -792,47 +864,7 @@ static int ftrace_update_code(void)
return 1;
}
-static int ftraced(void *ignore)
-{
- unsigned long usecs;
-
- while (!kthread_should_stop()) {
-
- set_current_state(TASK_INTERRUPTIBLE);
-
- /* check once a second */
- schedule_timeout(HZ);
-
- if (unlikely(ftrace_disabled))
- continue;
-
- mutex_lock(&ftrace_sysctl_lock);
- mutex_lock(&ftraced_lock);
- if (!ftraced_suspend && !ftraced_stop &&
- ftrace_update_code()) {
- usecs = nsecs_to_usecs(ftrace_update_time);
- if (ftrace_update_tot_cnt > 100000) {
- ftrace_update_tot_cnt = 0;
- pr_info("hm, dftrace overflow: %lu change%s"
- " (%lu total) in %lu usec%s\n",
- ftrace_update_cnt,
- ftrace_update_cnt != 1 ? "s" : "",
- ftrace_update_tot_cnt,
- usecs, usecs != 1 ? "s" : "");
- ftrace_disabled = 1;
- WARN_ON_ONCE(1);
- }
- }
- mutex_unlock(&ftraced_lock);
- mutex_unlock(&ftrace_sysctl_lock);
-
- ftrace_shutdown_replenish();
- }
- __set_current_state(TASK_RUNNING);
- return 0;
-}
-
-static int __init ftrace_dyn_table_alloc(void)
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
{
struct ftrace_page *pg;
int cnt;
@@ -859,7 +891,9 @@ static int __init ftrace_dyn_table_alloc(void)
pg = ftrace_pages = ftrace_pages_start;
- cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+ cnt = num_to_init / ENTRIES_PER_PAGE;
+ pr_info("ftrace: allocating %ld hash entries in %d pages\n",
+ num_to_init, cnt);
for (i = 0; i < cnt; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -901,6 +935,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
(*pos)++;
+ /* should not be called from interrupt context */
+ spin_lock(&ftrace_lock);
retry:
if (iter->idx >= iter->pg->index) {
if (iter->pg->next) {
@@ -910,15 +946,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
}
} else {
rec = &iter->pg->records[iter->idx++];
- if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
+ if ((rec->flags & FTRACE_FL_FREE) ||
+
+ (!(iter->flags & FTRACE_ITER_FAILURES) &&
(rec->flags & FTRACE_FL_FAILED)) ||
((iter->flags & FTRACE_ITER_FAILURES) &&
- (!(rec->flags & FTRACE_FL_FAILED) ||
- (rec->flags & FTRACE_FL_FREE))) ||
-
- ((iter->flags & FTRACE_ITER_FILTER) &&
- !(rec->flags & FTRACE_FL_FILTER)) ||
+ !(rec->flags & FTRACE_FL_FAILED)) ||