author     Linus Torvalds <torvalds@linux-foundation.org>  2020-12-17 13:22:17 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2020-12-17 13:22:17 -0800
commit     09c0796adf0c793462fda1d7c8c43324551405c7 (patch)
tree       90893d337b215482f059dd7e522279b23ffa0961 /kernel
parent     312dcaf967219effe0483785f24e4072a5bed9a5 (diff)
parent     f6a694665f132cbf6e2222dd2f173dc35330a8aa (diff)
Merge tag 'trace-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from Steven Rostedt:

 "The major update to this release is that there's a new arch config option called CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS. Currently, only x86_64 enables it. All the ftrace callbacks now take a struct ftrace_regs instead of a struct pt_regs. If the architecture has HAVE_DYNAMIC_FTRACE_WITH_ARGS enabled, then the ftrace_regs will have enough information to read the arguments of the function being traced, as well as access to the stack pointer. This way, if a user (like live kernel patching) only cares about the arguments, then it can avoid using the heavier weight "regs" callback, that puts in enough information in the struct ftrace_regs to simulate a breakpoint exception (needed for kprobes).

  A new config option that audits the timestamps of the ftrace ring buffer at most every event recorded.

  Ftrace recursion protection has been cleaned up to move the protection to the callback itself (this saves on an extra function call for those callbacks).

  Perf now handles its own RCU protection and does not depend on ftrace to do it for it (saving on that extra function call).

  New debug option to add "recursed_functions" file to tracefs that lists all the places that triggered the recursion protection of the function tracer. This will show where things need to be fixed as recursion slows down the function tracer.

  The eval enum mapping updates done at boot up are now offloaded to a work queue, as it caused a noticeable pause on slow embedded boards.

  Various clean ups and last minute fixes"

* tag 'trace-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (33 commits)
  tracing: Offload eval map updates to a work queue
  Revert: "ring-buffer: Remove HAVE_64BIT_ALIGNED_ACCESS"
  ring-buffer: Add rb_check_bpage in __rb_allocate_pages
  ring-buffer: Fix two typos in comments
  tracing: Drop unneeded assignment in ring_buffer_resize()
  tracing: Disable ftrace selftests when any tracer is running
  seq_buf: Avoid type mismatch for seq_buf_init
  ring-buffer: Fix a typo in function description
  ring-buffer: Remove obsolete rb_event_is_commit()
  ring-buffer: Add test to validate the time stamp deltas
  ftrace/documentation: Fix RST C code blocks
  tracing: Clean up after filter logic rewriting
  tracing: Remove the useless value assignment in test_create_synth_event()
  livepatch: Use the default ftrace_ops instead of REGS when ARGS is available
  ftrace/x86: Allow for arguments to be passed in to ftrace_regs by default
  ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs
  MAINTAINERS: assign ./fs/tracefs to TRACING
  tracing: Fix some typos in comments
  ftrace: Remove unused varible 'ret'
  ring-buffer: Add recording of ring buffer recursion into recursed_functions
  ...
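For orientation, the sketch below (not part of this merge; "sample_callback" and "sample_ops" are hypothetical names used only for illustration) shows roughly what a function-trace callback looks like after this change: it receives a struct ftrace_regs, takes its own recursion protection with ftrace_test_recursion_trylock(), and calls ftrace_get_regs() to obtain a full pt_regs only when one is available.

/*
 * Illustrative sketch only -- not part of this merge. The ftrace APIs
 * used (struct ftrace_regs, ftrace_get_regs(), the
 * ftrace_test_recursion_* helpers, FTRACE_OPS_FL_RECURSION) are the
 * ones introduced or reworked by this pull request.
 */
#include <linux/ftrace.h>
#include <linux/trace_recursion.h>
#include <linux/ptrace.h>
#include <linux/printk.h>

static void notrace sample_callback(unsigned long ip, unsigned long parent_ip,
				    struct ftrace_ops *op,
				    struct ftrace_regs *fregs)
{
	struct pt_regs *regs;
	int bit;

	/* Recursion protection is now taken by the callback itself. */
	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;

	/*
	 * ftrace_get_regs() returns a full pt_regs only when the ops set
	 * FTRACE_OPS_FL_SAVE_REGS (or the architecture always fills one
	 * in); otherwise it returns NULL and only the lighter-weight
	 * argument/stack information of ftrace_regs is valid.
	 */
	regs = ftrace_get_regs(fregs);
	if (regs)
		/* A real callback would do far less work per hit. */
		pr_info("traced %ps, first arg: %lx\n", (void *)ip,
			regs_get_kernel_argument(regs, 0));

	ftrace_test_recursion_unlock(bit);
}

static struct ftrace_ops sample_ops = {
	.func = sample_callback,
	/* No FTRACE_OPS_FL_RECURSION: this callback handles it itself. */
};

Registering it with register_ftrace_function(&sample_ops) would attach it to every traced function (subject to any filter hash), without paying for the recursion-protection assist wrapper that callbacks setting FTRACE_OPS_FL_RECURSION go through.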
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/livepatch/Kconfig               |   2
-rw-r--r--  kernel/livepatch/patch.c               |  15
-rw-r--r--  kernel/trace/Kconfig                   |  68
-rw-r--r--  kernel/trace/Makefile                  |   1
-rw-r--r--  kernel/trace/blktrace.c                |   4
-rw-r--r--  kernel/trace/bpf_trace.c               |   2
-rw-r--r--  kernel/trace/fgraph.c                  |   3
-rw-r--r--  kernel/trace/ftrace.c                  |  57
-rw-r--r--  kernel/trace/ring_buffer.c             | 223
-rw-r--r--  kernel/trace/synth_event_gen_test.c    |   2
-rw-r--r--  kernel/trace/trace.c                   |  53
-rw-r--r--  kernel/trace/trace.h                   | 182
-rw-r--r--  kernel/trace/trace_benchmark.c         |   6
-rw-r--r--  kernel/trace/trace_boot.c              |   2
-rw-r--r--  kernel/trace/trace_dynevent.c          |   2
-rw-r--r--  kernel/trace/trace_dynevent.h          |   6
-rw-r--r--  kernel/trace/trace_entries.h           |   6
-rw-r--r--  kernel/trace/trace_event_perf.c        |  15
-rw-r--r--  kernel/trace/trace_events.c            |   9
-rw-r--r--  kernel/trace/trace_events_filter.c     |  23
-rw-r--r--  kernel/trace/trace_events_hist.c       |   2
-rw-r--r--  kernel/trace/trace_events_synth.c      |   4
-rw-r--r--  kernel/trace/trace_export.c            |   2
-rw-r--r--  kernel/trace/trace_functions.c         |  23
-rw-r--r--  kernel/trace/trace_functions_graph.c   |   2
-rw-r--r--  kernel/trace/trace_hwlat.c             |   4
-rw-r--r--  kernel/trace/trace_irqsoff.c           |   2
-rw-r--r--  kernel/trace/trace_kprobe.c            |   9
-rw-r--r--  kernel/trace/trace_output.c            |   6
-rw-r--r--  kernel/trace/trace_output.h            |   1
-rw-r--r--  kernel/trace/trace_recursion_record.c  | 236
-rw-r--r--  kernel/trace/trace_sched_wakeup.c      |   2
-rw-r--r--  kernel/trace/trace_selftest.c          |  29
-rw-r--r--  kernel/trace/trace_stack.c             |   3
-rw-r--r--  kernel/trace/tracing_map.c             |   6
-rw-r--r--  kernel/trace/tracing_map.h             |   2
36 files changed, 662 insertions, 352 deletions
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig
index 54102deb50ba..53d51ed619a3 100644
--- a/kernel/livepatch/Kconfig
+++ b/kernel/livepatch/Kconfig
@@ -6,7 +6,7 @@ config HAVE_LIVEPATCH
config LIVEPATCH
bool "Kernel Live Patching"
- depends on DYNAMIC_FTRACE_WITH_REGS
+ depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
depends on MODULES
depends on SYSFS
depends on KALLSYMS_ALL
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index b552cf2d85f8..e8029aea67f1 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -40,14 +40,18 @@ struct klp_ops *klp_find_ops(void *old_func)
static void notrace klp_ftrace_handler(unsigned long ip,
unsigned long parent_ip,
struct ftrace_ops *fops,
- struct pt_regs *regs)
+ struct ftrace_regs *fregs)
{
struct klp_ops *ops;
struct klp_func *func;
int patch_state;
+ int bit;
ops = container_of(fops, struct klp_ops, fops);
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (WARN_ON_ONCE(bit < 0))
+ return;
/*
* A variant of synchronize_rcu() is used to allow patching functions
* where RCU is not watching, see klp_synchronize_transition().
@@ -113,10 +117,11 @@ static void notrace klp_ftrace_handler(unsigned long ip,
if (func->nop)
goto unlock;
- klp_arch_set_pc(regs, (unsigned long)func->new_func);
+ klp_arch_set_pc(fregs, (unsigned long)func->new_func);
unlock:
preempt_enable_notrace();
+ ftrace_test_recursion_unlock(bit);
}
/*
@@ -194,8 +199,10 @@ static int klp_patch_func(struct klp_func *func)
return -ENOMEM;
ops->fops.func = klp_ftrace_handler;
- ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS |
- FTRACE_OPS_FL_DYNAMIC |
+ ops->fops.flags = FTRACE_OPS_FL_DYNAMIC |
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ FTRACE_OPS_FL_SAVE_REGS |
+#endif
FTRACE_OPS_FL_IPMODIFY |
FTRACE_OPS_FL_PERMANENT;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2f8d9cc42c40..d5a19413d4f8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -31,6 +31,15 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
bool
+config HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ bool
+ help
+ If this is set, then arguments and stack can be found from
+ the pt_regs passed into the function callback regs parameter
+ by default, even without setting the REGS flag in the ftrace_ops.
+ This allows for use of regs_get_kernel_argument() and
+ kernel_stack_pointer().
+
config HAVE_FTRACE_MCOUNT_RECORD
bool
help
@@ -725,6 +734,45 @@ config TRACE_EVAL_MAP_FILE
If unsure, say N.
+config FTRACE_RECORD_RECURSION
+ bool "Record functions that recurse in function tracing"
+ depends on FUNCTION_TRACER
+ help
+ All callbacks that attach to the function tracing have some sort
+ of protection against recursion. Even though the protection exists,
+ it adds overhead. This option will create a file in the tracefs
+ file system called "recursed_functions" that will list the functions
+ that triggered a recursion.
+
+ This will add more overhead to cases that have recursion.
+
+ If unsure, say N
+
+config FTRACE_RECORD_RECURSION_SIZE
+ int "Max number of recursed functions to record"
+ default 128
+ depends on FTRACE_RECORD_RECURSION
+ help
+ This defines the limit of number of functions that can be
+ listed in the "recursed_functions" file, that lists all
+ the functions that caused a recursion to happen.
+ This file can be reset, but the limit can not change in
+ size at runtime.
+
+config RING_BUFFER_RECORD_RECURSION
+ bool "Record functions that recurse in the ring buffer"
+ depends on FTRACE_RECORD_RECURSION
+ # default y, because it is coupled with FTRACE_RECORD_RECURSION
+ default y
+ help
+ The ring buffer has its own internal recursion. Although when
+ recursion happens it wont cause harm because of the protection,
+ but it does cause an unwanted overhead. Enabling this option will
+ place where recursion was detected into the ftrace "recursed_functions"
+ file.
+
+ This will add more overhead to cases that have recursion.
+
config GCOV_PROFILE_FTRACE
bool "Enable GCOV profiling on ftrace subsystem"
depends on GCOV_KERNEL
@@ -795,6 +843,26 @@ config RING_BUFFER_STARTUP_TEST
If unsure, say N
+config RING_BUFFER_VALIDATE_TIME_DELTAS
+ bool "Verify ring buffer time stamp deltas"
+ depends on RING_BUFFER
+ help
+ This will audit the time stamps on the ring buffer sub
+ buffer to make sure that all the time deltas for the
+ events on a sub buffer matches the current time stamp.
+ This audit is performed for every event that is not
+ interrupted, or interrupting another event. A check
+ is also made when traversing sub buffers to make sure
+ that all the deltas on the previous sub buffer do not
+ add up to be greater than the current time stamp.
+
+ NOTE: This adds significant overhead to recording of events,
+ and should only be used to test the logic of the ring buffer.
+ Do not use it on production systems.
+
+ Only say Y if you understand what this does, and you
+ still want it enabled. Otherwise say N
+
config MMIOTRACE_TEST
tristate "Test module for mmiotrace"
depends on MMIOTRACE && m
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e153be351548..7e44cea89fdc 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
+obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 456fe4ce6942..fb0fe4c66b84 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1279,7 +1279,7 @@ static void blk_log_action(struct trace_iterator *iter, const char *act,
* ones now use the 64bit ino as the whole ID and
* no longer use generation.
*
- * Regarldess of the content, always output
+ * Regardless of the content, always output
* "LOW32,HIGH32" so that FILEID_INO32_GEN fid can
* be mapped back to @id on both 64 and 32bit ino
* setups. See __kernfs_fh_to_dentry().
@@ -1321,7 +1321,7 @@ static void blk_log_dump_pdu(struct trace_seq *s,
i == 0 ? "" : " ", pdu_buf[i]);
/*
- * stop when the rest is just zeroes and indicate so
+ * stop when the rest is just zeros and indicate so
* with a ".." appended
*/
if (i == end && end != pdu_len - 1) {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ebadaa83502c..6c0018abe68a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -116,7 +116,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
* Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
* to all call sites, we did a bpf_prog_array_valid() there to check
* whether call->prog_array is empty or not, which is
- * a heurisitc to speed up execution.
+ * a heuristic to speed up execution.
*
* If bpf_prog_array_valid() fetched prog_array was
* non-NULL, we go into trace_call_bpf() and do the actual
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 5658f13037b3..73edb9e4f354 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -334,8 +334,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
static struct ftrace_ops graph_ops = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
+ .flags = FTRACE_OPS_FL_INITIALIZED |
FTRACE_OPS_FL_PID |
FTRACE_OPS_FL_STUB,
#ifdef FTRACE_GRAPH_TRAMP_ADDR
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9c1bba8cc51b..4d8e35575549 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -80,7 +80,7 @@ enum {
struct ftrace_ops ftrace_list_end __read_mostly = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
+ .flags = FTRACE_OPS_FL_STUB,
INIT_OPS_HASH(ftrace_list_end)
};
@@ -121,7 +121,7 @@ struct ftrace_ops global_ops;
#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs);
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
#else
/* See comment below, where ftrace_ops_list_func is defined */
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
@@ -140,7 +140,7 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
}
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct trace_array *tr = op->private;
int pid;
@@ -154,7 +154,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
return;
}
- op->saved_func(ip, parent_ip, op, regs);
+ op->saved_func(ip, parent_ip, op, fregs);
}
static void ftrace_sync_ipi(void *data)
@@ -754,7 +754,7 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
static void
function_profile_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct pt_regs *regs)
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct ftrace_profile_stat *stat;
struct ftrace_profile *rec;
@@ -866,7 +866,7 @@ static void unregister_ftrace_profiler(void)
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
.func = function_profile_call,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
+ .flags = FTRACE_OPS_FL_INITIALIZED,
INIT_OPS_HASH(ftrace_profile_ops)
};
@@ -1040,8 +1040,7 @@ struct ftrace_ops global_ops = {
.local_hash.notrace_hash = EMPTY_HASH,
.local_hash.filter_hash = EMPTY_HASH,
INIT_OPS_HASH(global_ops)
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
+ .flags = FTRACE_OPS_FL_INITIALIZED |
FTRACE_OPS_FL_PID,
};
@@ -2146,6 +2145,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
else
rec->flags &= ~FTRACE_FL_TRAMP_EN;
}
+
if (flag & FTRACE_FL_DIRECT) {
/*
* If there's only one user (direct_ops helper)
@@ -2389,8 +2389,9 @@ unsigned long ftrace_find_rec_direct(unsigned long ip)
}
static void call_direct_funcs(unsigned long ip, unsigned long pip,
- struct ftrace_ops *ops, struct pt_regs *regs)
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
+ struct pt_regs *regs = ftrace_get_regs(fregs);
unsigned long addr;
addr = ftrace_find_rec_direct(ip);
@@ -2402,7 +2403,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
struct ftrace_ops direct_ops = {
.func = call_direct_funcs,
- .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION_SAFE
+ .flags = FTRACE_OPS_FL_IPMODIFY
| FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
| FTRACE_OPS_FL_PERMANENT,
/*
@@ -4183,7 +4184,6 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
struct ftrace_hash **orig_hash, *new_hash;
LIST_HEAD(process_mods);
char *func;
- int ret;
mutex_lock(&ops->func_hash->regex_lock);
@@ -4236,7 +4236,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
mutex_lock(&ftrace_lock);
- ret = ftrace_hash_move_and_update_ops(ops, orig_hash,
+ ftrace_hash_move_and_update_ops(ops, orig_hash,
new_hash, enable);
mutex_unlock(&ftrace_lock);
@@ -4314,7 +4314,7 @@ static int __init ftrace_mod_cmd_init(void)
core_initcall(ftrace_mod_cmd_init);
static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct ftrace_probe_ops *probe_ops;
struct ftrace_func_probe *probe;
@@ -5588,7 +5588,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
struct ftrace_hash **orig_hash;
struct trace_parser *parser;
int filter_hash;
- int ret;
if (file->f_mode & FMODE_READ) {
iter = m->private;
@@ -5616,7 +5615,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
orig_hash = &iter->ops->func_hash->notrace_hash;
mutex_lock(&ftrace_lock);
- ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash,
+ ftrace_hash_move_and_update_ops(iter->ops, orig_hash,
iter->hash, filter_hash);
mutex_unlock(&ftrace_lock);
} else {
@@ -6884,8 +6883,7 @@ void ftrace_init_trace_array(struct trace_array *tr)
struct ftrace_ops global_ops = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
+ .flags = FTRACE_OPS_FL_INITIALIZED |
FTRACE_OPS_FL_PID,
};
@@ -6935,12 +6933,13 @@ void ftrace_reset_array_ops(struct trace_array *tr)
static nokprobe_inline void
__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ignored, struct pt_regs *regs)
+ struct ftrace_ops *ignored, struct ftrace_regs *fregs)
{
+ struct pt_regs *regs = ftrace_get_regs(fregs);
struct ftrace_ops *op;
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
@@ -6969,7 +6968,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
pr_warn("op=%p %pS\n", op, op);
goto out;
}
- op->func(ip, parent_ip, op, regs);
+ op->func(ip, parent_ip, op, fregs);
}
} while_for_each_ftrace_op(op);
out:
@@ -6992,9 +6991,9 @@ out:
*/
#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- __ftrace_ops_list_func(ip, parent_ip, NULL, regs);
+ __ftrace_ops_list_func(ip, parent_ip, NULL, fregs);
}
NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
@@ -7011,18 +7010,18 @@ NOKPROBE_SYMBOL(ftrace_ops_no_ops);
* this function will be called by the mcount trampoline.
*/
static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
preempt_disable_notrace();
if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
- op->func(ip, parent_ip, op, regs);
+ op->func(ip, parent_ip, op, fregs);
preempt_enable_notrace();
trace_clear_recursion(bit);
@@ -7043,11 +7042,11 @@ NOKPROBE_SYMBOL(ftrace_ops_assist_func);
ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
{
/*
- * If the function does not handle recursion, needs to be RCU safe,
- * or does per cpu logic, then we need to call the assist handler.
+ * If the function does not handle recursion or needs to be RCU safe,
+ * then we need to call the assist handler.
*/
- if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) ||
- ops->flags & FTRACE_OPS_FL_RCU)
+ if (ops->flags & (FTRACE_OPS_FL_RECURSION |
+ FTRACE_OPS_FL_RCU))
return ftrace_ops_assist_func;
return ops->func;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a6268e09160a..ec08f948dd80 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
*
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
+#include <linux/trace_recursion.h>
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
@@ -129,7 +130,16 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
#define RB_ALIGNMENT 4U
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
-#define RB_ALIGN_DATA __aligned(RB_ALIGNMENT)
+
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
+# define RB_FORCE_8BYTE_ALIGNMENT 0
+# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT 1
+# define RB_ARCH_ALIGNMENT 8U
+#endif
+
+#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -1422,7 +1432,8 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
return 0;
}
-static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
+static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ long nr_pages, struct list_head *pages)
{
struct buffer_page *bpage, *tmp;
bool user_thread = current->mm != NULL;
@@ -1462,13 +1473,15 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
struct page *page;
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
- mflags, cpu_to_node(cpu));
+ mflags, cpu_to_node(cpu_buffer->cpu));
if (!bpage)
goto free_pages;
+ rb_check_bpage(cpu_buffer, bpage);
+
list_add(&bpage->list, pages);
- page = alloc_pages_node(cpu_to_node(cpu), mflags, 0);
+ page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags, 0);
if (!page)
goto free_pages;
bpage->page = page_address(page);
@@ -1500,7 +1513,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
WARN_ON(!nr_pages);
- if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
+ if (__rb_allocate_pages(cpu_buffer, nr_pages, &pages))
return -ENOMEM;
/*
@@ -1973,8 +1986,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
if (nr_pages < 2)
nr_pages = 2;
- size = nr_pages * BUF_PAGE_SIZE;
-
/* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
@@ -2009,8 +2020,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
* allocated without receiving ENOMEM
*/
INIT_LIST_HEAD(&cpu_buffer->new_pages);
- if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
- &cpu_buffer->new_pages, cpu)) {
+ if (__rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages)) {
/* not enough memory for new pages */
err = -ENOMEM;
goto out_err;
@@ -2075,8 +2086,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
INIT_LIST_HEAD(&cpu_buffer->new_pages);
if (cpu_buffer->nr_pages_to_update > 0 &&
- __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
- &cpu_buffer->new_pages, cpu_id)) {
+ __rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages)) {
err = -ENOMEM;
goto out_err;
}
@@ -2628,9 +2639,6 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
return skip_time_extend(event);
}
-static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event);
-
#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
static inline bool sched_clock_stable(void)
{
@@ -2719,7 +2727,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA) {
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
event->type_len = 0;
event->array[0] = length;
} else
@@ -2734,11 +2742,11 @@ static unsigned rb_calculate_event_length(unsigned length)
if (!length)
length++;
- if (length > RB_MAX_SMALL_DATA)
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ALIGNMENT);
+ length = ALIGN(length, RB_ARCH_ALIGNMENT);
/*
* In case the time delta is larger than the 27 bits for it
@@ -2758,20 +2766,6 @@ static unsigned rb_calculate_event_length(unsigned length)
return length;
}
-static __always_inline bool
-rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- unsigned long addr = (unsigned long)event;
- unsigned long index;
-
- index = rb_event_index(event);
- addr &= PAGE_MASK;
-
- return cpu_buffer->commit_page->page == (void *)addr &&
- rb_commit_index(cpu_buffer) == index;
-}
-
static u64 rb_time_delta(struct ring_buffer_event *event)
{
switch (event->type_len) {
@@ -3006,6 +3000,13 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
irq_work_queue(&cpu_buffer->irq_work.work);
}
+#ifdef CONFIG_RING_BUFFER_RECORD_RECURSION
+# define do_ring_buffer_record_recursion() \
+ do_ftrace_record_recursion(_THIS_IP_, _RET_IP_)
+#else
+# define do_ring_buffer_record_recursion() do { } while (0)
+#endif
+
/*
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
@@ -3088,8 +3089,10 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
* been updated yet. In this case, use the TRANSITION bit.
*/
bit = RB_CTX_TRANSITION;
- if (val & (1 << (bit + cpu_buffer->nest)))
+ if (val & (1 << (bit + cpu_buffer->nest))) {
+ do_ring_buffer_record_recursion();
return 1;
+ }
}
val |= (1 << (bit + cpu_buffer->nest));
@@ -3183,6 +3186,153 @@ int ring_buffer_unlock_commit(struct trace_buffer *buffer,
}
EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
+/* Special value to validate all deltas on a page. */
+#define CHECK_FULL_PAGE 1L
+
+#ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS
+static void dump_buffer_page(struct buffer_data_page *bpage,
+ struct rb_event_info *info,
+ unsigned long tail)
+{
+ struct ring_buffer_event *event;
+ u64 ts, delta;
+ int e;
+
+ ts = bpage->time_stamp;
+ pr_warn(" [%lld] PAGE TIME STAMP\n", ts);
+
+ for (e = 0; e < tail; e += rb_event_length(event)) {
+
+ event = (struct ring_buffer_event *)(bpage->data + e);
+
+ switch (event->type_len) {
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ delta = ring_buffer_event_time_stamp(event);
+ ts += delta;
+ pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta);
+ break;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ delta = ring_buffer_event_time_stamp(event);
+ ts = delta;
+ pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta);
+ break;
+
+ case RINGBUF_TYPE_PADDING:
+ ts += event->time_delta;
+ pr_warn(" [%lld] delta:%d PADDING\n", ts, event->time_delta);
+ break;
+
+ case RINGBUF_TYPE_DATA:
+ ts += event->time_delta;
+ pr_warn(" [%lld] delta:%d\n", ts, event->time_delta);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static DEFINE_PER_CPU(atomic_t, checking);
+static atomic_t ts_dump;
+
+/*
+ * Check if the current event time stamp matches the deltas on
+ * the buffer page.
+ */
+static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_event_info *info,
+ unsigned long tail)
+{
+ struct ring_buffer_event *event;
+ struct buffer_data_page *bpage;
+ u64 ts, delta;
+ bool full = false;
+ int e;
+
+ bpage = info->tail_page->page;
+
+ if (tail == CHECK_FULL_PAGE) {
+ full = true;
+ tail = local_read(&bpage->commit);
+ } else if (info->add_timestamp &
+ (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)) {
+ /* Ignore events with absolute time stamps */
+ return;
+ }
+
+ /*
+ * Do not check the first event (skip possible extends too).
+ * Also do not check if previous events have not been committed.
+ */
+ if (tail <= 8 || tail > local_read(&bpage->commit))
+ return;
+
+ /*
+ * If this interrupted another event,
+ */
+ if (atomic_inc_return(this_cpu_ptr(&checking)) != 1)
+ goto out;
+
+ ts = bpage->time_stamp;
+
+ for (e = 0; e < tail; e += rb_event_length(event)) {
+
+ event = (struct ring_buffer_event *)(bpage->data + e);
+
+ switch (event->type_