summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/livepatch/Kconfig2
-rw-r--r--kernel/livepatch/patch.c15
-rw-r--r--kernel/trace/Kconfig68
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/fgraph.c3
-rw-r--r--kernel/trace/ftrace.c57
-rw-r--r--kernel/trace/ring_buffer.c223
-rw-r--r--kernel/trace/synth_event_gen_test.c2
-rw-r--r--kernel/trace/trace.c53
-rw-r--r--kernel/trace/trace.h182
-rw-r--r--kernel/trace/trace_benchmark.c6
-rw-r--r--kernel/trace/trace_boot.c2
-rw-r--r--kernel/trace/trace_dynevent.c2
-rw-r--r--kernel/trace/trace_dynevent.h6
-rw-r--r--kernel/trace/trace_entries.h6
-rw-r--r--kernel/trace/trace_event_perf.c15
-rw-r--r--kernel/trace/trace_events.c9
-rw-r--r--kernel/trace/trace_events_filter.c23
-rw-r--r--kernel/trace/trace_events_hist.c2
-rw-r--r--kernel/trace/trace_events_synth.c4
-rw-r--r--kernel/trace/trace_export.c2
-rw-r--r--kernel/trace/trace_functions.c23
-rw-r--r--kernel/trace/trace_functions_graph.c2
-rw-r--r--kernel/trace/trace_hwlat.c4
-rw-r--r--kernel/trace/trace_irqsoff.c2
-rw-r--r--kernel/trace/trace_kprobe.c9
-rw-r--r--kernel/trace/trace_output.c6
-rw-r--r--kernel/trace/trace_output.h1
-rw-r--r--kernel/trace/trace_recursion_record.c236
-rw-r--r--kernel/trace/trace_sched_wakeup.c2
-rw-r--r--kernel/trace/trace_selftest.c29
-rw-r--r--kernel/trace/trace_stack.c3
-rw-r--r--kernel/trace/tracing_map.c6
-rw-r--r--kernel/trace/tracing_map.h2
36 files changed, 662 insertions, 352 deletions
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig
index 54102deb50ba..53d51ed619a3 100644
--- a/kernel/livepatch/Kconfig
+++ b/kernel/livepatch/Kconfig
@@ -6,7 +6,7 @@ config HAVE_LIVEPATCH
config LIVEPATCH
bool "Kernel Live Patching"
- depends on DYNAMIC_FTRACE_WITH_REGS
+ depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
depends on MODULES
depends on SYSFS
depends on KALLSYMS_ALL
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index b552cf2d85f8..e8029aea67f1 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -40,14 +40,18 @@ struct klp_ops *klp_find_ops(void *old_func)
static void notrace klp_ftrace_handler(unsigned long ip,
unsigned long parent_ip,
struct ftrace_ops *fops,
- struct pt_regs *regs)
+ struct ftrace_regs *fregs)
{
struct klp_ops *ops;
struct klp_func *func;
int patch_state;
+ int bit;
ops = container_of(fops, struct klp_ops, fops);
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (WARN_ON_ONCE(bit < 0))
+ return;
/*
* A variant of synchronize_rcu() is used to allow patching functions
* where RCU is not watching, see klp_synchronize_transition().
@@ -113,10 +117,11 @@ static void notrace klp_ftrace_handler(unsigned long ip,
if (func->nop)
goto unlock;
- klp_arch_set_pc(regs, (unsigned long)func->new_func);
+ klp_arch_set_pc(fregs, (unsigned long)func->new_func);
unlock:
preempt_enable_notrace();
+ ftrace_test_recursion_unlock(bit);
}
/*
@@ -194,8 +199,10 @@ static int klp_patch_func(struct klp_func *func)
return -ENOMEM;
ops->fops.func = klp_ftrace_handler;
- ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS |
- FTRACE_OPS_FL_DYNAMIC |
+ ops->fops.flags = FTRACE_OPS_FL_DYNAMIC |
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ FTRACE_OPS_FL_SAVE_REGS |
+#endif
FTRACE_OPS_FL_IPMODIFY |
FTRACE_OPS_FL_PERMANENT;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2f8d9cc42c40..d5a19413d4f8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -31,6 +31,15 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
bool
+config HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ bool
+ help
+ If this is set, then arguments and stack can be found from
+ the pt_regs passed into the function callback regs parameter
+ by default, even without setting the REGS flag in the ftrace_ops.
+ This allows for use of regs_get_kernel_argument() and
+ kernel_stack_pointer().
+
config HAVE_FTRACE_MCOUNT_RECORD
bool
help
@@ -725,6 +734,45 @@ config TRACE_EVAL_MAP_FILE
If unsure, say N.
+config FTRACE_RECORD_RECURSION
+ bool "Record functions that recurse in function tracing"
+ depends on FUNCTION_TRACER
+ help
+ All callbacks that attach to the function tracing have some sort
+ of protection against recursion. Even though the protection exists,
+ it adds overhead. This option will create a file in the tracefs
+ file system called "recursed_functions" that will list the functions
+ that triggered a recursion.
+
+ This will add more overhead to cases that have recursion.
+
+ If unsure, say N
+
+config FTRACE_RECORD_RECURSION_SIZE
+ int "Max number of recursed functions to record"
+ default 128
+ depends on FTRACE_RECORD_RECURSION
+ help
+ This defines the limit of number of functions that can be
+ listed in the "recursed_functions" file, that lists all
+ the functions that caused a recursion to happen.
+ This file can be reset, but the limit can not change in
+ size at runtime.
+
+config RING_BUFFER_RECORD_RECURSION
+ bool "Record functions that recurse in the ring buffer"
+ depends on FTRACE_RECORD_RECURSION
+ # default y, because it is coupled with FTRACE_RECORD_RECURSION
+ default y
+ help
+ The ring buffer has its own internal recursion. Although when
+ recursion happens it wont cause harm because of the protection,
+ but it does cause an unwanted overhead. Enabling this option will
+ place where recursion was detected into the ftrace "recursed_functions"
+ file.
+
+ This will add more overhead to cases that have recursion.
+
config GCOV_PROFILE_FTRACE
bool "Enable GCOV profiling on ftrace subsystem"
depends on GCOV_KERNEL
@@ -795,6 +843,26 @@ config RING_BUFFER_STARTUP_TEST
If unsure, say N
+config RING_BUFFER_VALIDATE_TIME_DELTAS
+ bool "Verify ring buffer time stamp deltas"
+ depends on RING_BUFFER
+ help
+ This will audit the time stamps on the ring buffer sub
+ buffer to make sure that all the time deltas for the
+ events on a sub buffer matches the current time stamp.
+ This audit is performed for every event that is not
+ interrupted, or interrupting another event. A check
+ is also made when traversing sub buffers to make sure
+ that all the deltas on the previous sub buffer do not
+ add up to be greater than the current time stamp.
+
+ NOTE: This adds significant overhead to recording of events,
+ and should only be used to test the logic of the ring buffer.
+ Do not use it on production systems.
+
+ Only say Y if you understand what this does, and you
+ still want it enabled. Otherwise say N
+
config MMIOTRACE_TEST
tristate "Test module for mmiotrace"
depends on MMIOTRACE && m
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e153be351548..7e44cea89fdc 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
+obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 456fe4ce6942..fb0fe4c66b84 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1279,7 +1279,7 @@ static void blk_log_action(struct trace_iterator *iter, const char *act,
* ones now use the 64bit ino as the whole ID and
* no longer use generation.
*
- * Regarldess of the content, always output
+ * Regardless of the content, always output
* "LOW32,HIGH32" so that FILEID_INO32_GEN fid can
* be mapped back to @id on both 64 and 32bit ino
* setups. See __kernfs_fh_to_dentry().
@@ -1321,7 +1321,7 @@ static void blk_log_dump_pdu(struct trace_seq *s,
i == 0 ? "" : " ", pdu_buf[i]);
/*
- * stop when the rest is just zeroes and indicate so
+ * stop when the rest is just zeros and indicate so
* with a ".." appended
*/
if (i == end && end != pdu_len - 1) {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ebadaa83502c..6c0018abe68a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -116,7 +116,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
* Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
* to all call sites, we did a bpf_prog_array_valid() there to check
* whether call->prog_array is empty or not, which is
- * a heurisitc to speed up execution.
+ * a heuristic to speed up execution.
*
* If bpf_prog_array_valid() fetched prog_array was
* non-NULL, we go into trace_call_bpf() and do the actual
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 5658f13037b3..73edb9e4f354 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -334,8 +334,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
static struct ftrace_ops graph_ops = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
+ .flags = FTRACE_OPS_FL_INITIALIZED |
FTRACE_OPS_FL_PID |
FTRACE_OPS_FL_STUB,
#ifdef FTRACE_GRAPH_TRAMP_ADDR
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9c1bba8cc51b..4d8e35575549 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -80,7 +80,7 @@ enum {
struct ftrace_ops ftrace_list_end __read_mostly = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
+ .flags = FTRACE_OPS_FL_STUB,
INIT_OPS_HASH(ftrace_list_end)
};
@@ -121,7 +121,7 @@ struct ftrace_ops global_ops;
#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs);
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
#else
/* See comment below, where ftrace_ops_list_func is defined */
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
@@ -140,7 +140,7 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
}
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct trace_array *tr = op->private;
int pid;
@@ -154,7 +154,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
return;
}
- op->saved_func(ip, parent_ip, op, regs);
+ op->saved_func(ip, parent_ip, op, fregs);
}
static void ftrace_sync_ipi(void *data)
@@ -754,7 +754,7 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
static void
function_profile_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct pt_regs *regs)
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct ftrace_profile_stat *stat;
struct ftrace_profile *rec;
@@ -866,7 +866,7 @@ static void unregister_ftrace_profiler(void)
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
.func = function_profile_call,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
+ .flags = FTRACE_OPS_FL_INITIALIZED,
INIT_OPS_HASH(ftrace_profile_ops)
};
@@ -1040,8 +1040,7 @@ struct ftrace_ops global_ops = {
.local_hash.notrace_hash = EMPTY_HASH,
.local_hash.filter_hash = EMPTY_HASH,
INIT_OPS_HASH(global_ops)
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
+ .flags = FTRACE_OPS_FL_INITIALIZED |
FTRACE_OPS_FL_PID,
};
@@ -2146,6 +2145,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
else
rec->flags &= ~FTRACE_FL_TRAMP_EN;
}
+
if (flag & FTRACE_FL_DIRECT) {
/*
* If there's only one user (direct_ops helper)
@@ -2389,8 +2389,9 @@ unsigned long ftrace_find_rec_direct(unsigned long ip)
}
static void call_direct_funcs(unsigned long ip, unsigned long pip,
- struct ftrace_ops *ops, struct pt_regs *regs)
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
+ struct pt_regs *regs = ftrace_get_regs(fregs);
unsigned long addr;
addr = ftrace_find_rec_direct(ip);
@@ -2402,7 +2403,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
struct ftrace_ops direct_ops = {
.func = call_direct_funcs,
- .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION_SAFE
+ .flags = FTRACE_OPS_FL_IPMODIFY
| FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
| FTRACE_OPS_FL_PERMANENT,
/*
@@ -4183,7 +4184,6 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
struct ftrace_hash **orig_hash, *new_hash;
LIST_HEAD(process_mods);
char *func;
- int ret;
mutex_lock(&ops->func_hash->regex_lock);
@@ -4236,7 +4236,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
mutex_lock(&ftrace_lock);
- ret = ftrace_hash_move_and_update_ops(ops, orig_hash,
+ ftrace_hash_move_and_update_ops(ops, orig_hash,
new_hash, enable);
mutex_unlock(&ftrace_lock);
@@ -4314,7 +4314,7 @@ static int __init ftrace_mod_cmd_init(void)
core_initcall(ftrace_mod_cmd_init);
static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct ftrace_probe_ops *probe_ops;
struct ftrace_func_probe *probe;
@@ -5588,7 +5588,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
struct ftrace_hash **orig_hash;
struct trace_parser *parser;
int filter_hash;
- int ret;
if (file->f_mode & FMODE_READ) {
iter = m->private;
@@ -5616,7 +5615,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
orig_hash = &iter->ops->func_hash->notrace_hash;
mutex_lock(&ftrace_lock);
- ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash,
+ ftrace_hash_move_and_update_ops(iter->ops, orig_hash,
iter->hash, filter_hash);
mutex_unlock(&ftrace_lock);
} else {
@@ -6884,8 +6883,7 @@ void ftrace_init_trace_array(struct trace_array *tr)
struct ftrace_ops global_ops = {
.func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
+ .flags = FTRACE_OPS_FL_INITIALIZED |
FTRACE_OPS_FL_PID,
};
@@ -6935,12 +6933,13 @@ void ftrace_reset_array_ops(struct trace_array *tr)
static nokprobe_inline void
__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ignored, struct pt_regs *regs)
+ struct ftrace_ops *ignored, struct ftrace_regs *fregs)
{
+ struct pt_regs *regs = ftrace_get_regs(fregs);
struct ftrace_ops *op;
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
@@ -6969,7 +6968,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
pr_warn("op=%p %pS\n", op, op);
goto out;
}
- op->func(ip, parent_ip, op, regs);
+ op->func(ip, parent_ip, op, fregs);
}
} while_for_each_ftrace_op(op);
out:
@@ -6992,9 +6991,9 @@ out:
*/
#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- __ftrace_ops_list_func(ip, parent_ip, NULL, regs);
+ __ftrace_ops_list_func(ip, parent_ip, NULL, fregs);
}
NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
@@ -7011,18 +7010,18 @@ NOKPROBE_SYMBOL(ftrace_ops_no_ops);
* this function will be called by the mcount trampoline.
*/
static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *regs)
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
preempt_disable_notrace();
if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
- op->func(ip, parent_ip, op, regs);
+ op->func(ip, parent_ip, op, fregs);
preempt_enable_notrace();
trace_clear_recursion(bit);
@@ -7043,11 +7042,11 @@ NOKPROBE_SYMBOL(ftrace_ops_assist_func);
ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
{
/*
- * If the function does not handle recursion, needs to be RCU safe,
- * or does per cpu logic, then we need to call the assist handler.
+ * If the function does not handle recursion or needs to be RCU safe,
+ * then we need to call the assist handler.
*/
- if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) ||
- ops->flags & FTRACE_OPS_FL_RCU)
+ if (ops->flags & (FTRACE_OPS_FL_RECURSION |
+ FTRACE_OPS_FL_RCU))
return ftrace_ops_assist_func;
return ops->func;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a6268e09160a..ec08f948dd80 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
*
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
+#include <linux/trace_recursion.h>
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
@@ -129,7 +130,16 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
#define RB_ALIGNMENT 4U
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
-#define RB_ALIGN_DATA __aligned(RB_ALIGNMENT)
+
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
+# define RB_FORCE_8BYTE_ALIGNMENT 0
+# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT 1
+# define RB_ARCH_ALIGNMENT 8U
+#endif
+
+#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -1422,7 +1432,8 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
return 0;
}
-static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
+static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ long nr_pages, struct list_head *pages)
{
struct buffer_page *bpage, *tmp;
bool user_thread = current->mm != NULL;
@@ -1462,13 +1473,15 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
struct page *page;
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
- mflags, cpu_to_node(cpu));
+ mflags, cpu_to_node(cpu_buffer->cpu));
if (!bpage)
goto free_pages;
+ rb_check_bpage(cpu_buffer, bpage);
+
list_add(&bpage->list, pages);
- page = alloc_pages_node(cpu_to_node(cpu), mflags, 0);
+ page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags, 0);
if (!page)
goto free_pages;
bpage->page = page_address(page);
@@ -1500,7 +1513,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
WARN_ON(!nr_pages);
- if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
+ if (__rb_allocate_pages(cpu_buffer, nr_pages, &pages))
return -ENOMEM;
/*
@@ -1973,8 +1986,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
if (nr_pages < 2)
nr_pages = 2;
- size = nr_pages * BUF_PAGE_SIZE;
-
/* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
@@ -2009,8 +2020,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
* allocated without receiving ENOMEM
*/
INIT_LIST_HEAD(&cpu_buffer->new_pages);
- if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
- &cpu_buffer->new_pages, cpu)) {
+ if (__rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages)) {
/* not enough memory for new pages */
err = -ENOMEM;
goto out_err;
@@ -2075,8 +2086,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
INIT_LIST_HEAD(&cpu_buffer->new_pages);
if (cpu_buffer->nr_pages_to_update > 0 &&
- __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
- &cpu_buffer->new_pages, cpu_id)) {
+ __rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages)) {
err = -ENOMEM;
goto out_err;
}
@@ -2628,9 +2639,6 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
return skip_time_extend(event);
}
-static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event);
-
#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
static inline bool sched_clock_stable(void)
{
@@ -2719,7 +2727,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA) {
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
event->type_len = 0;
event->array[0] = length;
} else
@@ -2734,11 +2742,11 @@ static unsigned rb_calculate_event_length(unsigned length)
if (!length)
length++;
- if (length > RB_MAX_SMALL_DATA)
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ALIGNMENT);
+ length = ALIGN(length, RB_ARCH_ALIGNMENT);
/*
* In case the time delta is larger than the 27 bits for it
@@ -2758,20 +2766,6 @@ static unsigned rb_calculate_event_length(unsigned length)
return length;
}
-static __always_inline bool
-rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- unsigned long addr = (unsigned long)event;
- unsigned long index;
-
- index = rb_event_index(event);
- addr &= PAGE_MASK;
-
- return cpu_buffer->commit_page->page == (void *)addr &&
- rb_commit_index(cpu_buffer) == index;
-}
-
static u64 rb_time_delta(struct ring_buffer_event *event)
{
switch (event->type_len) {
@@ -3006,6 +3000,13 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
irq_work_queue(&cpu_buffer->irq_work.work);
}
+#ifdef CONFIG_RING_BUFFER_RECORD_RECURSION
+# define do_ring_buffer_record_recursion() \
+ do_ftrace_record_recursion(_THIS_IP_, _RET_IP_)
+#else
+# define do_ring_buffer_record_recursion() do { } while (0)
+#endif
+
/*
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
@@ -3088,8 +3089,10 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
* been updated yet. In this case, use the TRANSITION bit.
*/
bit = RB_CTX_TRANSITION;
- if (val & (1 << (bit + cpu_buffer->nest)))
+ if (val & (1 << (bit + cpu_buffer->nest))) {
+ do_ring_buffer_record_recursion();
return 1;
+ }
}
val |= (1 << (bit + cpu_buffer->nest));
@@ -3183,6 +3186,153 @@ int ring_buffer_unlock_commit(struct trace_buffer *buffer,
}
EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
+/* Special value to validate all deltas on a page. */
+#define CHECK_FULL_PAGE 1L
+
+#ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS
+static void dump_buffer_page(struct buffer_data_page *bpage,
+ struct rb_event_info *info,
+ unsigned long tail)
+{
+ struct ring_buffer_event *event;
+ u64 ts, delta;
+ int e;
+
+ ts = bpage->time_stamp;
+ pr_warn(" [%lld] PAGE TIME STAMP\n", ts);
+
+ for (e = 0; e < tail; e += rb_event_length(event)) {
+
+ event = (struct ring_buffer_event *)(bpage->data + e);
+
+ switch (event->type_len) {
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ delta = ring_buffer_event_time_stamp(event);
+ ts += delta;
+ pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta);
+ break;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ delta = ring_buffer_event_time_stamp(event);
+ ts = delta;
+ pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta);
+ break;
+
+ case RINGBUF_TYPE_PADDING:
+ ts += event->time_delta;
+ pr_warn(" [%lld] delta:%d PADDING\n", ts, event->time_delta);
+ break;
+
+ case RINGBUF_TYPE_DATA:
+ ts += event->time_delta;
+ pr_warn(" [%lld] delta:%d\n", ts, event->time_delta);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static DEFINE_PER_CPU(atomic_t, checking);
+static atomic_t ts_dump;
+
+/*
+ * Check if the current event time stamp matches the deltas on
+ * the buffer page.
+ */
+static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_event_info *info,
+ unsigned long tail)
+{
+ struct ring_buffer_event *event;
+ struct buffer_data_page *bpage;
+ u64 ts, delta;
+ bool full = false;
+ int e;
+
+ bpage = info->tail_page->page;
+
+ if (tail == CHECK_FULL_PAGE) {
+ full = true;
+ tail = local_read(&bpage->commit);
+ } else if (info->add_timestamp &
+ (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)) {
+ /* Ignore events with absolute time stamps */
+ return;
+ }
+
+ /*
+ * Do not check the first event (skip possible extends too).
+ * Also do not check if previous events have not been committed.
+ */
+ if (tail <= 8 || tail > local_read(&bpage->commit))
+ return;
+
+ /*
+ * If this interrupted another event,
+ */
+ if (atomic_inc_return(this_cpu_ptr(&checking)) != 1)
+ goto out;
+
+ ts = bpage->time_stamp;
+
+ for (e = 0; e < tail; e += rb_event_length(event)) {
+
+ event = (struct ring_buffer_event *)(bpage->data + e);
+
+ switch (event->type_len) {
+
+ case RINGBUF_TYPE_TIME_EXTEND:
+ delta = ring_buffer_event_time_stamp(event);
+ ts += delta;
+ break;
+
+ case RINGBUF_TYPE_TIME_STAMP:
+ delta = ring_buffer_event_time_stamp(event);
+ ts = delta;
+ break;
+
+ case RINGBUF_TYPE_PADDING:
+ if (event->time_delta == 1)
+ break;
+ /* fall through */
+ case RINGBUF_TYPE_DATA:
+ ts += event->time_delta;
+ break;
+
+ default:
+ RB_WARN_ON(cpu_buffer, 1);
+ }
+ }
+ if ((full && ts > info->ts) ||
+ (!full && ts + info->delta != info->ts)) {
+ /* If another report is happening, ignore this one */
+ if (atomic_inc_return(&ts_dump) != 1) {
+ atomic_dec(&ts_dump);
+ goto out;
+ }
+ atomic_inc(&cpu_buffer->record_disabled);
+ pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld after:%lld\n",
+ cpu_buffer->cpu,
+ ts + info->delta, info->ts, info->delta, info->after);
+ dump_buffer_page(bpage, info, tail);
+ atomic_dec(&ts_dump);
+ /* Do not re-enable checking */
+ return;
+ }
+out:
+ atomic_dec(this_cpu_ptr(&checking));
+}
+#else
+static inline void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_e