Diffstat (limited to 'kernel')
36 files changed, 662 insertions, 352 deletions
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig index 54102deb50ba..53d51ed619a3 100644 --- a/kernel/livepatch/Kconfig +++ b/kernel/livepatch/Kconfig @@ -6,7 +6,7 @@ config HAVE_LIVEPATCH config LIVEPATCH bool "Kernel Live Patching" - depends on DYNAMIC_FTRACE_WITH_REGS + depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS depends on MODULES depends on SYSFS depends on KALLSYMS_ALL diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index b552cf2d85f8..e8029aea67f1 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -40,14 +40,18 @@ struct klp_ops *klp_find_ops(void *old_func) static void notrace klp_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *fops, - struct pt_regs *regs) + struct ftrace_regs *fregs) { struct klp_ops *ops; struct klp_func *func; int patch_state; + int bit; ops = container_of(fops, struct klp_ops, fops); + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (WARN_ON_ONCE(bit < 0)) + return; /* * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). @@ -113,10 +117,11 @@ static void notrace klp_ftrace_handler(unsigned long ip, if (func->nop) goto unlock; - klp_arch_set_pc(regs, (unsigned long)func->new_func); + klp_arch_set_pc(fregs, (unsigned long)func->new_func); unlock: preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } /* @@ -194,8 +199,10 @@ static int klp_patch_func(struct klp_func *func) return -ENOMEM; ops->fops.func = klp_ftrace_handler; - ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS | - FTRACE_OPS_FL_DYNAMIC | + ops->fops.flags = FTRACE_OPS_FL_DYNAMIC | +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + FTRACE_OPS_FL_SAVE_REGS | +#endif FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_PERMANENT; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2f8d9cc42c40..d5a19413d4f8 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -31,6 +31,15 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS bool +config HAVE_DYNAMIC_FTRACE_WITH_ARGS + bool + help + If this is set, then arguments and stack can be found from + the pt_regs passed into the function callback regs parameter + by default, even without setting the REGS flag in the ftrace_ops. + This allows for use of regs_get_kernel_argument() and + kernel_stack_pointer(). + config HAVE_FTRACE_MCOUNT_RECORD bool help @@ -725,6 +734,45 @@ config TRACE_EVAL_MAP_FILE If unsure, say N. +config FTRACE_RECORD_RECURSION + bool "Record functions that recurse in function tracing" + depends on FUNCTION_TRACER + help + All callbacks that attach to the function tracing have some sort + of protection against recursion. Even though the protection exists, + it adds overhead. This option will create a file in the tracefs + file system called "recursed_functions" that will list the functions + that triggered a recursion. + + This will add more overhead to cases that have recursion. + + If unsure, say N + +config FTRACE_RECORD_RECURSION_SIZE + int "Max number of recursed functions to record" + default 128 + depends on FTRACE_RECORD_RECURSION + help + This defines the limit of number of functions that can be + listed in the "recursed_functions" file, that lists all + the functions that caused a recursion to happen. + This file can be reset, but the limit can not change in + size at runtime. 
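The klp_ftrace_handler() hunk above shows the pattern the new recursion helpers are meant for: a callback takes the recursion lock itself (and therefore does not need the ftrace core's assist wrapper), and any recursion it trips can be recorded in the "recursed_functions" file when FTRACE_RECORD_RECURSION is enabled. Below is a minimal sketch of a callback written the same way; the callback and ops names are made up for illustration, and it assumes the ftrace_test_recursion_trylock()/ftrace_test_recursion_unlock() calls with the signatures used in the livepatch hunk.

#include <linux/ftrace.h>

/* Hypothetical callback, not part of the patch: handles its own recursion. */
static void notrace my_func_callback(unsigned long ip, unsigned long parent_ip,
				     struct ftrace_ops *op,
				     struct ftrace_regs *fregs)
{
	int bit;

	/* Returns a bit to release on exit, or < 0 if we are recursing. */
	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;

	preempt_disable_notrace();
	/* ... the actual tracing work would go here ... */
	preempt_enable_notrace();

	ftrace_test_recursion_unlock(bit);
}

static struct ftrace_ops my_ops = {
	.func = my_func_callback,
	/*
	 * FTRACE_OPS_FL_RECURSION is deliberately not set: the callback
	 * protects itself, so ftrace can call it directly instead of
	 * routing it through ftrace_ops_assist_func().
	 */
};

Registered with register_ftrace_function(&my_ops), a callback like this avoids the extra indirection of the assist handler, and if it ever does recurse the event shows up in recursed_functions rather than silently costing overhead.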
+ +config RING_BUFFER_RECORD_RECURSION + bool "Record functions that recurse in the ring buffer" + depends on FTRACE_RECORD_RECURSION + # default y, because it is coupled with FTRACE_RECORD_RECURSION + default y + help + The ring buffer has its own internal recursion. Although when + recursion happens it wont cause harm because of the protection, + but it does cause an unwanted overhead. Enabling this option will + place where recursion was detected into the ftrace "recursed_functions" + file. + + This will add more overhead to cases that have recursion. + config GCOV_PROFILE_FTRACE bool "Enable GCOV profiling on ftrace subsystem" depends on GCOV_KERNEL @@ -795,6 +843,26 @@ config RING_BUFFER_STARTUP_TEST If unsure, say N +config RING_BUFFER_VALIDATE_TIME_DELTAS + bool "Verify ring buffer time stamp deltas" + depends on RING_BUFFER + help + This will audit the time stamps on the ring buffer sub + buffer to make sure that all the time deltas for the + events on a sub buffer matches the current time stamp. + This audit is performed for every event that is not + interrupted, or interrupting another event. A check + is also made when traversing sub buffers to make sure + that all the deltas on the previous sub buffer do not + add up to be greater than the current time stamp. + + NOTE: This adds significant overhead to recording of events, + and should only be used to test the logic of the ring buffer. + Do not use it on production systems. + + Only say Y if you understand what this does, and you + still want it enabled. Otherwise say N + config MMIOTRACE_TEST tristate "Test module for mmiotrace" depends on MMIOTRACE && m diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index e153be351548..7e44cea89fdc 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o +obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 456fe4ce6942..fb0fe4c66b84 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1279,7 +1279,7 @@ static void blk_log_action(struct trace_iterator *iter, const char *act, * ones now use the 64bit ino as the whole ID and * no longer use generation. * - * Regarldess of the content, always output + * Regardless of the content, always output * "LOW32,HIGH32" so that FILEID_INO32_GEN fid can * be mapped back to @id on both 64 and 32bit ino * setups. See __kernfs_fh_to_dentry(). @@ -1321,7 +1321,7 @@ static void blk_log_dump_pdu(struct trace_seq *s, i == 0 ? "" : " ", pdu_buf[i]); /* - * stop when the rest is just zeroes and indicate so + * stop when the rest is just zeros and indicate so * with a ".." appended */ if (i == end && end != pdu_len - 1) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ebadaa83502c..6c0018abe68a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -116,7 +116,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock * to all call sites, we did a bpf_prog_array_valid() there to check * whether call->prog_array is empty or not, which is - * a heurisitc to speed up execution. + * a heuristic to speed up execution. 
* * If bpf_prog_array_valid() fetched prog_array was * non-NULL, we go into trace_call_bpf() and do the actual diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 5658f13037b3..73edb9e4f354 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -334,8 +334,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, static struct ftrace_ops graph_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED | + .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID | FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9c1bba8cc51b..4d8e35575549 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -80,7 +80,7 @@ enum { struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, + .flags = FTRACE_OPS_FL_STUB, INIT_OPS_HASH(ftrace_list_end) }; @@ -121,7 +121,7 @@ struct ftrace_ops global_ops; #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); #else /* See comment below, where ftrace_ops_list_func is defined */ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); @@ -140,7 +140,7 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops) } static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = op->private; int pid; @@ -154,7 +154,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, return; } - op->saved_func(ip, parent_ip, op, regs); + op->saved_func(ip, parent_ip, op, fregs); } static void ftrace_sync_ipi(void *data) @@ -754,7 +754,7 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) static void function_profile_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; @@ -866,7 +866,7 @@ static void unregister_ftrace_profiler(void) #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + .flags = FTRACE_OPS_FL_INITIALIZED, INIT_OPS_HASH(ftrace_profile_ops) }; @@ -1040,8 +1040,7 @@ struct ftrace_ops global_ops = { .local_hash.notrace_hash = EMPTY_HASH, .local_hash.filter_hash = EMPTY_HASH, INIT_OPS_HASH(global_ops) - .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED | + .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID, }; @@ -2146,6 +2145,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) else rec->flags &= ~FTRACE_FL_TRAMP_EN; } + if (flag & FTRACE_FL_DIRECT) { /* * If there's only one user (direct_ops helper) @@ -2389,8 +2389,9 @@ unsigned long ftrace_find_rec_direct(unsigned long ip) } static void call_direct_funcs(unsigned long ip, unsigned long pip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); unsigned long addr; addr = ftrace_find_rec_direct(ip); @@ -2402,7 +2403,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops direct_ops = { .func = 
call_direct_funcs, - .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION_SAFE + .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_PERMANENT, /* @@ -4183,7 +4184,6 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, struct ftrace_hash **orig_hash, *new_hash; LIST_HEAD(process_mods); char *func; - int ret; mutex_lock(&ops->func_hash->regex_lock); @@ -4236,7 +4236,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, mutex_lock(&ftrace_lock); - ret = ftrace_hash_move_and_update_ops(ops, orig_hash, + ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable); mutex_unlock(&ftrace_lock); @@ -4314,7 +4314,7 @@ static int __init ftrace_mod_cmd_init(void) core_initcall(ftrace_mod_cmd_init); static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct ftrace_probe_ops *probe_ops; struct ftrace_func_probe *probe; @@ -5588,7 +5588,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file) struct ftrace_hash **orig_hash; struct trace_parser *parser; int filter_hash; - int ret; if (file->f_mode & FMODE_READ) { iter = m->private; @@ -5616,7 +5615,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file) orig_hash = &iter->ops->func_hash->notrace_hash; mutex_lock(&ftrace_lock); - ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash, + ftrace_hash_move_and_update_ops(iter->ops, orig_hash, iter->hash, filter_hash); mutex_unlock(&ftrace_lock); } else { @@ -6884,8 +6883,7 @@ void ftrace_init_trace_array(struct trace_array *tr) struct ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED | + .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID, }; @@ -6935,12 +6933,13 @@ void ftrace_reset_array_ops(struct trace_array *tr) static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ignored, struct pt_regs *regs) + struct ftrace_ops *ignored, struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); struct ftrace_ops *op; int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; @@ -6969,7 +6968,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, pr_warn("op=%p %pS\n", op, op); goto out; } - op->func(ip, parent_ip, op, regs); + op->func(ip, parent_ip, op, fregs); } } while_for_each_ftrace_op(op); out: @@ -6992,9 +6991,9 @@ out: */ #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { - __ftrace_ops_list_func(ip, parent_ip, NULL, regs); + __ftrace_ops_list_func(ip, parent_ip, NULL, fregs); } NOKPROBE_SYMBOL(ftrace_ops_list_func); #else @@ -7011,18 +7010,18 @@ NOKPROBE_SYMBOL(ftrace_ops_no_ops); * this function will be called by the mcount trampoline. 
*/ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; preempt_disable_notrace(); if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) - op->func(ip, parent_ip, op, regs); + op->func(ip, parent_ip, op, fregs); preempt_enable_notrace(); trace_clear_recursion(bit); @@ -7043,11 +7042,11 @@ NOKPROBE_SYMBOL(ftrace_ops_assist_func); ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) { /* - * If the function does not handle recursion, needs to be RCU safe, - * or does per cpu logic, then we need to call the assist handler. + * If the function does not handle recursion or needs to be RCU safe, + * then we need to call the assist handler. */ - if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || - ops->flags & FTRACE_OPS_FL_RCU) + if (ops->flags & (FTRACE_OPS_FL_RECURSION | + FTRACE_OPS_FL_RCU)) return ftrace_ops_assist_func; return ops->func; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a6268e09160a..ec08f948dd80 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4,6 +4,7 @@ * * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> */ +#include <linux/trace_recursion.h> #include <linux/trace_events.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> @@ -129,7 +130,16 @@ int ring_buffer_print_entry_header(struct trace_seq *s) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ -#define RB_ALIGN_DATA __aligned(RB_ALIGNMENT) + +#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS +# define RB_FORCE_8BYTE_ALIGNMENT 0 +# define RB_ARCH_ALIGNMENT RB_ALIGNMENT +#else +# define RB_FORCE_8BYTE_ALIGNMENT 1 +# define RB_ARCH_ALIGNMENT 8U +#endif + +#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT) /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA 0 ... 
RINGBUF_TYPE_DATA_TYPE_LEN_MAX @@ -1422,7 +1432,8 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) return 0; } -static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu) +static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, + long nr_pages, struct list_head *pages) { struct buffer_page *bpage, *tmp; bool user_thread = current->mm != NULL; @@ -1462,13 +1473,15 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu) struct page *page; bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), - mflags, cpu_to_node(cpu)); + mflags, cpu_to_node(cpu_buffer->cpu)); if (!bpage) goto free_pages; + rb_check_bpage(cpu_buffer, bpage); + list_add(&bpage->list, pages); - page = alloc_pages_node(cpu_to_node(cpu), mflags, 0); + page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags, 0); if (!page) goto free_pages; bpage->page = page_address(page); @@ -1500,7 +1513,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, WARN_ON(!nr_pages); - if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu)) + if (__rb_allocate_pages(cpu_buffer, nr_pages, &pages)) return -ENOMEM; /* @@ -1973,8 +1986,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, if (nr_pages < 2) nr_pages = 2; - size = nr_pages * BUF_PAGE_SIZE; - /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); @@ -2009,8 +2020,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, * allocated without receiving ENOMEM */ INIT_LIST_HEAD(&cpu_buffer->new_pages); - if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update, - &cpu_buffer->new_pages, cpu)) { + if (__rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update, + &cpu_buffer->new_pages)) { /* not enough memory for new pages */ err = -ENOMEM; goto out_err; @@ -2075,8 +2086,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, INIT_LIST_HEAD(&cpu_buffer->new_pages); if (cpu_buffer->nr_pages_to_update > 0 && - __rb_allocate_pages(cpu_buffer->nr_pages_to_update, - &cpu_buffer->new_pages, cpu_id)) { + __rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update, + &cpu_buffer->new_pages)) { err = -ENOMEM; goto out_err; } @@ -2628,9 +2639,6 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs) return skip_time_extend(event); } -static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event); - #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK static inline bool sched_clock_stable(void) { @@ -2719,7 +2727,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, event->time_delta = delta; length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA) { + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { event->type_len = 0; event->array[0] = length; } else @@ -2734,11 +2742,11 @@ static unsigned rb_calculate_event_length(unsigned length) if (!length) length++; - if (length > RB_MAX_SMALL_DATA) + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) length += sizeof(event.array[0]); length += RB_EVNT_HDR_SIZE; - length = ALIGN(length, RB_ALIGNMENT); + length = ALIGN(length, RB_ARCH_ALIGNMENT); /* * In case the time delta is larger than the 27 bits for it @@ -2758,20 +2766,6 @@ static unsigned rb_calculate_event_length(unsigned length) return length; } -static __always_inline bool -rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) -{ - unsigned long addr = (unsigned 
long)event; - unsigned long index; - - index = rb_event_index(event); - addr &= PAGE_MASK; - - return cpu_buffer->commit_page->page == (void *)addr && - rb_commit_index(cpu_buffer) == index; -} - static u64 rb_time_delta(struct ring_buffer_event *event) { switch (event->type_len) { @@ -3006,6 +3000,13 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) irq_work_queue(&cpu_buffer->irq_work.work); } +#ifdef CONFIG_RING_BUFFER_RECORD_RECURSION +# define do_ring_buffer_record_recursion() \ + do_ftrace_record_recursion(_THIS_IP_, _RET_IP_) +#else +# define do_ring_buffer_record_recursion() do { } while (0) +#endif + /* * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified @@ -3088,8 +3089,10 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) * been updated yet. In this case, use the TRANSITION bit. */ bit = RB_CTX_TRANSITION; - if (val & (1 << (bit + cpu_buffer->nest))) + if (val & (1 << (bit + cpu_buffer->nest))) { + do_ring_buffer_record_recursion(); return 1; + } } val |= (1 << (bit + cpu_buffer->nest)); @@ -3183,6 +3186,153 @@ int ring_buffer_unlock_commit(struct trace_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); +/* Special value to validate all deltas on a page. */ +#define CHECK_FULL_PAGE 1L + +#ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS +static void dump_buffer_page(struct buffer_data_page *bpage, + struct rb_event_info *info, + unsigned long tail) +{ + struct ring_buffer_event *event; + u64 ts, delta; + int e; + + ts = bpage->time_stamp; + pr_warn(" [%lld] PAGE TIME STAMP\n", ts); + + for (e = 0; e < tail; e += rb_event_length(event)) { + + event = (struct ring_buffer_event *)(bpage->data + e); + + switch (event->type_len) { + + case RINGBUF_TYPE_TIME_EXTEND: + delta = ring_buffer_event_time_stamp(event); + ts += delta; + pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta); + break; + + case RINGBUF_TYPE_TIME_STAMP: + delta = ring_buffer_event_time_stamp(event); + ts = delta; + pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta); + break; + + case RINGBUF_TYPE_PADDING: + ts += event->time_delta; + pr_warn(" [%lld] delta:%d PADDING\n", ts, event->time_delta); + break; + + case RINGBUF_TYPE_DATA: + ts += event->time_delta; + pr_warn(" [%lld] delta:%d\n", ts, event->time_delta); + break; + + default: + break; + } + } +} + +static DEFINE_PER_CPU(atomic_t, checking); +static atomic_t ts_dump; + +/* + * Check if the current event time stamp matches the deltas on + * the buffer page. + */ +static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, + struct rb_event_info *info, + unsigned long tail) +{ + struct ring_buffer_event *event; + struct buffer_data_page *bpage; + u64 ts, delta; + bool full = false; + int e; + + bpage = info->tail_page->page; + + if (tail == CHECK_FULL_PAGE) { + full = true; + tail = local_read(&bpage->commit); + } else if (info->add_timestamp & + (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)) { + /* Ignore events with absolute time stamps */ + return; + } + + /* + * Do not check the first event (skip possible extends too). + * Also do not check if previous events have not been committed. 
+ */ + if (tail <= 8 || tail > local_read(&bpage->commit)) + return; + + /* + * If this interrupted another event, + */ + if (atomic_inc_return(this_cpu_ptr(&checking)) != 1) + goto out; + + ts = bpage->time_stamp; + + for (e = 0; e < tail; e += rb_event_length(event)) { + + event = (struct ring_buffer_event *)(bpage->data + e); + + switch (event->type_len) { + + case RINGBUF_TYPE_TIME_EXTEND: + delta = ring_buffer_event_time_stamp(event); + ts += delta; + break; + + case RINGBUF_TYPE_TIME_STAMP: + delta = ring_buffer_event_time_stamp(event); + ts = delta; + break; + + case RINGBUF_TYPE_PADDING: + if (event->time_delta == 1) + break; + /* fall through */ + case RINGBUF_TYPE_DATA: + ts += event->time_delta; + break; + + default: + RB_WARN_ON(cpu_buffer, 1); + } + } + if ((full && ts > info->ts) || + (!full && ts + info->delta != info->ts)) { + /* If another report is happening, ignore this one */ + if (atomic_inc_return(&ts_dump) != 1) { + atomic_dec(&ts_dump); + goto out; + } + atomic_inc(&cpu_buffer->record_disabled); + pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld after:%lld\n", + cpu_buffer->cpu, + ts + info->delta, info->ts, info->delta, info->after); + dump_buffer_page(bpage, info, tail); + atomic_dec(&ts_dump); + /* Do not re-enable checking */ + return; + } +out: + atomic_dec(this_cpu_ptr(&checking)); +} +#else +static inline void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, + struct rb_e |
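The listing cuts off inside check_buffer(), but the other thread running through these hunks is the switch of every callback from pt_regs to ftrace_regs. Under the HAVE_DYNAMIC_FTRACE_WITH_ARGS option described in the Kconfig hunk, a callback only sees the argument-saving state by default; a full pt_regs has to be requested with FTRACE_OPS_FL_SAVE_REGS and converted with ftrace_get_regs(), roughly as sketched below (the callback and ops names are hypothetical; ftrace_get_regs() and the flags are the ones used in the hunks above).

#include <linux/ftrace.h>

/* Hypothetical callback, not part of the patch: needs the full register set. */
static void notrace my_regs_callback(unsigned long ip, unsigned long parent_ip,
				     struct ftrace_ops *op,
				     struct ftrace_regs *fregs)
{
	struct pt_regs *regs = ftrace_get_regs(fregs);

	/*
	 * ftrace_get_regs() returns NULL when only the partial, argument-
	 * saving state was captured; with FTRACE_OPS_FL_SAVE_REGS set below
	 * this check is purely defensive.
	 */
	if (!regs)
		return;

	/* e.g. instruction_pointer(regs) or regs_get_kernel_argument(regs, 0) */
}

static struct ftrace_ops my_regs_ops = {
	.func  = my_regs_callback,
	.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_RECURSION,
};

Because this callback leaves recursion protection to the core, it sets FTRACE_OPS_FL_RECURSION, which is exactly the case the reworked ftrace_ops_get_func() now routes through ftrace_ops_assist_func().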