diff options
28 files changed, 1194 insertions, 256 deletions
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index ff658e27d25b..3b5614b1d1a5 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -125,10 +125,13 @@ of ftrace. Here is a list of some of the key files: trace: This file holds the output of the trace in a human - readable format (described below). Note, tracing is temporarily - disabled when the file is open for reading. Once all readers - are closed, tracing is re-enabled. Opening this file for + readable format (described below). Opening this file for writing with the O_TRUNC flag clears the ring buffer content. + Note, this file is not a consumer. If tracing is off + (no tracer running, or tracing_on is zero), it will produce + the same output each time it is read. When tracing is on, + it may produce inconsistent results as it tries to read + the entire buffer without consuming it. trace_pipe: @@ -142,9 +145,7 @@ of ftrace. Here is a list of some of the key files: will not be read again with a sequential read. The "trace" file is static, and if the tracer is not adding more data, it will display the same - information every time it is read. Unlike the - "trace" file, opening this file for reading will not - temporarily disable tracing. + information every time it is read. trace_options: @@ -262,6 +263,20 @@ of ftrace. Here is a list of some of the key files: traced by the function tracer as well. This option will also cause PIDs of tasks that exit to be removed from the file. + set_ftrace_notrace_pid: + + Have the function tracer ignore threads whose PID are listed in + this file. + + If the "function-fork" option is set, then when a task whose + PID is listed in this file forks, the child's PID will + automatically be added to this file, and the child will not be + traced by the function tracer as well. This option will also + cause PIDs of tasks that exit to be removed from the file. + + If a PID is in both this file and "set_ftrace_pid", then this + file takes precedence, and the thread will not be traced. + set_event_pid: Have the events only trace a task with a PID listed in this file. @@ -273,6 +288,19 @@ of ftrace. Here is a list of some of the key files: cause the PIDs of tasks to be removed from this file when the task exits. + set_event_notrace_pid: + + Have the events not trace a task with a PID listed in this file. + Note, sched_switch and sched_wakeup will trace threads not listed + in this file, even if a thread's PID is in the file if the + sched_switch or sched_wakeup events also trace a thread that should + be traced. + + To have the PIDs of children of tasks with their PID in this file + added on fork, enable the "event-fork" option. That option will also + cause the PIDs of tasks to be removed from this file when the task + exits. + set_graph_function: Functions listed in this file will cause the function graph @@ -1125,6 +1153,12 @@ Here are the available options: the trace displays additional information about the latency, as described in "Latency trace format". + pause-on-trace + When set, opening the trace file for read, will pause + writing to the ring buffer (as if tracing_on was set to zero). + This simulates the original behavior of the trace file. + When the file is closed, tracing will be enabled again. + record-cmd When any event or tracer is enabled, a hook is enabled in the sched_switch trace point to fill comm cache @@ -1176,6 +1210,8 @@ Here are the available options: tasks fork. Also, when tasks with PIDs in set_event_pid exit, their PIDs will be removed from the file. + This affects PIDs listed in set_event_notrace_pid as well. + function-trace The latency tracers will enable function tracing if this option is enabled (default it is). When @@ -1190,6 +1226,8 @@ Here are the available options: set_ftrace_pid exit, their PIDs will be removed from the file. + This affects PIDs in set_ftrace_notrace_pid as well. + display-graph When set, the latency tracers (irqsoff, wakeup, etc) will use function graph tracing instead of function tracing. @@ -2126,6 +2164,8 @@ periodically make a CPU constantly busy with interrupts disabled. # cat trace # tracer: hwlat # + # entries-in-buffer/entries-written: 13/13 #P:8 + # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq @@ -2133,12 +2173,18 @@ periodically make a CPU constantly busy with interrupts disabled. # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | - <...>-3638 [001] d... 19452.055471: #1 inner/outer(us): 12/14 ts:1499801089.066141940 - <...>-3638 [003] d... 19454.071354: #2 inner/outer(us): 11/9 ts:1499801091.082164365 - <...>-3638 [002] dn.. 19461.126852: #3 inner/outer(us): 12/9 ts:1499801098.138150062 - <...>-3638 [001] d... 19488.340960: #4 inner/outer(us): 8/12 ts:1499801125.354139633 - <...>-3638 [003] d... 19494.388553: #5 inner/outer(us): 8/12 ts:1499801131.402150961 - <...>-3638 [003] d... 19501.283419: #6 inner/outer(us): 0/12 ts:1499801138.297435289 nmi-total:4 nmi-count:1 + <...>-1729 [001] d... 678.473449: #1 inner/outer(us): 11/12 ts:1581527483.343962693 count:6 + <...>-1729 [004] d... 689.556542: #2 inner/outer(us): 16/9 ts:1581527494.889008092 count:1 + <...>-1729 [005] d... 714.756290: #3 inner/outer(us): 16/16 ts:1581527519.678961629 count:5 + <...>-1729 [001] d... 718.788247: #4 inner/outer(us): 9/17 ts:1581527523.889012713 count:1 + <...>-1729 [002] d... 719.796341: #5 inner/outer(us): 13/9 ts:1581527524.912872606 count:1 + <...>-1729 [006] d... 844.787091: #6 inner/outer(us): 9/12 ts:1581527649.889048502 count:2 + <...>-1729 [003] d... 849.827033: #7 inner/outer(us): 18/9 ts:1581527654.889013793 count:1 + <...>-1729 [007] d... 853.859002: #8 inner/outer(us): 9/12 ts:1581527658.889065736 count:1 + <...>-1729 [001] d... 855.874978: #9 inner/outer(us): 9/11 ts:1581527660.861991877 count:1 + <...>-1729 [001] d... 863.938932: #10 inner/outer(us): 9/11 ts:1581527668.970010500 count:1 nmi-total:7 nmi-count:1 + <...>-1729 [007] d... 878.050780: #11 inner/outer(us): 9/12 ts:1581527683.385002600 count:1 nmi-total:5 nmi-count:1 + <...>-1729 [007] d... 886.114702: #12 inner/outer(us): 9/12 ts:1581527691.385001600 count:1 The above output is somewhat the same in the header. All events will have @@ -2148,7 +2194,7 @@ interrupts disabled 'd'. Under the FUNCTION title there is: This is the count of events recorded that were greater than the tracing_threshold (See below). - inner/outer(us): 12/14 + inner/outer(us): 11/11 This shows two numbers as "inner latency" and "outer latency". The test runs in a loop checking a timestamp twice. The latency detected within @@ -2156,11 +2202,15 @@ interrupts disabled 'd'. Under the FUNCTION title there is: after the previous timestamp and the next timestamp in the loop is the "outer latency". - ts:1499801089.066141940 + ts:1581527483.343962693 + + The absolute timestamp that the first latency was recorded in the window. + + count:6 - The absolute timestamp that the event happened. + The number of times a latency was detected during the window. - nmi-total:4 nmi-count:1 + nmi-total:7 nmi-count:1 On architectures that support it, if an NMI comes in during the test, the time spent in NMI is reported in "nmi-total" (in diff --git a/drivers/Kconfig b/drivers/Kconfig index c7396659fdbe..74920bda3fda 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -200,6 +200,8 @@ source "drivers/thunderbolt/Kconfig" source "drivers/android/Kconfig" +source "drivers/gpu/trace/Kconfig" + source "drivers/nvdimm/Kconfig" source "drivers/dax/Kconfig" diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index f17d01f076c7..835c88318cec 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_TEGRA_HOST1X) += host1x/ obj-y += drm/ vga/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ +obj-$(CONFIG_TRACE_GPU_MEM) += trace/ diff --git a/drivers/gpu/trace/Kconfig b/drivers/gpu/trace/Kconfig new file mode 100644 index 000000000000..c24e9edd022e --- /dev/null +++ b/drivers/gpu/trace/Kconfig @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config TRACE_GPU_MEM + bool diff --git a/drivers/gpu/trace/Makefile b/drivers/gpu/trace/Makefile new file mode 100644 index 000000000000..b70fbdc5847f --- /dev/null +++ b/drivers/gpu/trace/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_TRACE_GPU_MEM) += trace_gpu_mem.o diff --git a/drivers/gpu/trace/trace_gpu_mem.c b/drivers/gpu/trace/trace_gpu_mem.c new file mode 100644 index 000000000000..01e855897b6d --- /dev/null +++ b/drivers/gpu/trace/trace_gpu_mem.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GPU memory trace points + * + * Copyright (C) 2020 Google, Inc. + */ + +#include <linux/module.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/gpu_mem.h> + +EXPORT_TRACEPOINT_SYMBOL(gpu_mem_total); diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index d11e183fcb54..9903088891fa 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -216,7 +216,8 @@ static inline int __init xbc_node_compose_key(struct xbc_node *node, } /* XBC node initializer */ -int __init xbc_init(char *buf); +int __init xbc_init(char *buf, const char **emsg, int *epos); + /* XBC cleanup data structures */ void __init xbc_destroy_all(void); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index df0124eabece..c76b2f3b3ac4 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -135,10 +135,10 @@ void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts); -struct ring_buffer_event * -ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); +void ring_buffer_iter_advance(struct ring_buffer_iter *iter); void ring_buffer_iter_reset(struct ring_buffer_iter *iter); int ring_buffer_iter_empty(struct ring_buffer_iter *iter); +bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter); unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 6c7a10a6d71e..5c6943354049 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -85,6 +85,8 @@ struct trace_iterator { struct mutex mutex; struct ring_buffer_iter **buffer_iter; unsigned long iter_flags; + void *temp; /* temp holder */ + unsigned int temp_size; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; diff --git a/include/trace/events/gpu_mem.h b/include/trace/events/gpu_mem.h new file mode 100644 index 000000000000..1897822a9150 --- /dev/null +++ b/include/trace/events/gpu_mem.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GPU memory trace points + * + * Copyright (C) 2020 Google, Inc. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu_mem + +#if !defined(_TRACE_GPU_MEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_GPU_MEM_H + +#include <linux/tracepoint.h> + +/* + * The gpu_memory_total event indicates that there's an update to either the + * global or process total gpu memory counters. + * + * This event should be emitted whenever the kernel device driver allocates, + * frees, imports, unimports memory in the GPU addressable space. + * + * @gpu_id: This is the gpu id. + * + * @pid: Put 0 for global total, while positive pid for process total. + * + * @size: Virtual size of the allocation in bytes. + * + */ +TRACE_EVENT(gpu_mem_total, + + TP_PROTO(uint32_t gpu_id, uint32_t pid, uint64_t size), + + TP_ARGS(gpu_id, pid, size), + + TP_STRUCT__entry( + __field(uint32_t, gpu_id) + __field(uint32_t, pid) + __field(uint64_t, size) + ), + + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->pid = pid; + __entry->size = size; + ), + + TP_printk("gpu_id=%u pid=%u size=%llu", + __entry->gpu_id, + __entry->pid, + __entry->size) +); + +#endif /* _TRACE_GPU_MEM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/init/main.c b/init/main.c index ee4947af823f..e488213857e2 100644 --- a/init/main.c +++ b/init/main.c @@ -353,6 +353,8 @@ static int __init bootconfig_params(char *param, char *val, static void __init setup_boot_config(const char *cmdline) { static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; + const char *msg; + int pos; u32 size, csum; char *data, *copy; u32 *hdr; @@ -400,10 +402,14 @@ static void __init setup_boot_config(const char *cmdline) memcpy(copy, data, size); copy[size] = '\0'; - ret = xbc_init(copy); - if (ret < 0) - pr_err("Failed to parse bootconfig\n"); - else { + ret = xbc_init(copy, &msg, &pos); + if (ret < 0) { + if (pos < 0) + pr_err("Failed to init bootconfig: %s.\n", msg); + else + pr_err("Failed to parse bootconfig: %s at %d.\n", + msg, pos); + } else { pr_info("Load bootconfig: %d bytes %d nodes\n", size, ret); /* keys starting with "kernel." are passed via cmdline */ extra_command_line = xbc_make_cmdline("kernel"); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fd81c7de77a7..041694a1eb74 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -102,7 +102,7 @@ static bool ftrace_pids_enabled(struct ftrace_ops *ops) tr = ops->private; - return tr->function_pids != NULL; + return tr->function_pids != NULL || tr->function_no_pids != NULL; } static void ftrace_update_trampoline(struct ftrace_ops *ops); @@ -139,13 +139,23 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops) #endif } +#define FTRACE_PID_IGNORE -1 +#define FTRACE_PID_TRACE -2 + static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs) { struct trace_array *tr = op->private; + int pid; - if (tr && this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid)) - return; + if (tr) { + pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid); + if (pid == FTRACE_PID_IGNORE) + return; + if (pid != FTRACE_PID_TRACE && + pid != current->pid) + return; + } op->saved_func(ip, parent_ip, op, regs); } @@ -6923,11 +6933,17 @@ ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, { struct trace_array *tr = data; struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; pid_list = rcu_dereference_sched(tr->function_pids); + no_pid_list = rcu_dereference_sched(tr->function_no_pids); - this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, - trace_ignore_this_task(pid_list, next)); + if (trace_ignore_this_task(pid_list, no_pid_list, next)) + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + FTRACE_PID_IGNORE); + else + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + next->pid); } static void @@ -6940,6 +6956,9 @@ ftrace_pid_follow_sched_process_fork(void *data, pid_list = rcu_dereference_sched(tr->function_pids); trace_filter_add_remove_task(pid_list, self, task); + + pid_list = rcu_dereference_sched(tr->function_no_pids); + trace_filter_add_remove_task(pid_list, self, task); } static void @@ -6950,6 +6969,9 @@ ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task) pid_list = rcu_dereference_sched(tr->function_pids); trace_filter_add_remove_task(pid_list, NULL, task); + + pid_list = rcu_dereference_sched(tr->function_no_pids); + trace_filter_add_remove_task(pid_list, NULL, task); } void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) @@ -6967,42 +6989,57 @@ void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) } } -static void clear_ftrace_pids(struct trace_array *tr) +static void clear_ftrace_pids(struct trace_array *tr, int type) { struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; int cpu; pid_list = rcu_dereference_protected(tr->function_pids, lockdep_is_held(&ftrace_lock)); - if (!pid_list) + no_pid_list = rcu_dereference_protected(tr->function_no_pids, + lockdep_is_held(&ftrace_lock)); + + /* Make sure there's something to do */ + if (!pid_type_enabled(type, pid_list, no_pid_list)) return; - unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); + /* See if the pids still need to be checked after this */ + if (!still_need_pid_events(type, pid_list, no_pid_list)) { + unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); + for_each_possible_cpu(cpu) + per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = FTRACE_PID_TRACE; + } - for_each_possible_cpu(cpu) - per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = false; + if (type & TRACE_PIDS) + rcu_assign_pointer(tr->function_pids, NULL); - rcu_assign_pointer(tr->function_pids, NULL); + if (type & TRACE_NO_PIDS) + rcu_assign_pointer(tr->function_no_pids, NULL); /* Wait till all users are no longer using pid filtering */ synchronize_rcu(); - trace_free_pid_list(pid_list); + if ((type & TRACE_PIDS) && pid_list) + trace_free_pid_list(pid_list); + + if ((type & TRACE_NO_PIDS) && no_pid_list) + trace_free_pid_list(no_pid_list); } void ftrace_clear_pids(struct trace_array *tr) { mutex_lock(&ftrace_lock); - clear_ftrace_pids(tr); + clear_ftrace_pids(tr, TRACE_PIDS | TRACE_NO_PIDS); mutex_unlock(&ftrace_lock); } -static void ftrace_pid_reset(struct trace_array *tr) +static void ftrace_pid_reset(struct trace_array *tr, int type) { mutex_lock(&ftrace_lock); - clear_ftrace_pids(tr); + clear_ftrace_pids(tr, type); ftrace_update_pid_func(); ftrace_startup_all(0); @@ -7066,9 +7103,45 @@ static const struct seq_operations ftrace_pid_sops = { .show = fpid_show, }; -static int -ftrace_pid_open(struct inode *inode, struct file *file) +static void *fnpid_start(struct seq_file *m, loff_t *pos) + __acquires(RCU) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = m->private; + + mutex_lock(&ftrace_lock); + rcu_read_lock_sched(); + + pid_list = rcu_dereference_sched(tr->function_no_pids); + + if (!pid_list) + return !(*pos) ? FTRACE_NO_PIDS : NULL; + + return trace_pid_start(pid_list, pos); +} + +static void *fnpid_next(struct seq_file *m, void *v, loff_t *pos) { + struct trace_array *tr = m->private; + struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_no_pids); + + if (v == FTRACE_NO_PIDS) { + (*pos)++; + return NULL; + } + return trace_pid_next(pid_list, v, pos); +} + +static const struct seq_operations ftrace_no_pid_sops = { + .start = fnpid_start, + .next = fnpid_next, + .stop = fpid_stop, + .show = fpid_show, +}; + +static int pid_open(struct inode *inode, struct file *file, int type) +{ + const struct seq_operations *seq_ops; struct trace_array *tr = inode->i_private; struct seq_file *m; int ret = 0; @@ -7079,9 +7152,18 @@ ftrace_pid_open(struct inode *inode, struct file *file) if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - ftrace_pid_reset(tr); + ftrace_pid_reset(tr, type); + + switch (type) { + case TRACE_PIDS: + seq_ops = &ftrace_pid_sops; + break; + case TRACE_NO_PIDS: + seq_ops = &ftrace_no_pid_sops; + break; + } - ret = seq_open(file, &ftrace_pid_sops); + ret = seq_open(file, seq_ops); if (ret < 0) { trace_array_put(tr); } else { @@ -7093,10 +7175,23 @@ ftrace_pid_open(struct inode *inode, struct file *file) return ret; } +static int +ftrace_pid_open(struct inode *inode, struct file *file) +{ + return pid_open(inode, file, TRACE_PIDS); +} + +static int +ftrace_no_pid_open(struct inode *inode, struct file *file) +{ + return pid_open(inode, file, TRACE_NO_PIDS); +} + static void ignore_task_cpu(void *data) { struct trace_array *tr = data; struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; /* * This function is called by on_each_cpu() while the @@ -7104,18 +7199,25 @@ static void ignore_task_cpu(void *data) */ pid_list = rcu_dereference_protected(tr->function_pids, mutex_is_locked(&ftrace_lock)); + no_pid_list = rcu_dereference_protected(tr->function_no_pids, + mutex_is_locked(&ftrace_lock)); - this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, - trace_ignore_this_task(pid_list, current)); + if (trace_ignore_this_task(pid_list, no_pid_list, current)) + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + FTRACE_PID_IGNORE); + else + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + current->pid); } static ssize_t -ftrace_pid_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, int type) { struct seq_file *m = filp->private_data; struct trace_array *tr = m->private; - struct trace_pid_list *filtered_pids = NULL; + struct trace_pid_list *filtered_pids; + struct trace_pid_list *other_pids; struct trace_pid_list *pid_list; ssize_t ret; @@ -7124,19 +7226,39 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, mutex_lock(&ftrace_lock); - filtered_pids = rcu_dereference_protected(tr->function_pids, + switch (type) { + case TRACE_PIDS: + filtered_pids = rcu_dereference_protected(tr->function_pids, + lockdep_is_held(&ftrace_lock)); + other_pids = rcu_dereference_protected(tr->function_no_pids, + lockdep_is_held(&ftrace_lock)); + break; + case TRACE_NO_PIDS: + filtered_pids = rcu_dereference_protected(tr->function_no_pids, + lockdep_is_held(&ftrace_lock)); + other_pids = rcu_dereference_protected(tr->function_pids, lockdep_is_held(&ftrace_lock)); + break; + } ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); if (ret < 0) goto out; - rcu_assign_pointer(tr->function_pids, pid_list); + switch (type) { + case TRACE_PIDS: + rcu_assign_pointer(tr->function_pids, pid_list); + break; + case TRACE_NO_PIDS: + rcu_assign_pointer(tr->function_no_pids, pid_list); + break; + } + if (filtered_pids) { synchronize_rcu(); trace_free_pid_list(filtered_pids); - } else if (pid_list) { + } else if (pid_list && !other_pids) { /* Register a probe to set whether to ignore the tracing of a task */ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); } @@ -7159,6 +7281,20 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, return ret; } +static ssize_t +ftrace_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS); +} + +static ssize_t +ftrace_no_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS); +} + static int ftrace_pid_release(struct inode *inode, struct file *file) { @@ -7177,10 +7313,20 @@ static const struct file_operations ftrace_pid_fops = { .release = ftrace_pid_release, }; +static const struct file_operations ftrace_no_pid_fops = { + .open = ftrace_no_pid_open, + .write = ftrace_no_pid_write, + .read = seq_read, + .llseek = tracing_lseek, + .release = ftrace_pid_release, +}; + void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer) { trace_create_file("set_ftrace_pid", 0644, d_tracer, tr, &ftrace_pid_fops); + trace_create_file("set_ftrace_notrace_pid", 0644, d_tracer, + tr, &ftrace_no_pid_fops); } void __init ftrace_init_tracefs_toplevel(struct trace_array *tr, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 61f0e92ace99..6f0b42ceeb00 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -441,6 +441,7 @@ enum { struct ring_buffer_per_cpu { int cpu; atomic_t record_disabled; + atomic_t resize_disabled; struct trace_buffer *buffer; raw_spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; @@ -484,7 +485,6 @@ struct trace_buffer { unsigned flags; int cpus; atomic_t record_disabled; - atomic_t resize_disabled; cpumask_var_t cpumask; struct lock_class_key *reader_lock_key; @@ -503,10 +503,14 @@ struct trace_buffer { struct ring_buffer_iter { struct ring_buffer_per_cpu *cpu_buffer; unsigned long head; + unsigned long next_event; struct buffer_page *head_page; struct buffer_page *cache_reader_page; unsigned long cache_read; u64 read_stamp; + u64 page_stamp; + struct ring_buffer_event *event; + int missed_events; }; /** @@ -1737,18 +1741,24 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, size = nr_pages * BUF_PAGE_SIZE; - /* - * Don't succeed if resizing is disabled, as a reader might be - * manipulating the ring buffer and is expecting a sane state while - * this is true. - */ - if (atomic_read(&buffer->resize_disabled)) - return -EBUSY; - /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); + if (cpu_id == RING_BUFFER_ALL_CPUS) { + /* + * Don't succeed if resizing is disabled, as a reader might be + * manipulating the ring buffer and is expecting a sane state while + * this is true. + */ + for_each_buffer_cpu(buffer, cpu) { + cpu_buffer = buffer->buffers[cpu]; + if (atomic_read(&cpu_buffer->resize_disabled)) { + err = -EBUSY; + goto out_err_unlock; + } + } + /* calculate the pages to update */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; @@ -1816,6 +1826,16 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, if (nr_pages == cpu_buffer->nr_pages) goto out; + /* + * Don't succeed if resizing is disabled, as a reader might be + * manipulating the ring buffer and is expecting a sane state while + * this is true. + */ + if (atomic_read(&cpu_buffer->resize_disabled)) { + err = -EBUSY; + goto out_err_unlock; + } + cpu_buffer->nr_pages_to_update = nr_pages - cpu_buffer->nr_pages; @@ -1885,6 +1905,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, free_buffer_page(bpage); } } + out_err_unlock: mutex_unlock(&buffer->mutex); return err; } @@ -1913,15 +1934,63 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline struct ring_buffer_event * -rb_iter_head_event(struct ring_buffer_iter *iter) +static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) { - return __rb_page_index(iter->head_page, iter->head); + return local_read(&bpage->page->commit); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) +static struct ring_buffer_event * +rb_iter_head_event(struct ring_buffer_iter *iter) { - return local_read(&bpage->page->commit); + struct ring_buffer_event *event; + struct buffer_page *iter_head_page = iter->head_page; + unsigned long commit; + unsigned length; + + if (iter->head != iter->next_event) + return iter->event; + + /* + * When the writer goes across pages, it issues a cmpxchg which + * is a mb(), which will synchronize with the rmb here. + * (see rb_tail_page_update() and __rb_reserve_next()) + */ + commit = rb_page_commit(iter_head_page); + smp_rmb(); + event = __rb_page_index(iter_head_page, iter->head); + length = rb_event_length(event); + + /* + * READ_ONCE() doesn't work on functions and we don't want the + * compiler doing any crazy optimizations with length. + */ + barrier(); + + if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE) + /* Writer corrupted the read? */ + goto reset; + + memcpy(iter->event, event, length); + /* + * If the page stamp is still the same after this rmb() then the + * event was safely copied w |