summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/ftrace.rst82
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/gpu/Makefile1
-rw-r--r--drivers/gpu/trace/Kconfig4
-rw-r--r--drivers/gpu/trace/Makefile3
-rw-r--r--drivers/gpu/trace/trace_gpu_mem.c13
-rw-r--r--include/linux/bootconfig.h3
-rw-r--r--include/linux/ring_buffer.h4
-rw-r--r--include/linux/trace_events.h2
-rw-r--r--include/trace/events/gpu_mem.h57
-rw-r--r--init/main.c14
-rw-r--r--kernel/trace/ftrace.c200
-rw-r--r--kernel/trace/ring_buffer.c239
-rw-r--r--kernel/trace/trace.c110
-rw-r--r--kernel/trace/trace.h39
-rw-r--r--kernel/trace/trace_entries.h4
-rw-r--r--kernel/trace/trace_events.c280
-rw-r--r--kernel/trace/trace_functions_graph.c2
-rw-r--r--kernel/trace/trace_hwlat.c24
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--kernel/trace/trace_output.c19
-rw-r--r--lib/bootconfig.c35
-rw-r--r--tools/bootconfig/Makefile27
-rw-r--r--tools/bootconfig/main.c35
-rwxr-xr-xtools/bootconfig/test-bootconfig.sh14
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc125
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc108
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc2
28 files changed, 1194 insertions, 256 deletions
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index ff658e27d25b..3b5614b1d1a5 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -125,10 +125,13 @@ of ftrace. Here is a list of some of the key files:
trace:
This file holds the output of the trace in a human
- readable format (described below). Note, tracing is temporarily
- disabled when the file is open for reading. Once all readers
- are closed, tracing is re-enabled. Opening this file for
+ readable format (described below). Opening this file for
writing with the O_TRUNC flag clears the ring buffer content.
+ Note, this file is not a consumer. If tracing is off
+ (no tracer running, or tracing_on is zero), it will produce
+ the same output each time it is read. When tracing is on,
+ it may produce inconsistent results as it tries to read
+ the entire buffer without consuming it.
trace_pipe:
@@ -142,9 +145,7 @@ of ftrace. Here is a list of some of the key files:
will not be read again with a sequential read. The
"trace" file is static, and if the tracer is not
adding more data, it will display the same
- information every time it is read. Unlike the
- "trace" file, opening this file for reading will not
- temporarily disable tracing.
+ information every time it is read.
trace_options:
@@ -262,6 +263,20 @@ of ftrace. Here is a list of some of the key files:
traced by the function tracer as well. This option will also
cause PIDs of tasks that exit to be removed from the file.
+ set_ftrace_notrace_pid:
+
+ Have the function tracer ignore threads whose PID are listed in
+ this file.
+
+ If the "function-fork" option is set, then when a task whose
+ PID is listed in this file forks, the child's PID will
+ automatically be added to this file, and the child will not be
+ traced by the function tracer as well. This option will also
+ cause PIDs of tasks that exit to be removed from the file.
+
+ If a PID is in both this file and "set_ftrace_pid", then this
+ file takes precedence, and the thread will not be traced.
+
set_event_pid:
Have the events only trace a task with a PID listed in this file.
@@ -273,6 +288,19 @@ of ftrace. Here is a list of some of the key files:
cause the PIDs of tasks to be removed from this file when the task
exits.
+ set_event_notrace_pid:
+
+ Have the events not trace a task with a PID listed in this file.
+ Note, sched_switch and sched_wakeup will trace threads not listed
+ in this file, even if a thread's PID is in the file if the
+ sched_switch or sched_wakeup events also trace a thread that should
+ be traced.
+
+ To have the PIDs of children of tasks with their PID in this file
+ added on fork, enable the "event-fork" option. That option will also
+ cause the PIDs of tasks to be removed from this file when the task
+ exits.
+
set_graph_function:
Functions listed in this file will cause the function graph
@@ -1125,6 +1153,12 @@ Here are the available options:
the trace displays additional information about the
latency, as described in "Latency trace format".
+ pause-on-trace
+ When set, opening the trace file for read, will pause
+ writing to the ring buffer (as if tracing_on was set to zero).
+ This simulates the original behavior of the trace file.
+ When the file is closed, tracing will be enabled again.
+
record-cmd
When any event or tracer is enabled, a hook is enabled
in the sched_switch trace point to fill comm cache
@@ -1176,6 +1210,8 @@ Here are the available options:
tasks fork. Also, when tasks with PIDs in set_event_pid exit,
their PIDs will be removed from the file.
+ This affects PIDs listed in set_event_notrace_pid as well.
+
function-trace
The latency tracers will enable function tracing
if this option is enabled (default it is). When
@@ -1190,6 +1226,8 @@ Here are the available options:
set_ftrace_pid exit, their PIDs will be removed from the
file.
+ This affects PIDs in set_ftrace_notrace_pid as well.
+
display-graph
When set, the latency tracers (irqsoff, wakeup, etc) will
use function graph tracing instead of function tracing.
@@ -2126,6 +2164,8 @@ periodically make a CPU constantly busy with interrupts disabled.
# cat trace
# tracer: hwlat
#
+ # entries-in-buffer/entries-written: 13/13 #P:8
+ #
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
@@ -2133,12 +2173,18 @@ periodically make a CPU constantly busy with interrupts disabled.
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
- <...>-3638 [001] d... 19452.055471: #1 inner/outer(us): 12/14 ts:1499801089.066141940
- <...>-3638 [003] d... 19454.071354: #2 inner/outer(us): 11/9 ts:1499801091.082164365
- <...>-3638 [002] dn.. 19461.126852: #3 inner/outer(us): 12/9 ts:1499801098.138150062
- <...>-3638 [001] d... 19488.340960: #4 inner/outer(us): 8/12 ts:1499801125.354139633
- <...>-3638 [003] d... 19494.388553: #5 inner/outer(us): 8/12 ts:1499801131.402150961
- <...>-3638 [003] d... 19501.283419: #6 inner/outer(us): 0/12 ts:1499801138.297435289 nmi-total:4 nmi-count:1
+ <...>-1729 [001] d... 678.473449: #1 inner/outer(us): 11/12 ts:1581527483.343962693 count:6
+ <...>-1729 [004] d... 689.556542: #2 inner/outer(us): 16/9 ts:1581527494.889008092 count:1
+ <...>-1729 [005] d... 714.756290: #3 inner/outer(us): 16/16 ts:1581527519.678961629 count:5
+ <...>-1729 [001] d... 718.788247: #4 inner/outer(us): 9/17 ts:1581527523.889012713 count:1
+ <...>-1729 [002] d... 719.796341: #5 inner/outer(us): 13/9 ts:1581527524.912872606 count:1
+ <...>-1729 [006] d... 844.787091: #6 inner/outer(us): 9/12 ts:1581527649.889048502 count:2
+ <...>-1729 [003] d... 849.827033: #7 inner/outer(us): 18/9 ts:1581527654.889013793 count:1
+ <...>-1729 [007] d... 853.859002: #8 inner/outer(us): 9/12 ts:1581527658.889065736 count:1
+ <...>-1729 [001] d... 855.874978: #9 inner/outer(us): 9/11 ts:1581527660.861991877 count:1
+ <...>-1729 [001] d... 863.938932: #10 inner/outer(us): 9/11 ts:1581527668.970010500 count:1 nmi-total:7 nmi-count:1
+ <...>-1729 [007] d... 878.050780: #11 inner/outer(us): 9/12 ts:1581527683.385002600 count:1 nmi-total:5 nmi-count:1
+ <...>-1729 [007] d... 886.114702: #12 inner/outer(us): 9/12 ts:1581527691.385001600 count:1
The above output is somewhat the same in the header. All events will have
@@ -2148,7 +2194,7 @@ interrupts disabled 'd'. Under the FUNCTION title there is:
This is the count of events recorded that were greater than the
tracing_threshold (See below).
- inner/outer(us): 12/14
+ inner/outer(us): 11/11
This shows two numbers as "inner latency" and "outer latency". The test
runs in a loop checking a timestamp twice. The latency detected within
@@ -2156,11 +2202,15 @@ interrupts disabled 'd'. Under the FUNCTION title there is:
after the previous timestamp and the next timestamp in the loop is
the "outer latency".
- ts:1499801089.066141940
+ ts:1581527483.343962693
+
+ The absolute timestamp that the first latency was recorded in the window.
+
+ count:6
- The absolute timestamp that the event happened.
+ The number of times a latency was detected during the window.
- nmi-total:4 nmi-count:1
+ nmi-total:7 nmi-count:1
On architectures that support it, if an NMI comes in during the
test, the time spent in NMI is reported in "nmi-total" (in
diff --git a/drivers/Kconfig b/drivers/Kconfig
index c7396659fdbe..74920bda3fda 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -200,6 +200,8 @@ source "drivers/thunderbolt/Kconfig"
source "drivers/android/Kconfig"
+source "drivers/gpu/trace/Kconfig"
+
source "drivers/nvdimm/Kconfig"
source "drivers/dax/Kconfig"
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index f17d01f076c7..835c88318cec 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -5,3 +5,4 @@
obj-$(CONFIG_TEGRA_HOST1X) += host1x/
obj-y += drm/ vga/
obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/
+obj-$(CONFIG_TRACE_GPU_MEM) += trace/
diff --git a/drivers/gpu/trace/Kconfig b/drivers/gpu/trace/Kconfig
new file mode 100644
index 000000000000..c24e9edd022e
--- /dev/null
+++ b/drivers/gpu/trace/Kconfig
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config TRACE_GPU_MEM
+ bool
diff --git a/drivers/gpu/trace/Makefile b/drivers/gpu/trace/Makefile
new file mode 100644
index 000000000000..b70fbdc5847f
--- /dev/null
+++ b/drivers/gpu/trace/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_TRACE_GPU_MEM) += trace_gpu_mem.o
diff --git a/drivers/gpu/trace/trace_gpu_mem.c b/drivers/gpu/trace/trace_gpu_mem.c
new file mode 100644
index 000000000000..01e855897b6d
--- /dev/null
+++ b/drivers/gpu/trace/trace_gpu_mem.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GPU memory trace points
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+
+#include <linux/module.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gpu_mem.h>
+
+EXPORT_TRACEPOINT_SYMBOL(gpu_mem_total);
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index d11e183fcb54..9903088891fa 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -216,7 +216,8 @@ static inline int __init xbc_node_compose_key(struct xbc_node *node,
}
/* XBC node initializer */
-int __init xbc_init(char *buf);
+int __init xbc_init(char *buf, const char **emsg, int *epos);
+
/* XBC cleanup data structures */
void __init xbc_destroy_all(void);
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index df0124eabece..c76b2f3b3ac4 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -135,10 +135,10 @@ void ring_buffer_read_finish(struct ring_buffer_iter *iter);
struct ring_buffer_event *
ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts);
-struct ring_buffer_event *
-ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts);
+void ring_buffer_iter_advance(struct ring_buffer_iter *iter);
void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
+bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter);
unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu);
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 6c7a10a6d71e..5c6943354049 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -85,6 +85,8 @@ struct trace_iterator {
struct mutex mutex;
struct ring_buffer_iter **buffer_iter;
unsigned long iter_flags;
+ void *temp; /* temp holder */
+ unsigned int temp_size;
/* trace_seq for __print_flags() and __print_symbolic() etc. */
struct trace_seq tmp_seq;
diff --git a/include/trace/events/gpu_mem.h b/include/trace/events/gpu_mem.h
new file mode 100644
index 000000000000..1897822a9150
--- /dev/null
+++ b/include/trace/events/gpu_mem.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GPU memory trace points
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gpu_mem
+
+#if !defined(_TRACE_GPU_MEM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GPU_MEM_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * The gpu_memory_total event indicates that there's an update to either the
+ * global or process total gpu memory counters.
+ *
+ * This event should be emitted whenever the kernel device driver allocates,
+ * frees, imports, unimports memory in the GPU addressable space.
+ *
+ * @gpu_id: This is the gpu id.
+ *
+ * @pid: Put 0 for global total, while positive pid for process total.
+ *
+ * @size: Virtual size of the allocation in bytes.
+ *
+ */
+TRACE_EVENT(gpu_mem_total,
+
+ TP_PROTO(uint32_t gpu_id, uint32_t pid, uint64_t size),
+
+ TP_ARGS(gpu_id, pid, size),
+
+ TP_STRUCT__entry(
+ __field(uint32_t, gpu_id)
+ __field(uint32_t, pid)
+ __field(uint64_t, size)
+ ),
+
+ TP_fast_assign(
+ __entry->gpu_id = gpu_id;
+ __entry->pid = pid;
+ __entry->size = size;
+ ),
+
+ TP_printk("gpu_id=%u pid=%u size=%llu",
+ __entry->gpu_id,
+ __entry->pid,
+ __entry->size)
+);
+
+#endif /* _TRACE_GPU_MEM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/main.c b/init/main.c
index ee4947af823f..e488213857e2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -353,6 +353,8 @@ static int __init bootconfig_params(char *param, char *val,
static void __init setup_boot_config(const char *cmdline)
{
static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
+ const char *msg;
+ int pos;
u32 size, csum;
char *data, *copy;
u32 *hdr;
@@ -400,10 +402,14 @@ static void __init setup_boot_config(const char *cmdline)
memcpy(copy, data, size);
copy[size] = '\0';
- ret = xbc_init(copy);
- if (ret < 0)
- pr_err("Failed to parse bootconfig\n");
- else {
+ ret = xbc_init(copy, &msg, &pos);
+ if (ret < 0) {
+ if (pos < 0)
+ pr_err("Failed to init bootconfig: %s.\n", msg);
+ else
+ pr_err("Failed to parse bootconfig: %s at %d.\n",
+ msg, pos);
+ } else {
pr_info("Load bootconfig: %d bytes %d nodes\n", size, ret);
/* keys starting with "kernel." are passed via cmdline */
extra_command_line = xbc_make_cmdline("kernel");
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fd81c7de77a7..041694a1eb74 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -102,7 +102,7 @@ static bool ftrace_pids_enabled(struct ftrace_ops *ops)
tr = ops->private;
- return tr->function_pids != NULL;
+ return tr->function_pids != NULL || tr->function_no_pids != NULL;
}
static void ftrace_update_trampoline(struct ftrace_ops *ops);
@@ -139,13 +139,23 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
#endif
}
+#define FTRACE_PID_IGNORE -1
+#define FTRACE_PID_TRACE -2
+
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs)
{
struct trace_array *tr = op->private;
+ int pid;
- if (tr && this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid))
- return;
+ if (tr) {
+ pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid);
+ if (pid == FTRACE_PID_IGNORE)
+ return;
+ if (pid != FTRACE_PID_TRACE &&
+ pid != current->pid)
+ return;
+ }
op->saved_func(ip, parent_ip, op, regs);
}
@@ -6923,11 +6933,17 @@ ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
+ struct trace_pid_list *no_pid_list;
pid_list = rcu_dereference_sched(tr->function_pids);
+ no_pid_list = rcu_dereference_sched(tr->function_no_pids);
- this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
- trace_ignore_this_task(pid_list, next));
+ if (trace_ignore_this_task(pid_list, no_pid_list, next))
+ this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
+ FTRACE_PID_IGNORE);
+ else
+ this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
+ next->pid);
}
static void
@@ -6940,6 +6956,9 @@ ftrace_pid_follow_sched_process_fork(void *data,
pid_list = rcu_dereference_sched(tr->function_pids);
trace_filter_add_remove_task(pid_list, self, task);
+
+ pid_list = rcu_dereference_sched(tr->function_no_pids);
+ trace_filter_add_remove_task(pid_list, self, task);
}
static void
@@ -6950,6 +6969,9 @@ ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task)
pid_list = rcu_dereference_sched(tr->function_pids);
trace_filter_add_remove_task(pid_list, NULL, task);
+
+ pid_list = rcu_dereference_sched(tr->function_no_pids);
+ trace_filter_add_remove_task(pid_list, NULL, task);
}
void ftrace_pid_follow_fork(struct trace_array *tr, bool enable)
@@ -6967,42 +6989,57 @@ void ftrace_pid_follow_fork(struct trace_array *tr, bool enable)
}
}
-static void clear_ftrace_pids(struct trace_array *tr)
+static void clear_ftrace_pids(struct trace_array *tr, int type)
{
struct trace_pid_list *pid_list;
+ struct trace_pid_list *no_pid_list;
int cpu;
pid_list = rcu_dereference_protected(tr->function_pids,
lockdep_is_held(&ftrace_lock));
- if (!pid_list)
+ no_pid_list = rcu_dereference_protected(tr->function_no_pids,
+ lockdep_is_held(&ftrace_lock));
+
+ /* Make sure there's something to do */
+ if (!pid_type_enabled(type, pid_list, no_pid_list))
return;
- unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
+ /* See if the pids still need to be checked after this */
+ if (!still_need_pid_events(type, pid_list, no_pid_list)) {
+ unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = FTRACE_PID_TRACE;
+ }
- for_each_possible_cpu(cpu)
- per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = false;
+ if (type & TRACE_PIDS)
+ rcu_assign_pointer(tr->function_pids, NULL);
- rcu_assign_pointer(tr->function_pids, NULL);
+ if (type & TRACE_NO_PIDS)
+ rcu_assign_pointer(tr->function_no_pids, NULL);
/* Wait till all users are no longer using pid filtering */
synchronize_rcu();
- trace_free_pid_list(pid_list);
+ if ((type & TRACE_PIDS) && pid_list)
+ trace_free_pid_list(pid_list);
+
+ if ((type & TRACE_NO_PIDS) && no_pid_list)
+ trace_free_pid_list(no_pid_list);
}
void ftrace_clear_pids(struct trace_array *tr)
{
mutex_lock(&ftrace_lock);
- clear_ftrace_pids(tr);
+ clear_ftrace_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
mutex_unlock(&ftrace_lock);
}
-static void ftrace_pid_reset(struct trace_array *tr)
+static void ftrace_pid_reset(struct trace_array *tr, int type)
{
mutex_lock(&ftrace_lock);
- clear_ftrace_pids(tr);
+ clear_ftrace_pids(tr, type);
ftrace_update_pid_func();
ftrace_startup_all(0);
@@ -7066,9 +7103,45 @@ static const struct seq_operations ftrace_pid_sops = {
.show = fpid_show,
};
-static int
-ftrace_pid_open(struct inode *inode, struct file *file)
+static void *fnpid_start(struct seq_file *m, loff_t *pos)
+ __acquires(RCU)
+{
+ struct trace_pid_list *pid_list;
+ struct trace_array *tr = m->private;
+
+ mutex_lock(&ftrace_lock);
+ rcu_read_lock_sched();
+
+ pid_list = rcu_dereference_sched(tr->function_no_pids);
+
+ if (!pid_list)
+ return !(*pos) ? FTRACE_NO_PIDS : NULL;
+
+ return trace_pid_start(pid_list, pos);
+}
+
+static void *fnpid_next(struct seq_file *m, void *v, loff_t *pos)
{
+ struct trace_array *tr = m->private;
+ struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_no_pids);
+
+ if (v == FTRACE_NO_PIDS) {
+ (*pos)++;
+ return NULL;
+ }
+ return trace_pid_next(pid_list, v, pos);
+}
+
+static const struct seq_operations ftrace_no_pid_sops = {
+ .start = fnpid_start,
+ .next = fnpid_next,
+ .stop = fpid_stop,
+ .show = fpid_show,
+};
+
+static int pid_open(struct inode *inode, struct file *file, int type)
+{
+ const struct seq_operations *seq_ops;
struct trace_array *tr = inode->i_private;
struct seq_file *m;
int ret = 0;
@@ -7079,9 +7152,18 @@ ftrace_pid_open(struct inode *inode, struct file *file)
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
- ftrace_pid_reset(tr);
+ ftrace_pid_reset(tr, type);
+
+ switch (type) {
+ case TRACE_PIDS:
+ seq_ops = &ftrace_pid_sops;
+ break;
+ case TRACE_NO_PIDS:
+ seq_ops = &ftrace_no_pid_sops;
+ break;
+ }
- ret = seq_open(file, &ftrace_pid_sops);
+ ret = seq_open(file, seq_ops);
if (ret < 0) {
trace_array_put(tr);
} else {
@@ -7093,10 +7175,23 @@ ftrace_pid_open(struct inode *inode, struct file *file)
return ret;
}
+static int
+ftrace_pid_open(struct inode *inode, struct file *file)
+{
+ return pid_open(inode, file, TRACE_PIDS);
+}
+
+static int
+ftrace_no_pid_open(struct inode *inode, struct file *file)
+{
+ return pid_open(inode, file, TRACE_NO_PIDS);
+}
+
static void ignore_task_cpu(void *data)
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
+ struct trace_pid_list *no_pid_list;
/*
* This function is called by on_each_cpu() while the
@@ -7104,18 +7199,25 @@ static void ignore_task_cpu(void *data)
*/
pid_list = rcu_dereference_protected(tr->function_pids,
mutex_is_locked(&ftrace_lock));
+ no_pid_list = rcu_dereference_protected(tr->function_no_pids,
+ mutex_is_locked(&ftrace_lock));
- this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
- trace_ignore_this_task(pid_list, current));
+ if (trace_ignore_this_task(pid_list, no_pid_list, current))
+ this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
+ FTRACE_PID_IGNORE);
+ else
+ this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid,
+ current->pid);
}
static ssize_t
-ftrace_pid_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
+pid_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos, int type)
{
struct seq_file *m = filp->private_data;
struct trace_array *tr = m->private;
- struct trace_pid_list *filtered_pids = NULL;
+ struct trace_pid_list *filtered_pids;
+ struct trace_pid_list *other_pids;
struct trace_pid_list *pid_list;
ssize_t ret;
@@ -7124,19 +7226,39 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
mutex_lock(&ftrace_lock);
- filtered_pids = rcu_dereference_protected(tr->function_pids,
+ switch (type) {
+ case TRACE_PIDS:
+ filtered_pids = rcu_dereference_protected(tr->function_pids,
+ lockdep_is_held(&ftrace_lock));
+ other_pids = rcu_dereference_protected(tr->function_no_pids,
+ lockdep_is_held(&ftrace_lock));
+ break;
+ case TRACE_NO_PIDS:
+ filtered_pids = rcu_dereference_protected(tr->function_no_pids,
+ lockdep_is_held(&ftrace_lock));
+ other_pids = rcu_dereference_protected(tr->function_pids,
lockdep_is_held(&ftrace_lock));
+ break;
+ }
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
if (ret < 0)
goto out;
- rcu_assign_pointer(tr->function_pids, pid_list);
+ switch (type) {
+ case TRACE_PIDS:
+ rcu_assign_pointer(tr->function_pids, pid_list);
+ break;
+ case TRACE_NO_PIDS:
+ rcu_assign_pointer(tr->function_no_pids, pid_list);
+ break;
+ }
+
if (filtered_pids) {
synchronize_rcu();
trace_free_pid_list(filtered_pids);
- } else if (pid_list) {
+ } else if (pid_list && !other_pids) {
/* Register a probe to set whether to ignore the tracing of a task */
register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
}
@@ -7159,6 +7281,20 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
return ret;
}
+static ssize_t
+ftrace_pid_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
+}
+
+static ssize_t
+ftrace_no_pid_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
+}
+
static int
ftrace_pid_release(struct inode *inode, struct file *file)
{
@@ -7177,10 +7313,20 @@ static const struct file_operations ftrace_pid_fops = {
.release = ftrace_pid_release,
};
+static const struct file_operations ftrace_no_pid_fops = {
+ .open = ftrace_no_pid_open,
+ .write = ftrace_no_pid_write,
+ .read = seq_read,
+ .llseek = tracing_lseek,
+ .release = ftrace_pid_release,
+};
+
void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
trace_create_file("set_ftrace_pid", 0644, d_tracer,
tr, &ftrace_pid_fops);
+ trace_create_file("set_ftrace_notrace_pid", 0644, d_tracer,
+ tr, &ftrace_no_pid_fops);
}
void __init ftrace_init_tracefs_toplevel(struct trace_array *tr,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 61f0e92ace99..6f0b42ceeb00 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -441,6 +441,7 @@ enum {
struct ring_buffer_per_cpu {
int cpu;
atomic_t record_disabled;
+ atomic_t resize_disabled;
struct trace_buffer *buffer;
raw_spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
@@ -484,7 +485,6 @@ struct trace_buffer {
unsigned flags;
int cpus;
atomic_t record_disabled;
- atomic_t resize_disabled;
cpumask_var_t cpumask;
struct lock_class_key *reader_lock_key;
@@ -503,10 +503,14 @@ struct trace_buffer {
struct ring_buffer_iter {
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long head;
+ unsigned long next_event;
struct buffer_page *head_page;
struct buffer_page *cache_reader_page;
unsigned long cache_read;
u64 read_stamp;
+ u64 page_stamp;
+ struct ring_buffer_event *event;
+ int missed_events;
};
/**
@@ -1737,18 +1741,24 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
size = nr_pages * BUF_PAGE_SIZE;
- /*
- * Don't succeed if resizing is disabled, as a reader might be
- * manipulating the ring buffer and is expecting a sane state while
- * this is true.
- */
- if (atomic_read(&buffer->resize_disabled))
- return -EBUSY;
-
/* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
+
if (cpu_id == RING_BUFFER_ALL_CPUS) {
+ /*
+ * Don't succeed if resizing is disabled, as a reader might be
+ * manipulating the ring buffer and is expecting a sane state while
+ * this is true.
+ */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ if (atomic_read(&cpu_buffer->resize_disabled)) {
+ err = -EBUSY;
+ goto out_err_unlock;
+ }
+ }
+
/* calculate the pages to update */
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
@@ -1816,6 +1826,16 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
if (nr_pages == cpu_buffer->nr_pages)
goto out;
+ /*
+ * Don't succeed if resizing is disabled, as a reader might be
+ * manipulating the ring buffer and is expecting a sane state while
+ * this is true.
+ */
+ if (atomic_read(&cpu_buffer->resize_disabled)) {
+ err = -EBUSY;
+ goto out_err_unlock;
+ }
+
cpu_buffer->nr_pages_to_update = nr_pages -
cpu_buffer->nr_pages;
@@ -1885,6 +1905,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
free_buffer_page(bpage);
}
}
+ out_err_unlock:
mutex_unlock(&buffer->mutex);
return err;
}
@@ -1913,15 +1934,63 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read);
}
-static __always_inline struct ring_buffer_event *
-rb_iter_head_event(struct ring_buffer_iter *iter)
+static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
{
- return __rb_page_index(iter->head_page, iter->head);
+ return local_read(&bpage->page->commit);
}
-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
+static struct ring_buffer_event *
+rb_iter_head_event(struct ring_buffer_iter *iter)
{
- return local_read(&bpage->page->commit);
+ struct ring_buffer_event *event;
+ struct buffer_page *iter_head_page = iter->head_page;
+ unsigned long commit;
+ unsigned length;
+
+ if (iter->head != iter->next_event)
+ return iter->event;
+
+ /*
+ * When the writer goes across pages, it issues a cmpxchg which
+ * is a mb(), which will synchronize with the rmb here.
+ * (see rb_tail_page_update() and __rb_reserve_next())
+ */
+ commit = rb_page_commit(iter_head_page);
+ smp_rmb();
+ event = __rb_page_index(iter_head_page, iter->head);
+ length = rb_event_length(event);
+
+ /*
+ * READ_ONCE() doesn't work on functions and we don't want the
+ * compiler doing any crazy optimizations with length.
+ */
+ barrier();
+
+ if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE)
+ /* Writer corrupted the read? */
+ goto reset;
+
+ memcpy(iter->event, event, length);
+ /*
+ * If the page stamp is still the same after this rmb() then the
+ * event was safely copied w