-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 13
-rw-r--r--  Documentation/trace/kprobetrace.rst | 42
-rw-r--r--  Documentation/trace/uprobetracer.rst | 10
-rw-r--r--  arch/Kconfig | 16
-rw-r--r--  arch/x86/include/asm/uaccess.h | 4
-rw-r--r--  arch/x86/kernel/ftrace.c | 6
-rw-r--r--  include/linux/ftrace.h | 4
-rw-r--r--  include/linux/trace_events.h | 9
-rw-r--r--  include/linux/uaccess.h | 20
-rw-r--r--  kernel/kprobes.c | 3
-rw-r--r--  kernel/trace/Kconfig | 12
-rw-r--r--  kernel/trace/ftrace.c | 48
-rw-r--r--  kernel/trace/ring_buffer.c | 17
-rw-r--r--  kernel/trace/trace.c | 17
-rw-r--r--  kernel/trace/trace_event_perf.c | 3
-rw-r--r--  kernel/trace/trace_events.c | 10
-rw-r--r--  kernel/trace/trace_events_filter.c | 3
-rw-r--r--  kernel/trace/trace_kprobe.c | 357
-rw-r--r--  kernel/trace/trace_probe.c | 142
-rw-r--r--  kernel/trace/trace_probe.h | 77
-rw-r--r--  kernel/trace/trace_probe_tmpl.h | 36
-rw-r--r--  kernel/trace/trace_uprobe.c | 180
-rw-r--r--  kernel/tracepoint.c | 4
-rw-r--r--  mm/maccess.c | 122
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 3
-rw-r--r--  tools/perf/util/probe-event.c | 11
-rw-r--r--  tools/perf/util/probe-event.h | 2
-rw-r--r--  tools/perf/util/probe-file.c | 7
-rw-r--r--  tools/perf/util/probe-file.h | 1
-rw-r--r--  tools/perf/util/probe-finder.c | 19
-rwxr-xr-x  tools/testing/selftests/ftrace/ftracetest | 38
-rw-r--r--  tools/testing/selftests/ftrace/test.d/functions | 4
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc | 32
33 files changed, 861 insertions, 411 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a5f4004e8705..f0461456d910 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2011,6 +2011,19 @@
Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
the default is off.
+ kprobe_event=[probe-list]
+ [FTRACE] Add kprobe events and enable them at boot time.
+ The probe-list is a semicolon-delimited list of probe
+ definitions. Each definition is the same as for the
+ kprobe_events interface, but the parameters are comma-
+ delimited. For example, to add a kprobe event on vfs_read
+ with arg1 and arg2, add to the command line:
+
+ kprobe_event=p,vfs_read,$arg1,$arg2
+
+ See also Documentation/trace/kprobetrace.rst "Kernel
+ Boot Parameter" section.
+
kpti= [ARM64] Control page table isolation of user
and kernel address spaces.
Default: enabled on cores which need mitigation.
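
As an illustration (not taken from the patch text), multiple boot-time probes
can be chained with semicolons; the event and symbol names here are
hypothetical:

	kprobe_event=p:myopen,do_sys_open,dfd=%ax;r:myread,vfs_read,$retval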
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index 7d2b0178d3f3..fbb314bfa112 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -51,15 +51,17 @@ Synopsis of kprobe_events
$argN : Fetch the Nth function argument. (N >= 1) (\*1)
$retval : Fetch return value.(\*2)
$comm : Fetch current task comm.
- +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(\*3)
+ +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
(u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
- (x8/x16/x32/x64), "string" and bitfield are supported.
+ (x8/x16/x32/x64), "string", "ustring" and bitfield
+ are supported.
(\*1) only for the probe on function entry (offs == 0).
(\*2) only for return probe.
(\*3) this is useful for fetching a field of data structures.
+ (\*4) "u" means user-space dereference. See :ref:`user_mem_access`.
Types
-----
@@ -77,7 +79,8 @@ apply it to registers/stack-entries etc. (for example, '$stack1:x8[8]' is
wrong, but '+8($stack):x8[8]' is OK.)
String type is a special type, which fetches a "null-terminated" string from
kernel space. This means it will fail and store NULL if the string container
-has been paged out.
+has been paged out. The "ustring" type is an alternative to "string" for
+user-space. See :ref:`user_mem_access` for more information.
The string array type is a bit different from other types. For other base
types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is same
as +0(%di):x32.) But string[1] is not equal to string. The string type itself
@@ -92,6 +95,25 @@ Symbol type('symbol') is an alias of u32 or u64 type (depends on BITS_PER_LONG)
which shows given pointer in "symbol+offset" style.
For $comm, the default type is "string"; any other type is invalid.
+.. _user_mem_access:
+User Memory Access
+------------------
+Kprobe events support user-space memory access. For that purpose, you can use
+either user-space dereference syntax or 'ustring' type.
+
+The user-space dereference syntax allows you to access a field of a data
+structure in user-space. This is done by adding the "u" prefix to the
+dereference syntax. For example, +u4(%si) reads memory from the address in
+the register %si offset by 4, where the memory is expected to be in
+user-space. You can use this for strings too; e.g. +u0(%si):string reads
+a string from the address in the register %si, which is expected to be in
+user-space. 'ustring' is a shortcut for the same operation. That is,
++0(%si):ustring is equivalent to +u0(%si):string.
+
+Note that kprobe-event provides the user-memory access syntax, but does not
+use it transparently: if you use a normal dereference or the string type for
+user memory, the access might fail, and on some archs it always fails. The
+user must carefully check whether the target data is in kernel or user space.
Per-Probe Event Filtering
-------------------------
@@ -124,6 +146,20 @@ You can check the total number of probe hits and probe miss-hits via
The first column is event name, the second is the number of probe hits,
the third is the number of probe miss-hits.
+Kernel Boot Parameter
+---------------------
+You can add and enable new kprobe events at boot time with the
+"kprobe_event=" parameter, which accepts a semicolon-delimited list of
+kprobe event definitions in a format similar to that of kprobe_events,
+except that the definition parameters are comma-delimited instead of
+space-delimited. For example, this myprobe event on do_sys_open
+
+  p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)
+
+becomes the following kernel boot parameter (just replace the spaces with commas):
+
+  p:myprobe,do_sys_open,dfd=%ax,filename=%dx,flags=%cx,mode=+4($stack)
+
Usage examples
--------------
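
As a usage example of the new user-space dereference, here is a minimal
userspace sketch (illustrative, not from this patch) that registers such a
probe by writing to tracefs. It assumes tracefs is mounted at
/sys/kernel/debug/tracing and that, on x86-64, %si holds do_sys_open()'s
second (filename) argument; "path=+u0(%si):string" is equivalent to
"path=+0(%si):ustring".

/*
 * Minimal sketch, not from this patch: register a kprobe event that
 * reads a user-space string via the new "+u" dereference prefix.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *def = "p:myopen do_sys_open path=+u0(%si):string\n";
        int fd = open("/sys/kernel/debug/tracing/kprobe_events",
                      O_WRONLY | O_APPEND);

        if (fd < 0) {
                perror("kprobe_events");
                return 1;
        }
        /* Appending the definition creates events/kprobes/myopen. */
        if (write(fd, def, strlen(def)) != (ssize_t)strlen(def))
                perror("write");
        close(fd);
        return 0;
}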
diff --git a/Documentation/trace/uprobetracer.rst b/Documentation/trace/uprobetracer.rst
index 0b21305fabdc..6e75a6c5a2c8 100644
--- a/Documentation/trace/uprobetracer.rst
+++ b/Documentation/trace/uprobetracer.rst
@@ -42,16 +42,18 @@ Synopsis of uprobe_tracer
@+OFFSET : Fetch memory at OFFSET (OFFSET from same file as PATH)
$stackN : Fetch Nth entry of stack (N >= 0)
$stack : Fetch stack address.
- $retval : Fetch return value.(*)
+ $retval : Fetch return value.(\*1)
$comm : Fetch current task comm.
- +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
+ +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*2)(\*3)
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
(u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
(x8/x16/x32/x64), "string" and bitfield are supported.
- (*) only for return probe.
- (**) this is useful for fetching a field of data structures.
+ (\*1) only for return probe.
+ (\*2) this is useful for fetching a field of data structures.
+ (\*3) Unlike kprobe events, the "u" prefix is just ignored, because
+ uprobe events can access only user-space memory.
Types
-----
diff --git a/arch/Kconfig b/arch/Kconfig
index e8d19c3cb91f..6dd1faab6ccb 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -128,22 +128,6 @@ config UPROBES
managed by the kernel and kept transparent to the probed
application. )
-config HAVE_64BIT_ALIGNED_ACCESS
- def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS
- help
- Some architectures require 64 bit accesses to be 64 bit
- aligned, which also requires structs containing 64 bit values
- to be 64 bit aligned too. This includes some 32 bit
- architectures which can do 64 bit accesses, as well as 64 bit
- architectures without unaligned access.
-
- This symbol should be selected by an architecture if 64 bit
- accesses are required to be 64 bit aligned in this way even
- though it is not a 64 bit architecture.
-
- See Documentation/unaligned-memory-access.txt for more
- information on the topic of unaligned memory accesses.
-
config HAVE_EFFICIENT_UNALIGNED_ACCESS
bool
help
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c82abd6e4ca3..9c4435307ff8 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -66,7 +66,9 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
})
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task())
+static inline bool pagefault_disabled(void);
+# define WARN_ON_IN_IRQ() \
+ WARN_ON_ONCE(!in_task() && !pagefault_disabled())
#else
# define WARN_ON_IN_IRQ()
#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4b73f5937f41..024c3053dbba 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -373,7 +373,7 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
return add_break(rec->ip, old);
}
-static int add_breakpoints(struct dyn_ftrace *rec, int enable)
+static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_addr;
int ret;
@@ -481,7 +481,7 @@ static int add_update_nop(struct dyn_ftrace *rec)
return add_update_code(ip, new);
}
-static int add_update(struct dyn_ftrace *rec, int enable)
+static int add_update(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_addr;
int ret;
@@ -527,7 +527,7 @@ static int finish_update_nop(struct dyn_ftrace *rec)
return ftrace_write(ip, new, 1);
}
-static int finish_update(struct dyn_ftrace *rec, int enable)
+static int finish_update(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_addr;
int ret;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 25e2995d4a4c..8a8cb3c401b2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -427,8 +427,8 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
iter = ftrace_rec_iter_next(iter))
-int ftrace_update_record(struct dyn_ftrace *rec, int enable);
-int ftrace_test_record(struct dyn_ftrace *rec, int enable);
+int ftrace_update_record(struct dyn_ftrace *rec, bool enable);
+int ftrace_test_record(struct dyn_ftrace *rec, bool enable);
void ftrace_run_stop_machine(int command);
unsigned long ftrace_location(unsigned long ip);
unsigned long ftrace_location_range(unsigned long start, unsigned long end);
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 8a62731673f7..5150436783e8 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -142,6 +142,7 @@ enum print_line_t {
enum print_line_t trace_handle_return(struct trace_seq *s);
void tracing_generic_entry_update(struct trace_entry *entry,
+ unsigned short type,
unsigned long flags,
int pc);
struct trace_event_file;
@@ -317,6 +318,14 @@ trace_event_name(struct trace_event_call *call)
return call->name;
}
+static inline struct list_head *
+trace_get_fields(struct trace_event_call *event_call)
+{
+ if (!event_call->class->get_fields)
+ return &event_call->class->fields;
+ return event_call->class->get_fields(event_call);
+}
+
struct trace_array;
struct trace_subsystem_dir;
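
trace_get_fields() moves into the public header as a static inline so code
outside kernel/trace/trace_events.c can walk an event's field list. A hedged
sketch of such a caller (hypothetical, not from the patch; struct
ftrace_event_field is still private to kernel/trace/trace.h, so this would
have to live inside kernel/trace/):

/*
 * Hypothetical caller, for illustration only: print every field of a
 * trace event.  Assumes it is built inside kernel/trace/ where the
 * private "trace.h" (struct ftrace_event_field) is visible.
 */
#include <linux/kernel.h>
#include <linux/trace_events.h>
#include "trace.h"

static void my_dump_event_fields(struct trace_event_call *call)
{
        struct ftrace_event_field *field;
        struct list_head *head = trace_get_fields(call);

        list_for_each_entry(field, head, link)
                pr_info("%s %s;\n", field->type, field->name);
}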
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 2b70130af585..34a038563d97 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -203,7 +203,10 @@ static inline void pagefault_enable(void)
/*
* Is the pagefault handler disabled? If so, user access methods will not sleep.
*/
-#define pagefault_disabled() (current->pagefault_disabled != 0)
+static inline bool pagefault_disabled(void)
+{
+ return current->pagefault_disabled != 0;
+}
/*
* The pagefault handler is in general disabled by pagefault_disable() or
@@ -240,6 +243,18 @@ extern long probe_kernel_read(void *dst, const void *src, size_t size);
extern long __probe_kernel_read(void *dst, const void *src, size_t size);
/*
+ * probe_user_read(): safely attempt to read from a location in user space
+ * @dst: pointer to the buffer that shall take the data
+ * @src: address to read from
+ * @size: size of the data chunk
+ *
+ * Safely read from address @src to the buffer at @dst. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+extern long probe_user_read(void *dst, const void __user *src, size_t size);
+extern long __probe_user_read(void *dst, const void __user *src, size_t size);
+
+/*
* probe_kernel_write(): safely attempt to write to a location
* @dst: address to write to
* @src: pointer to the data that shall be written
@@ -252,6 +267,9 @@ extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
+extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
+ long count);
+extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
/**
* probe_kernel_address(): safely attempt to read from a location
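
The new helpers mirror probe_kernel_read()/strncpy_from_unsafe() for user
addresses: they disable page faults around the access, so they return -EFAULT
instead of sleeping when the memory is unmapped. A hedged sketch of a caller
(the function name is hypothetical, not from the patch):

/*
 * Hypothetical caller: fetch a u32 and then a string from user space
 * in a context that must not fault, e.g. a kprobe handler.
 */
#include <linux/uaccess.h>

static long my_fetch_user_args(const void __user *uaddr, u32 *val,
                               char *buf, long buflen)
{
        long ret;

        /* 0 on success, -EFAULT if @uaddr is not a mapped user address. */
        ret = probe_user_read(val, uaddr, sizeof(*val));
        if (ret)
                return ret;

        /*
         * Copies at most @buflen bytes including the trailing NUL and
         * returns the copied length on success, -EFAULT on fault.
         */
        ret = strncpy_from_unsafe_user(buf, uaddr + sizeof(*val), buflen);
        return ret < 0 ? ret : 0;
}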
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9f5433a52488..9873fc627d61 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2276,6 +2276,7 @@ static int __init init_kprobes(void)
init_test_probes();
return err;
}
+subsys_initcall(init_kprobes);
#ifdef CONFIG_DEBUG_FS
static void report_probe(struct seq_file *pi, struct kprobe *p,
@@ -2588,5 +2589,3 @@ static int __init debugfs_kprobe_init(void)
late_initcall(debugfs_kprobe_init);
#endif /* CONFIG_DEBUG_FS */
-
-module_init(init_kprobes);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 564e5fdb025f..98da8998c25c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -597,9 +597,19 @@ config FTRACE_STARTUP_TEST
functioning properly. It will do tests on all the configured
tracers of ftrace.
+config EVENT_TRACE_STARTUP_TEST
+ bool "Run selftest on trace events"
+ depends on FTRACE_STARTUP_TEST
+ default y
+ help
+ This option performs a test on all trace events in the system.
+ It basically just enables each event and runs some code that
+ will trigger events (not necessarily the event it enables).
+ This may take some time to run as there are a lot of events.
+
config EVENT_TRACE_TEST_SYSCALLS
bool "Run selftest on syscall events"
- depends on FTRACE_STARTUP_TEST
+ depends on EVENT_TRACE_STARTUP_TEST
help
This option will also enable testing every syscall event.
It only enables the event and disables it and runs various loads
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 576c41644e77..eca34503f178 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1622,6 +1622,11 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec)
return keep_regs;
}
+static struct ftrace_ops *
+ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
+static struct ftrace_ops *
+ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
+
static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1750,15 +1755,17 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
}
/*
- * If the rec had TRAMP enabled, then it needs to
- * be cleared. As TRAMP can only be enabled iff
- * there is only a single ops attached to it.
- * In otherwords, always disable it on decrementing.
- * In the future, we may set it if rec count is
- * decremented to one, and the ops that is left
- * has a trampoline.
+ * TRAMP needs to be set only if rec count is
+ * decremented to one, and the ops that is left
+ * has a trampoline, as TRAMP can only be enabled
+ * when there is only a single ops attached to
+ * the record.
*/
- rec->flags &= ~FTRACE_FL_TRAMP;
+ if (ftrace_rec_count(rec) == 1 &&
+ ftrace_find_tramp_ops_any(rec))
+ rec->flags |= FTRACE_FL_TRAMP;
+ else
+ rec->flags &= ~FTRACE_FL_TRAMP;
/*
* flags will be cleared in ftrace_check_record()
@@ -1768,7 +1775,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
count++;
/* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
- update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE;
+ update |= ftrace_test_record(rec, true) != FTRACE_UPDATE_IGNORE;
/* Shortcut, if we handled all records, we are done. */
if (!all && count == hash->count)
@@ -1951,11 +1958,6 @@ static void print_ip_ins(const char *fmt, const unsigned char *p)
printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
}
-static struct ftrace_ops *
-ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
-static struct ftrace_ops *
-ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
-
enum ftrace_bug_type ftrace_bug_type;
const void *ftrace_expected;
@@ -2047,7 +2049,7 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec)
}
}
-static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
+static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
{
unsigned long flag = 0UL;
@@ -2146,28 +2148,28 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
/**
* ftrace_update_record, set a record that now is tracing or not
* @rec: the record to update
- * @enable: set to 1 if the record is tracing, zero to force disable
+ * @enable: set to true if the record is tracing, false to force disable
*
* The records that represent all functions that can be traced need
* to be updated when tracing has been enabled.
*/
-int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+int ftrace_update_record(struct dyn_ftrace *rec, bool enable)
{
- return ftrace_check_record(rec, enable, 1);
+ return ftrace_check_record(rec, enable, true);
}
/**
* ftrace_test_record, check if the record has been enabled or not
* @rec: the record to test
- * @enable: set to 1 to check if enabled, 0 if it is disabled
+ * @enable: set to true to check if enabled, false if it is disabled
*
* The arch code may need to test if a record is already set to
* tracing to determine how to modify the function code that it
* represents.
*/
-int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+int ftrace_test_record(struct dyn_ftrace *rec, bool enable)
{
- return ftrace_check_record(rec, enable, 0);
+ return ftrace_check_record(rec, enable, false);
}
static struct ftrace_ops *
@@ -2356,7 +2358,7 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
}
static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+__ftrace_replace_code(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_old_addr;
unsigned long ftrace_addr;
@@ -2395,7 +2397,7 @@ void __weak ftrace_replace_code(int mod_flags)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
- int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
+ bool enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL;
int failed;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 05b0b3139ebc..66358d66c933 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -128,16 +128,7 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
#define RB_ALIGNMENT 4U
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
-
-#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
-# define RB_FORCE_8BYTE_ALIGNMENT 0
-# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
-#else
-# define RB_FORCE_8BYTE_ALIGNMENT 1
-# define RB_ARCH_ALIGNMENT 8U
-#endif
-
-#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
+#define RB_ALIGN_DATA __aligned(RB_ALIGNMENT)
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -2373,7 +2364,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+ if (length > RB_MAX_SMALL_DATA) {
event->type_len = 0;
event->array[0] = length;
} else
@@ -2388,11 +2379,11 @@ static unsigned rb_calculate_event_length(unsigned length)
if (!length)
length++;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
+ if (length > RB_MAX_SMALL_DATA)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ARCH_ALIGNMENT);
+ length = ALIGN(length, RB_ALIGNMENT);
/*
* In case the time delta is larger than the 27 bits for it
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c90c687cf950..525a97fbbc60 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -366,7 +366,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct
}
/**
- * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
+ * trace_filter_add_remove_task - Add or remove a task from a pid_list
* @pid_list: The list to modify
* @self: The current task for fork or NULL for exit
* @task: The task to add or remove
@@ -743,8 +743,7 @@ trace_event_setup(struct ring_buffer_event *event,
{
struct trace_entry *ent = ring_buffer_event_data(event);
- tracing_generic_entry_update(ent, flags, pc);
- ent->type = type;
+ tracing_generic_entry_update(ent, type, flags, pc);
}
static __always_inline struct ring_buffer_event *
@@ -2312,13 +2311,14 @@ enum print_line_t trace_handle_return(struct trace_seq *s)
EXPORT_SYMBOL_GPL(trace_handle_return);
void
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
- int pc)
+tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
+ unsigned long flags, int pc)
{
struct task_struct *tsk = current;
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
+ entry->type = type;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -4842,12 +4842,13 @@ static const char readme_msg[] =
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
- "\t $stack<index>, $stack, $retval, $comm, $arg<N>\n"
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#else
- "\t $stack<index>, $stack, $retval, $comm\n"
+ "\t $stack<index>, $stack, $retval, $comm,\n"
#endif
+ "\t +|-[u]<offset>(<fetcharg>)\n"
"\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
- "\t b<bit-width>@<bit-offset>/<container-size>,\n"
+ "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
"\t <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
"\t field: <stype> <name>;\n"
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 4629a6104474..0892e38ed6fb 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -416,8 +416,7 @@ void perf_trace_buf_update(void *record, u16 type)
unsigned long flags;
local_save_flags(flags);
- tracing_generic_entry_update(entry, flags, pc);
- entry->type = type;
+ tracing_generic_entry_update(entry, type, flags, pc);
}
NOKPROBE_SYMBOL(perf_trace_buf_update);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0ce3db67f556..c7506bc81b75 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -70,14 +70,6 @@ static int system_refcount_dec(struct event_subsystem *system)
#define while_for_each_event_file() \
}
-static struct list_head *
-trace_get_fields(struct trace_event_call *event_call)
-{
- if (!event_call->class->get_fields)
- return &event_call->class->fields;
- return event_call->class->get_fields(event_call);
-}
-
static struct ftrace_event_field *
__find_event_field(struct list_head *head, char *name)
{
@@ -3190,7 +3182,7 @@ void __init trace_event_init(void)
event_trace_enable();
}
-#ifdef CONFIG_FTRACE_STARTUP_TEST
+#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
static DEFINE_SPINLOCK(test_spinlock);
static DEFINE_SPINLOCK(test_spinlock_irq);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 5079d1db3754..c773b8fb270c 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1084,6 +1084,9 @@ int filter_assign_type(const char *type)
if (strchr(type, '[') && strstr(type, "char"))
return FILTER_STATIC_STRING;
+ if (strcmp(type, "char *") == 0 || strcmp(type, "const char *") == 0)
+ return FILTER_PTR_STRING;
+
return FILTER_OTHER;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7d736248a070..9d483ad9bb6c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -12,6 +12,8 @@
#include <linux/rculist.h>
#include <linux/error-injection.h>
+#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
+
#include "trace_dynevent.h"
#include "trace_kprobe_selftest.h"
#include "trace_probe.h"
@@ -19,6 +21,18 @@
#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096
+#define MAX_KPROBE_CMDLINE_SIZE 1024
+
+/* Kprobe early definition from command line */
+static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata;
+static bool kprobe_boot_events_enabled __initdata;
+
+static int __init set_kprobe_boot_events(char *str)
+{
+ strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
+ return 0;
+}
+__setup("kprobe_event=", set_kprobe_boot_events);
static int trace_kprobe_create(int argc, const char **argv);
static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev);
@@ -128,8 +142,8 @@ static bool trace_kprobe_match(const char *system, const char *event,
{
struct trace_kprobe *tk = to_trace_kprobe(ev);
- return strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
- (!system || strcmp(tk->tp.call.class->system, system) == 0);
+ return strcmp(trace_probe_name(&tk->tp), event) == 0 &&
+ (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0);
}
static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
@@ -143,6 +157,12 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
return nhit;
}
+static nokprobe_inline bool trace_kprobe_is_registered(struct trace_kprobe *tk)
+{
+ return !(list_empty(&tk->rp.kp.list) &&
+ hlist_unhashed(&tk->rp.kp.hlist));
+}
+
/* Return 0 if it fails to find the symbol address */
static nokprobe_inline
unsigned long trace_kprobe_address(struct trace_kprobe *tk)
@@ -183,6 +203,16 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
+static void free_trace_kprobe(struct trace_kprobe *tk)
+{
+ if (tk) {
+ trace_probe_cleanup(&tk->tp);
+ kfree(tk->symbol);
+ free_percpu(tk->nhit);
+ kfree(tk);
+ }
+}
+
/*
* Allocate new trace_probe and initialize it (including kprobes).
*/
@@ -220,49 +250,20 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
tk->rp.kp.pre_handler = kprobe_dispatcher;
tk->rp.maxactive = maxactive;
+ INIT_HLIST_NODE(&tk->rp.kp.hlist);
+ INIT_LIST_HEAD(&tk->rp.kp.list);
- if (!event || !group) {
- ret = -EINVAL;
- goto error;
- }
-
- tk->tp.call.class = &tk->tp.class;
- tk->tp.call.name = kstrdup(event, GFP_KERNEL);
- if (!tk->tp.call.name)
- goto error;
-
- tk->tp.class.system = kstrdup(group, GFP_KERNEL);
- if (!tk->tp.class.system)
+ ret = trace_probe_init(&tk->tp, event, group);
+ if (ret < 0)
goto error;
dyn_event_init(&tk->devent, &trace_kprobe_ops);
- INIT_LIST_HEAD(&tk->tp.files);
return tk;
error:
- kfree(tk->tp.call.name);
- kfree(tk->symbol);
- free_percpu(tk->nhit);
- kfree(tk);
+ free_trace_kprobe(tk);
return ERR_PTR(ret);
}
-static void free_trace_kprobe(struct trace_kprobe *tk)
-{
- int i;
-
- if (!tk)
- return;
-
- for (i = 0; i < tk->tp.nr_args; i++)
- traceprobe_free_probe_arg(&tk->tp.args[i]);
-
- kfree(tk->tp.call.class->system);
- kfree(tk->tp.call.name);
- kfree(tk->symbol);
- free_percpu(tk->nhit);
- kfree(tk);
-}
-
static struct trace_kprobe *find_trace_kprobe(const char *event,
const char *group)
{
@@ -270,8 +271,8 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
struct trace_kprobe *tk;
for_each_trace_kprobe(tk, pos)
- if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
- strcmp(tk->tp.call.class->system, group) == 0)
+ if (strcmp(t