summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-30 09:49:56 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-30 09:49:56 -0700
commit343a9f35409b68b6de66ecd0db90a277aee90ec2 (patch)
tree8f62088c78f3eb9ee1d0dab9f4cee0cf5f3878c6 /kernel
parentf4267b3604a84ff72c013a0e3e467289908603a6 (diff)
parenta2acce536921bd793bae13fa344fcea157638e72 (diff)
Merge tag 'trace-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from Steven Rostedt: "The biggest change here is the updates to kprobes Back in January I posted patches to create function based events. These were the events that you suggested I make to allow developers to easily create events in code where no trace event exists. After posting those changes for review, it was suggested that we implement this instead with kprobes. The problem with kprobes is that the interface is too complex and needs to be simplified. Masami Hiramatsu posted patches in March and I've been playing with them a bit. There's been a bit of clean up in the kprobe code that was inspired by the function based event patches, and a couple of enhancements to the kprobe event interface. - If the arch supports it (we added support for x86), you can place a kprobe event at the start of a function and use $arg1, $arg2, etc to reference the arguments of a function. (Before you needed to know what register or where on the stack the argument was). - The second is a way to see array of events. For example, if you reference a mac address, you can add: echo 'p:mac ip_rcv perm_addr=+574($arg2):x8[6]' > kprobe_events And this will produce: mac: (ip_rcv+0x0/0x140) perm_addr={0x52,0x54,0x0,0xc0,0x76,0xec} Other changes include - Exporting trace_dump_stack to modules - Have the stack tracer trace the entire stack (stop trying to remove tracing itself, as we keep removing too much). - Added support for SDT in uprobes" [ SDT - "Statically Defined Tracing" are userspace markers for tracing. Let's not use random TLA's in explanations unless they are fairly well-established as generic (at least for kernel people) - Linus ] * tag 'trace-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (24 commits) tracing: Have stack tracer trace full stack tracing: Export trace_dump_stack to modules tracing: probeevent: Fix uninitialized used of offset in parse args tracing/kprobes: Allow kprobe-events to record module symbol tracing/kprobes: Check the probe on unloaded module correctly tracing/uprobes: Fix to return -EFAULT if copy_from_user failed tracing: probeevent: Add $argN for accessing function args x86: ptrace: Add function argument access API tracing: probeevent: Add array type support tracing: probeevent: Add symbol type tracing: probeevent: Unify fetch_insn processing common part tracing: probeevent: Append traceprobe_ for exported function tracing: probeevent: Return consumed bytes of dynamic area tracing: probeevent: Unify fetch type tables tracing: probeevent: Introduce new argument fetching code tracing: probeevent: Remove NOKPROBE_SYMBOL from print functions tracing: probeevent: Cleanup argument field definition tracing: probeevent: Cleanup print argument functions trace_uprobe: support reference counter in fd-based uprobe perf probe: Support SDT markers having reference counter (semaphore) ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c49
-rw-r--r--kernel/events/uprobes.c278
-rw-r--r--kernel/trace/trace.c12
-rw-r--r--kernel/trace/trace_event_perf.c7
-rw-r--r--kernel/trace/trace_kprobe.c412
-rw-r--r--kernel/trace/trace_probe.c672
-rw-r--r--kernel/trace/trace_probe.h289
-rw-r--r--kernel/trace/trace_probe_tmpl.h216
-rw-r--r--kernel/trace/trace_stack.c2
-rw-r--r--kernel/trace/trace_uprobe.c255
10 files changed, 1261 insertions, 931 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5a97f34bc14c..8c490130c4fb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8376,30 +8376,39 @@ static struct pmu perf_tracepoint = {
*
* PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
* if not set, create kprobe/uprobe
+ *
+ * The following values specify a reference counter (or semaphore in the
+ * terminology of tools like dtrace, systemtap, etc.) Userspace Statically
+ * Defined Tracepoints (USDT). Currently, we use 40 bit for the offset.
+ *
+ * PERF_UPROBE_REF_CTR_OFFSET_BITS # of bits in config as th offset
+ * PERF_UPROBE_REF_CTR_OFFSET_SHIFT # of bits to shift left
*/
enum perf_probe_config {
PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0, /* [k,u]retprobe */
+ PERF_UPROBE_REF_CTR_OFFSET_BITS = 32,
+ PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS,
};
PMU_FORMAT_ATTR(retprobe, "config:0");
+#endif
-static struct attribute *probe_attrs[] = {
+#ifdef CONFIG_KPROBE_EVENTS
+static struct attribute *kprobe_attrs[] = {
&format_attr_retprobe.attr,
NULL,
};
-static struct attribute_group probe_format_group = {
+static struct attribute_group kprobe_format_group = {
.name = "format",
- .attrs = probe_attrs,
+ .attrs = kprobe_attrs,
};
-static const struct attribute_group *probe_attr_groups[] = {
- &probe_format_group,
+static const struct attribute_group *kprobe_attr_groups[] = {
+ &kprobe_format_group,
NULL,
};
-#endif
-#ifdef CONFIG_KPROBE_EVENTS
static int perf_kprobe_event_init(struct perf_event *event);
static struct pmu perf_kprobe = {
.task_ctx_nr = perf_sw_context,
@@ -8409,7 +8418,7 @@ static struct pmu perf_kprobe = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
- .attr_groups = probe_attr_groups,
+ .attr_groups = kprobe_attr_groups,
};
static int perf_kprobe_event_init(struct perf_event *event)
@@ -8441,6 +8450,24 @@ static int perf_kprobe_event_init(struct perf_event *event)
#endif /* CONFIG_KPROBE_EVENTS */
#ifdef CONFIG_UPROBE_EVENTS
+PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63");
+
+static struct attribute *uprobe_attrs[] = {
+ &format_attr_retprobe.attr,
+ &format_attr_ref_ctr_offset.attr,
+ NULL,
+};
+
+static struct attribute_group uprobe_format_group = {
+ .name = "format",
+ .attrs = uprobe_attrs,
+};
+
+static const struct attribute_group *uprobe_attr_groups[] = {
+ &uprobe_format_group,
+ NULL,
+};
+
static int perf_uprobe_event_init(struct perf_event *event);
static struct pmu perf_uprobe = {
.task_ctx_nr = perf_sw_context,
@@ -8450,12 +8477,13 @@ static struct pmu perf_uprobe = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
- .attr_groups = probe_attr_groups,
+ .attr_groups = uprobe_attr_groups,
};
static int perf_uprobe_event_init(struct perf_event *event)
{
int err;
+ unsigned long ref_ctr_offset;
bool is_retprobe;
if (event->attr.type != perf_uprobe.type)
@@ -8471,7 +8499,8 @@ static int perf_uprobe_event_init(struct perf_event *event)
return -EOPNOTSUPP;
is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
- err = perf_uprobe_init(event, is_retprobe);
+ ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
+ err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
if (err)
return err;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2bf792d22087..96d4bee83489 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -73,6 +73,7 @@ struct uprobe {
struct uprobe_consumer *consumers;
struct inode *inode; /* Also hold a ref to inode */
loff_t offset;
+ loff_t ref_ctr_offset;
unsigned long flags;
/*
@@ -88,6 +89,15 @@ struct uprobe {
struct arch_uprobe arch;
};
+struct delayed_uprobe {
+ struct list_head list;
+ struct uprobe *uprobe;
+ struct mm_struct *mm;
+};
+
+static DEFINE_MUTEX(delayed_uprobe_lock);
+static LIST_HEAD(delayed_uprobe_list);
+
/*
* Execute out of line area: anonymous executable mapping installed
* by the probed task to execute the copy of the original instruction
@@ -282,6 +292,166 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
return 1;
}
+static struct delayed_uprobe *
+delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
+{
+ struct delayed_uprobe *du;
+
+ list_for_each_entry(du, &delayed_uprobe_list, list)
+ if (du->uprobe == uprobe && du->mm == mm)
+ return du;
+ return NULL;
+}
+
+static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
+{
+ struct delayed_uprobe *du;
+
+ if (delayed_uprobe_check(uprobe, mm))
+ return 0;
+
+ du = kzalloc(sizeof(*du), GFP_KERNEL);
+ if (!du)
+ return -ENOMEM;
+
+ du->uprobe = uprobe;
+ du->mm = mm;
+ list_add(&du->list, &delayed_uprobe_list);
+ return 0;
+}
+
+static void delayed_uprobe_delete(struct delayed_uprobe *du)
+{
+ if (WARN_ON(!du))
+ return;
+ list_del(&du->list);
+ kfree(du);
+}
+
+static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
+{
+ struct list_head *pos, *q;
+ struct delayed_uprobe *du;
+
+ if (!uprobe && !mm)
+ return;
+
+ list_for_each_safe(pos, q, &delayed_uprobe_list) {
+ du = list_entry(pos, struct delayed_uprobe, list);
+
+ if (uprobe && du->uprobe != uprobe)
+ continue;
+ if (mm && du->mm != mm)
+ continue;
+
+ delayed_uprobe_delete(du);
+ }
+}
+
+static bool valid_ref_ctr_vma(struct uprobe *uprobe,
+ struct vm_area_struct *vma)
+{
+ unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset);
+
+ return uprobe->ref_ctr_offset &&
+ vma->vm_file &&
+ file_inode(vma->vm_file) == uprobe->inode &&
+ (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
+ vma->vm_start <= vaddr &&
+ vma->vm_end > vaddr;
+}
+
+static struct vm_area_struct *
+find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
+{
+ struct vm_area_struct *tmp;
+
+ for (tmp = mm->mmap; tmp; tmp = tmp->vm_next)
+ if (valid_ref_ctr_vma(uprobe, tmp))
+ return tmp;
+
+ return NULL;
+}
+
+static int
+__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
+{
+ void *kaddr;
+ struct page *page;
+ struct vm_area_struct *vma;
+ int ret;
+ short *ptr;
+
+ if (!vaddr || !d)
+ return -EINVAL;
+
+ ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+ FOLL_WRITE, &page, &vma, NULL);
+ if (unlikely(ret <= 0)) {
+ /*
+ * We are asking for 1 page. If get_user_pages_remote() fails,
+ * it may return 0, in that case we have to return error.
+ */
+ return ret == 0 ? -EBUSY : ret;
+ }
+
+ kaddr = kmap_atomic(page);
+ ptr = kaddr + (vaddr & ~PAGE_MASK);
+
+ if (unlikely(*ptr + d < 0)) {
+ pr_warn("ref_ctr going negative. vaddr: 0x%lx, "
+ "curr val: %d, delta: %d\n", vaddr, *ptr, d);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *ptr += d;
+ ret = 0;
+out:
+ kunmap_atomic(kaddr);
+ put_page(page);
+ return ret;
+}
+
+static void update_ref_ctr_warn(struct uprobe *uprobe,
+ struct mm_struct *mm, short d)
+{
+ pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
+ "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
+ d > 0 ? "increment" : "decrement", uprobe->inode->i_ino,
+ (unsigned long long) uprobe->offset,
+ (unsigned long long) uprobe->ref_ctr_offset, mm);
+}
+
+static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
+ short d)
+{
+ struct vm_area_struct *rc_vma;
+ unsigned long rc_vaddr;
+ int ret = 0;
+
+ rc_vma = find_ref_ctr_vma(uprobe, mm);
+
+ if (rc_vma) {
+ rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset);
+ ret = __update_ref_ctr(mm, rc_vaddr, d);
+ if (ret)
+ update_ref_ctr_warn(uprobe, mm, d);
+
+ if (d > 0)
+ return ret;
+ }
+
+ mutex_lock(&delayed_uprobe_lock);
+ if (d > 0)
+ ret = delayed_uprobe_add(uprobe, mm);
+ else
+ delayed_uprobe_remove(uprobe, mm);
+ mutex_unlock(&delayed_uprobe_lock);
+
+ return ret;
+}
+
/*
* NOTE:
* Expect the breakpoint instruction to be the smallest size instruction for
@@ -302,9 +472,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long vaddr, uprobe_opcode_t opcode)
{
+ struct uprobe *uprobe;
struct page *old_page, *new_page;
struct vm_area_struct *vma;
- int ret;
+ int ret, is_register, ref_ctr_updated = 0;
+
+ is_register = is_swbp_insn(&opcode);
+ uprobe = container_of(auprobe, struct uprobe, arch);
retry:
/* Read the page with vaddr into memory */
@@ -317,6 +491,15 @@ retry:
if (ret <= 0)
goto put_old;
+ /* We are going to replace instruction, update ref_ctr. */
+ if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
+ ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
+ if (ret)
+ goto put_old;
+
+ ref_ctr_updated = 1;
+ }
+
ret = anon_vma_prepare(vma);
if (ret)
goto put_old;
@@ -337,6 +520,11 @@ put_old:
if (unlikely(ret == -EAGAIN))
goto retry;
+
+ /* Revert back reference counter if instruction update failed. */
+ if (ret && is_register && ref_ctr_updated)
+ update_ref_ctr(uprobe, mm, -1);
+
return ret;
}
@@ -378,8 +566,15 @@ static struct uprobe *get_uprobe(struct uprobe *uprobe)
static void put_uprobe(struct uprobe *uprobe)
{
- if (atomic_dec_and_test(&uprobe->ref))
+ if (atomic_dec_and_test(&uprobe->ref)) {
+ /*
+ * If application munmap(exec_vma) before uprobe_unregister()
+ * gets called, we don't get a chance to remove uprobe from
+ * delayed_uprobe_list from remove_breakpoint(). Do it here.
+ */
+ delayed_uprobe_remove(uprobe, NULL);
kfree(uprobe);
+ }
}
static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -484,7 +679,18 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
return u;
}
-static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
+static void
+ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe)
+{
+ pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx "
+ "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n",
+ uprobe->inode->i_ino, (unsigned long long) uprobe->offset,
+ (unsigned long long) cur_uprobe->ref_ctr_offset,
+ (unsigned long long) uprobe->ref_ctr_offset);
+}
+
+static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
+ loff_t ref_ctr_offset)
{
struct uprobe *uprobe, *cur_uprobe;
@@ -494,6 +700,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
uprobe->inode = inode;
uprobe->offset = offset;
+ uprobe->ref_ctr_offset = ref_ctr_offset;
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
@@ -501,6 +708,12 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
cur_uprobe = insert_uprobe(uprobe);
/* a uprobe exists for this inode:offset combination */
if (cur_uprobe) {
+ if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) {
+ ref_ctr_mismatch_warn(cur_uprobe, uprobe);
+ put_uprobe(cur_uprobe);
+ kfree(uprobe);
+ return ERR_PTR(-EINVAL);
+ }
kfree(uprobe);
uprobe = cur_uprobe;
}
@@ -895,7 +1108,7 @@ EXPORT_SYMBOL_GPL(uprobe_unregister);
* else return 0 (success)
*/
static int __uprobe_register(struct inode *inode, loff_t offset,
- struct uprobe_consumer *uc)
+ loff_t ref_ctr_offset, struct uprobe_consumer *uc)
{
struct uprobe *uprobe;
int ret;
@@ -912,9 +1125,12 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
return -EINVAL;
retry:
- uprobe = alloc_uprobe(inode, offset);
+ uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
if (!uprobe)
return -ENOMEM;
+ if (IS_ERR(uprobe))
+ return PTR_ERR(uprobe);
+
/*
* We can race with uprobe_unregister()->delete_uprobe().
* Check uprobe_is_active() and retry if it is false.
@@ -938,10 +1154,17 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
int uprobe_register(struct inode *inode, loff_t offset,
struct uprobe_consumer *uc)
{
- return __uprobe_register(inode, offset, uc);
+ return __uprobe_register(inode, offset, 0, uc);
}
EXPORT_SYMBOL_GPL(uprobe_register);
+int uprobe_register_refctr(struct inode *inode, loff_t offset,
+ loff_t ref_ctr_offset, struct uprobe_consumer *uc)
+{
+ return __uprobe_register(inode, offset, ref_ctr_offset, uc);
+}
+EXPORT_SYMBOL_GPL(uprobe_register_refctr);
+
/*
* uprobe_apply - unregister an already registered probe.
* @inode: the file in which the probe has to be removed.
@@ -1060,6 +1283,35 @@ static void build_probe_list(struct inode *inode,
spin_unlock(&uprobes_treelock);
}
+/* @vma contains reference counter, not the probed instruction. */
+static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
+{
+ struct list_head *pos, *q;
+ struct delayed_uprobe *du;
+ unsigned long vaddr;
+ int ret = 0, err = 0;
+
+ mutex_lock(&delayed_uprobe_lock);
+ list_for_each_safe(pos, q, &delayed_uprobe_list) {
+ du = list_entry(pos, struct delayed_uprobe, list);
+
+ if (du->mm != vma->vm_mm ||
+ !valid_ref_ctr_vma(du->uprobe, vma))
+ continue;
+
+ vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset);
+ ret = __update_ref_ctr(vma->vm_mm, vaddr, 1);
+ if (ret) {
+ update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1);
+ if (!err)
+ err = ret;
+ }
+ delayed_uprobe_delete(du);
+ }
+ mutex_unlock(&delayed_uprobe_lock);
+ return err;
+}
+
/*
* Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
*
@@ -1072,7 +1324,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
struct uprobe *uprobe, *u;
struct inode *inode;
- if (no_uprobe_events() || !valid_vma(vma, true))
+ if (no_uprobe_events())
+ return 0;
+
+ if (vma->vm_file &&
+ (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
+ test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
+ delayed_ref_ctr_inc(vma);
+
+ if (!valid_vma(vma, true))
return 0;
inode = file_inode(vma->vm_file);
@@ -1246,6 +1506,10 @@ void uprobe_clear_state(struct mm_struct *mm)
{
struct xol_area *area = mm->uprobes_state.xol_area;
+ mutex_lock(&delayed_uprobe_lock);
+ delayed_uprobe_remove(NULL, mm);
+ mutex_unlock(&delayed_uprobe_lock);
+
if (!area)
return;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bf6f1d70484d..ff1c4b20cd0a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2727,6 +2727,7 @@ void trace_dump_stack(int skip)
__ftrace_trace_stack(global_trace.trace_buffer.buffer,
flags, skip, preempt_count(), NULL);
}
+EXPORT_SYMBOL_GPL(trace_dump_stack);
static DEFINE_PER_CPU(int, user_stack_count);
@@ -4621,13 +4622,18 @@ static const char readme_msg[] =
"place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
- "\t place: <path>:<offset>\n"
+ " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
#endif
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>\n"
+#else
"\t $stack<index>, $stack, $retval, $comm\n"
- "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
- "\t b<bit-width>@<bit-offset>/<container-size>\n"
+#endif
+ "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
+ "\t b<bit-width>@<bit-offset>/<container-size>,\n"
+ "\t <type>\\[<array-size>\\]\n"
#endif
" events/\t\t- Directory containing all trace event subsystems:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 69a3fe926e8c..76217bbef815 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -290,7 +290,8 @@ void perf_kprobe_destroy(struct perf_event *p_event)
#endif /* CONFIG_KPROBE_EVENTS */
#ifdef CONFIG_UPROBE_EVENTS
-int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
+int perf_uprobe_init(struct perf_event *p_event,
+ unsigned long ref_ctr_offset, bool is_retprobe)
{
int ret;
char *path = NULL;
@@ -312,8 +313,8 @@ int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
goto out;
}
- tp_event = create_local_trace_uprobe(
- path, p_event->attr.probe_offset, is_retprobe);
+ tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset,
+ ref_ctr_offset, is_retprobe);
if (IS_ERR(tp_event)) {
ret = PTR_ERR(tp_event);
goto out;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c30032367aab..fec67188c4d2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -14,6 +14,7 @@
#include "trace_kprobe_selftest.h"
#include "trace_probe.h"
+#include "trace_probe_tmpl.h"
#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096
@@ -61,9 +62,23 @@ static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}
-static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
{
- return !!strchr(trace_kprobe_symbol(tk), ':');
+ char *p;
+ bool ret;
+
+ if (!tk->symbol)
+ return false;
+ p = strchr(tk->symbol, ':');
+ if (!p)
+ return true;
+ *p = '\0';
+ mutex_lock(&module_mutex);
+ ret = !!find_module(tk->symbol);
+ mutex_unlock(&module_mutex);
+ *p = ':';
+
+ return ret;
}
static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
@@ -120,184 +135,6 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
-/* Memory fetching by symbol */
-struct symbol_cache {
- char *symbol;
- long offset;
- unsigned long addr;
-};
-
-unsigned long update_symbol_cache(struct symbol_cache *sc)
-{
- sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
-
- if (sc->addr)
- sc->addr += sc->offset;
-
- return sc->addr;
-}
-
-void free_symbol_cache(struct symbol_cache *sc)
-{
- kfree(sc->symbol);
- kfree(sc);
-}
-
-struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
-{
- struct symbol_cache *sc;
-
- if (!sym || strlen(sym) == 0)
- return NULL;
-
- sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
- if (!sc)
- return NULL;
-
- sc->symbol = kstrdup(sym, GFP_KERNEL);
- if (!sc->symbol) {
- kfree(sc);
- return NULL;
- }
- sc->offset = offset;
- update_symbol_cache(sc);
-
- return sc;
-}
-
-/*
- * Kprobes-specific fetch functions
- */
-#define DEFINE_FETCH_stack(type) \
-static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
- void *offset, void *dest) \
-{ \
- *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
- (unsigned int)((unsigned long)offset)); \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
-
-DEFINE_BASIC_FETCH_FUNCS(stack)
-/* No string on the stack entry */
-#define fetch_stack_string NULL
-#define fetch_stack_string_size NULL
-
-#define DEFINE_FETCH_memory(type) \
-static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
- void *addr, void *dest) \
-{ \
- type retval; \
- if (probe_kernel_address(addr, retval)) \
- *(type *)dest = 0; \
- else \
- *(type *)dest = retval; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
-
-DEFINE_BASIC_FETCH_FUNCS(memory)
-/*
- * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
- * length and relative data location.
- */
-static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
- void *addr, void *dest)
-{
- int maxlen = get_rloc_len(*(u32 *)dest);
- u8 *dst = get_rloc_data(dest);
- long ret;
-
- if (!maxlen)
- return;
-
- /*
- * Try to get string again, since the string can be changed while
- * probing.
- */
- ret = strncpy_from_unsafe(dst, addr, maxlen);
-
- if (ret < 0) { /* Failed to fetch string */
- dst[0] = '\0';
- *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
- } else {
- *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
- }
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
-
-/* Return the length of string -- including null terminal byte */
-static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
- void *addr, void *dest)
-{
- mm_segment_t old_fs;
- int ret, len = 0;
- u8 c;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- pagefault_disable();
-
- do {
- ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
- len++;
- } while (c && ret == 0 && len < MAX_STRING_SIZE);
-
- pagefault_enable();
- set_fs(old_fs);
-
- if (ret < 0) /* Failed to check the length */
- *(u32 *)dest = 0;
- else
- *(u32 *)dest = len;
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
-
-#define DEFINE_FETCH_symbol(type) \
-void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
-{ \
- struct symbol_cache *sc = data; \
- if (sc->addr) \
- fetch_memory_##type(regs, (void *)sc->addr, dest); \
- else \
- *(type *)dest = 0; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
-
-DEFINE_BASIC_FETCH_FUNCS(symbol)
-DEFINE_FETCH_symbol(string)
-DEFINE_FETCH_symbol(string_size)
-
-/* kprobes don't support file_offset fetch methods */
-#define fetch_file_offset_u8 NULL
-#define fetch_file_offset_u16 NULL
-#define fetch_file_offset_u32 NULL
-#define fetch_file_offset_u64 NULL
-#define fetch_file_offset_string NULL
-#define fetch_file_offset_string_size NULL
-
-/* Fetch type information table */
-static const struct fetch_type kprobes_fetch_type_table[] = {
- /* Special types */
- [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
- sizeof(u32), 1, "__data_loc char[]"),
- [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
- string_size, sizeof(u32), 0, "u32"),
- /* Basic types */
- ASSIGN_FETCH_TYPE(u8, u8, 0),
- ASSIGN_FETCH_TYPE(u16, u16, 0),
- ASSIGN_FETCH_TYPE(u32, u32, 0),
- ASSIGN_FETCH_TYPE(u64, u64, 0),
- ASSIGN_FETCH_TYPE(s8, u8, 1),
- ASSIGN_FETCH_TYPE(s16, u16, 1),
- ASSIGN_FETCH_TYPE(s32, u32, 1),
- ASSIGN_FETCH_TYPE(s64, u64, 1),
- ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
-
- ASSIGN_FETCH_TYPE_END
-};
-
/*
* Allocate new trace_probe and initialize it (including kprobes).
*/
@@ -540,8 +377,11 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
return -EINVAL;
}
- for (i = 0; i < tk->tp.nr_args; i++)
- traceprobe_update_arg(&tk->tp.args[i]);
+ for (i = 0; i < tk->tp.nr_args; i++) {
+ ret = traceprobe_update_arg(&tk->tp.args[i]);
+ if (ret)
+ return ret;
+ }
/* Set/clear disabled flag according to tp->flag */
if (trace_probe_is_enabled(&tk->tp))
@@ -554,19 +394,13 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
else
ret = register_kprobe(&tk->rp.kp);
- if (ret == 0)
+ if (ret == 0) {
tk->tp.flags |= TP_FLAG_REGISTERED;
- else {
- if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
- pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
- ret = 0;
- } else if (ret == -EILSEQ) {
- pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
- tk->rp.kp.addr);
- ret = -EINVAL;
- }
+ } else if (ret == -EILSEQ) {
+ pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
+ tk->rp.kp.addr);
+ ret = -EINVAL;
}
-
return ret;
}
@@ -629,6 +463,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
/* Register k*probe */
ret = __register_trace_kprobe(tk);
+ if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
+ pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
+ ret = 0;
+ }
+
if (ret < 0)
unregister_kprobe_event(tk);
else
@@ -713,13 +552,15 @@ static int create_trace_kprobe(int argc, char **argv)
long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
+ unsigned int flags = TPARG_FL_KERNEL;
/* argc must be >= 1 */
if (argv[0][0] == 'p')
is_return = false;
- else if (argv[0][0] == 'r')
+ else if (argv[0][0] == 'r') {
is_return = true;
- else if (argv[0][0] == '-')
+ flags |= TPARG_FL_RETURN;
+ } else if (argv[0][0] == '-')
is_delete = true;
else {
pr_info("Probe definition must be started with 'p', 'r' or"
@@ -749,10 +590,13 @@ static int create_trace_kprobe(int argc, char **argv)
}
if (event) {
- if (strchr(event, '/')) {
+ char *slash;
+
+ slash = strchr(event, '/');
+ if (slash) {
group = event;
- event = strchr(group, '/') + 1;
- event[-1] = '\0';
+ event = slash + 1;
+ slash[0] = '\0';
if (strlen(group) == 0) {
pr_info("Group name is not specified\n");
return -EINVAL;
@@ -802,8 +646,9 @@ static int create_trace_kprobe(int argc, char **argv)
pr_info("Failed to parse either an address or a symbol.\n");
return ret;
}
- if (offset && is_return &&
- !kprobe_on_func_entry(NULL, symbol, offset)) {
+ if (kprobe_on_func_entry(NULL, symbol, offset))
+ flags |= TPARG_FL_FENTRY;
+ if (offset && is_return && !(flags & TPARG_FL_FENTRY)) {
pr_info("Given offset is not valid for return probe.\n");
return -EINVAL;
}
@@ -873,8 +718,7 @@ static int create_trace_kprobe(int argc, char **argv)
/* Parse fetch argument */
ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
- is_return, true,
- kprobes_fetch_type_table);
+ flags);
if (ret) {
pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
goto error;
@@ -1028,6 +872,106 @@ static const struct file_operations kprobe_profile_ops = {
.release = seq_release,
};
+/* Kprobe specific fetch functions */
+
+/* Return the length of string -- including null terminal byte */
+static nokprobe_inline int
+fetch_store_strlen(unsigned long addr)
+{
+ mm_segment_t old_fs;
+ int ret, len = 0;
+ u8 c;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ pagefault_disable();
+
+ do {
+ ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
+ len++;
+ } while (c && ret == 0 && len < MAX_STRING_SIZE);
+
+ pagefault_enable();
+ set_fs(old_fs);
+
+ return (ret < 0) ? ret : len;
+}
+
+/*
+ * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
+ * length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_string(unsigned long addr, void *dest, void *base)
+{
+ int maxlen = get_loc_len(*(u32 *)dest);
+ u8 *dst = get_loc_data(dest, base);
+ long ret;
+
+ if (unlikely(!maxlen))
+ return -ENOMEM;
+ /*
+ * Try to get string again, since the string can be changed while
+ * probing.
+ */
+ ret = strncpy_from_unsafe(dst, (void *)addr, maxlen);
+
+ if (ret >= 0)
+ *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
+ return ret;
+}
+
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size)
+{
+ return probe_kernel_read(dest, src, size);
+}
+
+/* Note that we don't verify it, since the code does not come from user space */
+static int
+process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
+ void *base)
+{
+ unsigned long val;
+
+retry:
+ /* 1st stage: get value from context */
+ switch (code->op) {
+ case FETCH_OP_REG:
+ val = regs_get_register(regs, code->param);
+ break;
+ case FETCH_OP_STACK:
+ val = regs_get_kernel_stack_nth(regs, code->param);
+ break;
+ case FETCH_OP_STACKP:
+ val = kernel_stack_pointer(regs);
+ break;
+ case FETCH_OP_RETVAL:
+ val = regs_return_value(regs);
+ break;
+ case FETCH_OP_IMM:
+ val = code->immediate;
+ break;
+ case FETCH_OP_COMM:
+ val = (unsigned long)current->comm;
+ break;
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ case FETCH_OP_ARG:
+ val = regs_get_kernel_argument(regs, code->param);
+ break;
+#endif
+ case FETCH_NOP_SYMBOL: /* Ignore a place holder */
+ code++;
+ goto retry;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+
+ return process_fetch_insn_bottom(code, val, dest, base);
+}
+NOKPROBE_SYMBOL(process_fetch_insn)
+
/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
@@ -1059,7 +1003,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
entry = ring_buffe