From d6b183eda466415bb5defcf9afe4cb64734839e8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 24 Aug 2018 16:20:28 -0400 Subject: tracing/kprobe: Remove unneeded extra strchr() from create_trace_kprobe() By utilizing a temporary variable, we can avoid adding another call to strchr(). Instead, save the first call to a temp variable, and then use that variable as the reference to set the event variable. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c30032367aab..508396edc56a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -749,10 +749,13 @@ static int create_trace_kprobe(int argc, char **argv) } if (event) { - if (strchr(event, '/')) { + char *slash; + + slash = strchr(event, '/'); + if (slash) { group = event; - event = strchr(group, '/') + 1; - event[-1] = '\0'; + event = slash + 1; + slash[0] = '\0'; if (strlen(group) == 0) { pr_info("Group name is not specified\n"); return -EINVAL; -- cgit v1.2.3 From 1cc33161a83d20b5462b1e93f95d3ce6388079ee Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 20 Aug 2018 10:12:47 +0530 Subject: uprobes: Support SDT markers having reference count (semaphore) Userspace Statically Defined Tracepoints[1] are dtrace style markers inside userspace applications. Applications like PostgreSQL, MySQL, Pthread, Perl, Python, Java, Ruby, Node.js, libvirt, QEMU, glib etc have these markers embedded in them. These markers are added by developer at important places in the code. Each marker source expands to a single nop instruction in the compiled code but there may be additional overhead for computing the marker arguments which expands to couple of instructions. In case the overhead is more, execution of it can be omitted by runtime if() condition when no one is tracing on the marker: if (reference_counter > 0) { Execute marker instructions; } Default value of reference counter is 0. Tracer has to increment the reference counter before tracing on a marker and decrement it when done with the tracing. Implement the reference counter logic in core uprobe. User will be able to use it from trace_uprobe as well as from kernel module. New trace_uprobe definition with reference counter will now be: :[(ref_ctr_offset)] where ref_ctr_offset is an optional field. For kernel module, new variant of uprobe_register() has been introduced: uprobe_register_refctr(inode, offset, ref_ctr_offset, consumer) No new variant for uprobe_unregister() because it's assumed to have only one reference counter for one uprobe. [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation Note: 'reference counter' is called as 'semaphore' in original Dtrace (or Systemtap, bcc and even in ELF) documentation and code. But the term 'semaphore' is misleading in this context. This is just a counter used to hold number of tracers tracing on a marker. This is not really used for any synchronization. So we are calling it a 'reference counter' in kernel / perf code. Link: http://lkml.kernel.org/r/20180820044250.11659-2-ravi.bangoria@linux.ibm.com Reviewed-by: Masami Hiramatsu [Only trace_uprobe.c] Reviewed-by: Oleg Nesterov Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Ravi Bangoria Signed-off-by: Steven Rostedt (VMware) --- kernel/events/uprobes.c | 259 ++++++++++++++++++++++++++++++++++++++++++-- kernel/trace/trace.c | 2 +- kernel/trace/trace_uprobe.c | 38 ++++++- 3 files changed, 288 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 3207a4d26849..934feb39f6be 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -73,6 +73,7 @@ struct uprobe { struct uprobe_consumer *consumers; struct inode *inode; /* Also hold a ref to inode */ loff_t offset; + loff_t ref_ctr_offset; unsigned long flags; /* @@ -88,6 +89,15 @@ struct uprobe { struct arch_uprobe arch; }; +struct delayed_uprobe { + struct list_head list; + struct uprobe *uprobe; + struct mm_struct *mm; +}; + +static DEFINE_MUTEX(delayed_uprobe_lock); +static LIST_HEAD(delayed_uprobe_list); + /* * Execute out of line area: anonymous executable mapping installed * by the probed task to execute the copy of the original instruction @@ -282,6 +292,166 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t return 1; } +static struct delayed_uprobe * +delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct delayed_uprobe *du; + + list_for_each_entry(du, &delayed_uprobe_list, list) + if (du->uprobe == uprobe && du->mm == mm) + return du; + return NULL; +} + +static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct delayed_uprobe *du; + + if (delayed_uprobe_check(uprobe, mm)) + return 0; + + du = kzalloc(sizeof(*du), GFP_KERNEL); + if (!du) + return -ENOMEM; + + du->uprobe = uprobe; + du->mm = mm; + list_add(&du->list, &delayed_uprobe_list); + return 0; +} + +static void delayed_uprobe_delete(struct delayed_uprobe *du) +{ + if (WARN_ON(!du)) + return; + list_del(&du->list); + kfree(du); +} + +static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct list_head *pos, *q; + struct delayed_uprobe *du; + + if (!uprobe && !mm) + return; + + list_for_each_safe(pos, q, &delayed_uprobe_list) { + du = list_entry(pos, struct delayed_uprobe, list); + + if (uprobe && du->uprobe != uprobe) + continue; + if (mm && du->mm != mm) + continue; + + delayed_uprobe_delete(du); + } +} + +static bool valid_ref_ctr_vma(struct uprobe *uprobe, + struct vm_area_struct *vma) +{ + unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset); + + return uprobe->ref_ctr_offset && + vma->vm_file && + file_inode(vma->vm_file) == uprobe->inode && + (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && + vma->vm_start <= vaddr && + vma->vm_end > vaddr; +} + +static struct vm_area_struct * +find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct vm_area_struct *tmp; + + for (tmp = mm->mmap; tmp; tmp = tmp->vm_next) + if (valid_ref_ctr_vma(uprobe, tmp)) + return tmp; + + return NULL; +} + +static int +__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) +{ + void *kaddr; + struct page *page; + struct vm_area_struct *vma; + int ret; + short *ptr; + + if (!vaddr || !d) + return -EINVAL; + + ret = get_user_pages_remote(NULL, mm, vaddr, 1, + FOLL_WRITE, &page, &vma, NULL); + if (unlikely(ret <= 0)) { + /* + * We are asking for 1 page. If get_user_pages_remote() fails, + * it may return 0, in that case we have to return error. + */ + return ret == 0 ? -EBUSY : ret; + } + + kaddr = kmap_atomic(page); + ptr = kaddr + (vaddr & ~PAGE_MASK); + + if (unlikely(*ptr + d < 0)) { + pr_warn("ref_ctr going negative. vaddr: 0x%lx, " + "curr val: %d, delta: %d\n", vaddr, *ptr, d); + ret = -EINVAL; + goto out; + } + + *ptr += d; + ret = 0; +out: + kunmap_atomic(kaddr); + put_page(page); + return ret; +} + +static void update_ref_ctr_warn(struct uprobe *uprobe, + struct mm_struct *mm, short d) +{ + pr_warn("ref_ctr %s failed for inode: 0x%lx offset: " + "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n", + d > 0 ? "increment" : "decrement", uprobe->inode->i_ino, + (unsigned long long) uprobe->offset, + (unsigned long long) uprobe->ref_ctr_offset, mm); +} + +static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm, + short d) +{ + struct vm_area_struct *rc_vma; + unsigned long rc_vaddr; + int ret = 0; + + rc_vma = find_ref_ctr_vma(uprobe, mm); + + if (rc_vma) { + rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset); + ret = __update_ref_ctr(mm, rc_vaddr, d); + if (ret) + update_ref_ctr_warn(uprobe, mm, d); + + if (d > 0) + return ret; + } + + mutex_lock(&delayed_uprobe_lock); + if (d > 0) + ret = delayed_uprobe_add(uprobe, mm); + else + delayed_uprobe_remove(uprobe, mm); + mutex_unlock(&delayed_uprobe_lock); + + return ret; +} + /* * NOTE: * Expect the breakpoint instruction to be the smallest size instruction for @@ -302,9 +472,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { + struct uprobe *uprobe; struct page *old_page, *new_page; struct vm_area_struct *vma; - int ret; + int ret, is_register, ref_ctr_updated = 0; + + is_register = is_swbp_insn(&opcode); + uprobe = container_of(auprobe, struct uprobe, arch); retry: /* Read the page with vaddr into memory */ @@ -317,6 +491,15 @@ retry: if (ret <= 0) goto put_old; + /* We are going to replace instruction, update ref_ctr. */ + if (!ref_ctr_updated && uprobe->ref_ctr_offset) { + ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1); + if (ret) + goto put_old; + + ref_ctr_updated = 1; + } + ret = anon_vma_prepare(vma); if (ret) goto put_old; @@ -337,6 +520,11 @@ put_old: if (unlikely(ret == -EAGAIN)) goto retry; + + /* Revert back reference counter if instruction update failed. */ + if (ret && is_register && ref_ctr_updated) + update_ref_ctr(uprobe, mm, -1); + return ret; } @@ -378,8 +566,15 @@ static struct uprobe *get_uprobe(struct uprobe *uprobe) static void put_uprobe(struct uprobe *uprobe) { - if (atomic_dec_and_test(&uprobe->ref)) + if (atomic_dec_and_test(&uprobe->ref)) { + /* + * If application munmap(exec_vma) before uprobe_unregister() + * gets called, we don't get a chance to remove uprobe from + * delayed_uprobe_list from remove_breakpoint(). Do it here. + */ + delayed_uprobe_remove(uprobe, NULL); kfree(uprobe); + } } static int match_uprobe(struct uprobe *l, struct uprobe *r) @@ -484,7 +679,8 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe) return u; } -static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) +static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, + loff_t ref_ctr_offset) { struct uprobe *uprobe, *cur_uprobe; @@ -494,6 +690,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->inode = inode; uprobe->offset = offset; + uprobe->ref_ctr_offset = ref_ctr_offset; init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); @@ -895,7 +1092,7 @@ EXPORT_SYMBOL_GPL(uprobe_unregister); * else return 0 (success) */ static int __uprobe_register(struct inode *inode, loff_t offset, - struct uprobe_consumer *uc) + loff_t ref_ctr_offset, struct uprobe_consumer *uc) { struct uprobe *uprobe; int ret; @@ -912,7 +1109,7 @@ static int __uprobe_register(struct inode *inode, loff_t offset, return -EINVAL; retry: - uprobe = alloc_uprobe(inode, offset); + uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (!uprobe) return -ENOMEM; /* @@ -938,10 +1135,17 @@ static int __uprobe_register(struct inode *inode, loff_t offset, int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { - return __uprobe_register(inode, offset, uc); + return __uprobe_register(inode, offset, 0, uc); } EXPORT_SYMBOL_GPL(uprobe_register); +int uprobe_register_refctr(struct inode *inode, loff_t offset, + loff_t ref_ctr_offset, struct uprobe_consumer *uc) +{ + return __uprobe_register(inode, offset, ref_ctr_offset, uc); +} +EXPORT_SYMBOL_GPL(uprobe_register_refctr); + /* * uprobe_apply - unregister an already registered probe. * @inode: the file in which the probe has to be removed. @@ -1060,6 +1264,35 @@ static void build_probe_list(struct inode *inode, spin_unlock(&uprobes_treelock); } +/* @vma contains reference counter, not the probed instruction. */ +static int delayed_ref_ctr_inc(struct vm_area_struct *vma) +{ + struct list_head *pos, *q; + struct delayed_uprobe *du; + unsigned long vaddr; + int ret = 0, err = 0; + + mutex_lock(&delayed_uprobe_lock); + list_for_each_safe(pos, q, &delayed_uprobe_list) { + du = list_entry(pos, struct delayed_uprobe, list); + + if (du->mm != vma->vm_mm || + !valid_ref_ctr_vma(du->uprobe, vma)) + continue; + + vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset); + ret = __update_ref_ctr(vma->vm_mm, vaddr, 1); + if (ret) { + update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1); + if (!err) + err = ret; + } + delayed_uprobe_delete(du); + } + mutex_unlock(&delayed_uprobe_lock); + return err; +} + /* * Called from mmap_region/vma_adjust with mm->mmap_sem acquired. * @@ -1072,7 +1305,15 @@ int uprobe_mmap(struct vm_area_struct *vma) struct uprobe *uprobe, *u; struct inode *inode; - if (no_uprobe_events() || !valid_vma(vma, true)) + if (no_uprobe_events()) + return 0; + + if (vma->vm_file && + (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && + test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) + delayed_ref_ctr_inc(vma); + + if (!valid_vma(vma, true)) return 0; inode = file_inode(vma->vm_file); @@ -1246,6 +1487,10 @@ void uprobe_clear_state(struct mm_struct *mm) { struct xol_area *area = mm->uprobes_state.xol_area; + mutex_lock(&delayed_uprobe_lock); + delayed_uprobe_remove(NULL, mm); + mutex_unlock(&delayed_uprobe_lock); + if (!area) return; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bf6f1d70484d..147be8523560 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4621,7 +4621,7 @@ static const char readme_msg[] = "place (kretprobe): [:][+]|\n" #endif #ifdef CONFIG_UPROBE_EVENTS - "\t place: :\n" + " place (uprobe): :[(ref_ctr_offset)]\n" #endif "\t args: =fetcharg[:type]\n" "\t fetcharg: %, @
, @[+|-],\n" diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e696667da29a..7b85172beab6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -47,6 +47,7 @@ struct trace_uprobe { struct inode *inode; char *filename; unsigned long offset; + unsigned long ref_ctr_offset; unsigned long nhit; struct trace_probe tp; }; @@ -352,10 +353,10 @@ end: static int create_trace_uprobe(int argc, char **argv) { struct trace_uprobe *tu; - char *arg, *event, *group, *filename; + char *arg, *event, *group, *filename, *rctr, *rctr_end; char buf[MAX_EVENT_NAME_LEN]; struct path path; - unsigned long offset; + unsigned long offset, ref_ctr_offset; bool is_delete, is_return; int i, ret; @@ -364,6 +365,7 @@ static int create_trace_uprobe(int argc, char **argv) is_return = false; event = NULL; group = NULL; + ref_ctr_offset = 0; /* argc must be >= 1 */ if (argv[0][0] == '-') @@ -438,6 +440,26 @@ static int create_trace_uprobe(int argc, char **argv) goto fail_address_parse; } + /* Parse reference counter offset if specified. */ + rctr = strchr(arg, '('); + if (rctr) { + rctr_end = strchr(rctr, ')'); + if (rctr > rctr_end || *(rctr_end + 1) != 0) { + ret = -EINVAL; + pr_info("Invalid reference counter offset.\n"); + goto fail_address_parse; + } + + *rctr++ = '\0'; + *rctr_end = '\0'; + ret = kstrtoul(rctr, 0, &ref_ctr_offset); + if (ret) { + pr_info("Invalid reference counter offset.\n"); + goto fail_address_parse; + } + } + + /* Parse uprobe offset. */ ret = kstrtoul(arg, 0, &offset); if (ret) goto fail_address_parse; @@ -472,6 +494,7 @@ static int create_trace_uprobe(int argc, char **argv) goto fail_address_parse; } tu->offset = offset; + tu->ref_ctr_offset = ref_ctr_offset; tu->path = path; tu->filename = kstrdup(filename, GFP_KERNEL); @@ -590,6 +613,9 @@ static int probes_seq_show(struct seq_file *m, void *v) trace_event_name(&tu->tp.call), tu->filename, (int)(sizeof(void *) * 2), tu->offset); + if (tu->ref_ctr_offset) + seq_printf(m, "(0x%lx)", tu->ref_ctr_offset); + for (i = 0; i < tu->tp.nr_args; i++) seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); @@ -905,7 +931,13 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, tu->consumer.filter = filter; tu->inode = d_real_inode(tu->path.dentry); - ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + if (tu->ref_ctr_offset) { + ret = uprobe_register_refctr(tu->inode, tu->offset, + tu->ref_ctr_offset, &tu->consumer); + } else { + ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + } + if (ret) goto err_buffer; -- cgit v1.2.3 From 22bad38286d9a652d7061a02f9743bb2ebb84e59 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 20 Aug 2018 10:12:48 +0530 Subject: uprobes/sdt: Prevent multiple reference counter for same uprobe We assume to have only one reference counter for one uprobe. Don't allow user to register multiple uprobes having same inode+offset but different reference counter. Link: http://lkml.kernel.org/r/20180820044250.11659-3-ravi.bangoria@linux.ibm.com Acked-by: Srikar Dronamraju Reviewed-by: Oleg Nesterov Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Ravi Bangoria Signed-off-by: Steven Rostedt (VMware) --- kernel/events/uprobes.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 934feb39f6be..96fb51f3994f 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -679,6 +679,16 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe) return u; } +static void +ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe) +{ + pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx " + "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n", + uprobe->inode->i_ino, (unsigned long long) uprobe->offset, + (unsigned long long) cur_uprobe->ref_ctr_offset, + (unsigned long long) uprobe->ref_ctr_offset); +} + static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, loff_t ref_ctr_offset) { @@ -698,6 +708,12 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, cur_uprobe = insert_uprobe(uprobe); /* a uprobe exists for this inode:offset combination */ if (cur_uprobe) { + if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) { + ref_ctr_mismatch_warn(cur_uprobe, uprobe); + put_uprobe(cur_uprobe); + kfree(uprobe); + return ERR_PTR(-EINVAL); + } kfree(uprobe); uprobe = cur_uprobe; } @@ -1112,6 +1128,9 @@ static int __uprobe_register(struct inode *inode, loff_t offset, uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (!uprobe) return -ENOMEM; + if (IS_ERR(uprobe)) + return PTR_ERR(uprobe); + /* * We can race with uprobe_unregister()->delete_uprobe(). * Check uprobe_is_active() and retry if it is false. -- cgit v1.2.3 From ccea8727dc27d8f46df6557f9260ab32760ef409 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 20 Aug 2018 10:12:49 +0530 Subject: trace_uprobe/sdt: Prevent multiple reference counter for same uprobe We assume to have only one reference counter for one uprobe. Don't allow user to add multiple trace_uprobe entries having same inode+offset but different reference counter. Ex, # echo "p:sdt_tick/loop2 /home/ravi/tick:0x6e4(0x10036)" > uprobe_events # echo "p:sdt_tick/loop2_1 /home/ravi/tick:0x6e4(0xfffff)" >> uprobe_events bash: echo: write error: Invalid argument # dmesg trace_kprobe: Reference counter offset mismatch. There is one exception though: When user is trying to replace the old entry with the new one, we allow this if the new entry does not conflict with any other existing entries. Link: http://lkml.kernel.org/r/20180820044250.11659-4-ravi.bangoria@linux.ibm.com Acked-by: Srikar Dronamraju Reviewed-by: Song Liu Reviewed-by: Oleg Nesterov Tested-by: Song Liu Signed-off-by: Ravi Bangoria Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7b85172beab6..3a7c73c40007 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -312,6 +312,35 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) return 0; } +/* + * Uprobe with multiple reference counter is not allowed. i.e. + * If inode and offset matches, reference counter offset *must* + * match as well. Though, there is one exception: If user is + * replacing old trace_uprobe with new one(same group/event), + * then we allow same uprobe with new reference counter as far + * as the new one does not conflict with any other existing + * ones. + */ +static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new) +{ + struct trace_uprobe *tmp, *old = NULL; + struct inode *new_inode = d_real_inode(new->path.dentry); + + old = find_probe_event(trace_event_name(&new->tp.call), + new->tp.call.class->system); + + list_for_each_entry(tmp, &uprobe_list, list) { + if ((old ? old != tmp : true) && + new_inode == d_real_inode(tmp->path.dentry) && + new->offset == tmp->offset && + new->ref_ctr_offset != tmp->ref_ctr_offset) { + pr_warn("Reference counter offset mismatch."); + return ERR_PTR(-EINVAL); + } + } + return old; +} + /* Register a trace_uprobe and probe_event */ static int register_trace_uprobe(struct trace_uprobe *tu) { @@ -321,8 +350,12 @@ static int register_trace_uprobe(struct trace_uprobe *tu) mutex_lock(&uprobe_lock); /* register as an event */ - old_tu = find_probe_event(trace_event_name(&tu->tp.call), - tu->tp.call.class->system); + old_tu = find_old_trace_uprobe(tu); + if (IS_ERR(old_tu)) { + ret = PTR_ERR(old_tu); + goto end; + } + if (old_tu) { /* delete old event */ ret = unregister_trace_uprobe(old_tu); -- cgit v1.2.3 From a6ca88b241d5e929e6e60b12ad8cd288f0ffa256 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 1 Oct 2018 22:36:36 -0700 Subject: trace_uprobe: support reference counter in fd-based uprobe This patch enables uprobes with reference counter in fd-based uprobe. Highest 32 bits of perf_event_attr.config is used to stored offset of the reference count (semaphore). Format information in /sys/bus/event_source/devices/uprobe/format/ is updated to reflect this new feature. Link: http://lkml.kernel.org/r/20181002053636.1896903-1-songliubraving@fb.com Cc: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Reviewed-and-tested-by: Ravi Bangoria Signed-off-by: Song Liu Signed-off-by: Steven Rostedt (VMware) --- kernel/events/core.c | 49 ++++++++++++++++++++++++++++++++--------- kernel/trace/trace_event_perf.c | 7 +++--- kernel/trace/trace_probe.h | 3 ++- kernel/trace/trace_uprobe.c | 4 +++- 4 files changed, 48 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index c80549bf82c6..65b30773af3e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8368,30 +8368,39 @@ static struct pmu perf_tracepoint = { * * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe * if not set, create kprobe/uprobe + * + * The following values specify a reference counter (or semaphore in the + * terminology of tools like dtrace, systemtap, etc.) Userspace Statically + * Defined Tracepoints (USDT). Currently, we use 40 bit for the offset. + * + * PERF_UPROBE_REF_CTR_OFFSET_BITS # of bits in config as th offset + * PERF_UPROBE_REF_CTR_OFFSET_SHIFT # of bits to shift left */ enum perf_probe_config { PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0, /* [k,u]retprobe */ + PERF_UPROBE_REF_CTR_OFFSET_BITS = 32, + PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS, }; PMU_FORMAT_ATTR(retprobe, "config:0"); +#endif -static struct attribute *probe_attrs[] = { +#ifdef CONFIG_KPROBE_EVENTS +static struct attribute *kprobe_attrs[] = { &format_attr_retprobe.attr, NULL, }; -static struct attribute_group probe_format_group = { +static struct attribute_group kprobe_format_group = { .name = "format", - .attrs = probe_attrs, + .attrs = kprobe_attrs, }; -static const struct attribute_group *probe_attr_groups[] = { - &probe_format_group, +static const struct attribute_group *kprobe_attr_groups[] = { + &kprobe_format_group, NULL, }; -#endif -#ifdef CONFIG_KPROBE_EVENTS static int perf_kprobe_event_init(struct perf_event *event); static struct pmu perf_kprobe = { .task_ctx_nr = perf_sw_context, @@ -8401,7 +8410,7 @@ static struct pmu perf_kprobe = { .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, - .attr_groups = probe_attr_groups, + .attr_groups = kprobe_attr_groups, }; static int perf_kprobe_event_init(struct perf_event *event) @@ -8433,6 +8442,24 @@ static int perf_kprobe_event_init(struct perf_event *event) #endif /* CONFIG_KPROBE_EVENTS */ #ifdef CONFIG_UPROBE_EVENTS +PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63"); + +static struct attribute *uprobe_attrs[] = { + &format_attr_retprobe.attr, + &format_attr_ref_ctr_offset.attr, + NULL, +}; + +static struct attribute_group uprobe_format_group = { + .name = "format", + .attrs = uprobe_attrs, +}; + +static const struct attribute_group *uprobe_attr_groups[] = { + &uprobe_format_group, + NULL, +}; + static int perf_uprobe_event_init(struct perf_event *event); static struct pmu perf_uprobe = { .task_ctx_nr = perf_sw_context, @@ -8442,12 +8469,13 @@ static struct pmu perf_uprobe = { .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, - .attr_groups = probe_attr_groups, + .attr_groups = uprobe_attr_groups, }; static int perf_uprobe_event_init(struct perf_event *event) { int err; + unsigned long ref_ctr_offset; bool is_retprobe; if (event->attr.type != perf_uprobe.type) @@ -8463,7 +8491,8 @@ static int perf_uprobe_event_init(struct perf_event *event) return -EOPNOTSUPP; is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; - err = perf_uprobe_init(event, is_retprobe); + ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT; + err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe); if (err) return err; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 69a3fe926e8c..76217bbef815 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -290,7 +290,8 @@ void perf_kprobe_destroy(struct perf_event *p_event) #endif /* CONFIG_KPROBE_EVENTS */ #ifdef CONFIG_UPROBE_EVENTS -int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe) +int perf_uprobe_init(struct perf_event *p_event, + unsigned long ref_ctr_offset, bool is_retprobe) { int ret; char *path = NULL; @@ -312,8 +313,8 @@ int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe) goto out; } - tp_event = create_local_trace_uprobe( - path, p_event->attr.probe_offset, is_retprobe); + tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset, + ref_ctr_offset, is_retprobe); if (IS_ERR(tp_event)) { ret = PTR_ERR(tp_event); goto out; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5f52668e165d..03b10f3201a5 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -412,6 +412,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, extern void destroy_local_trace_kprobe(struct trace_event_call *event_call); extern struct trace_event_call * -create_local_trace_uprobe(char *name, unsigned long offs, bool is_return); +create_local_trace_uprobe(char *name, unsigned long offs, + unsigned long ref_ctr_offset, bool is_return); extern void destroy_local_trace_uprobe(struct trace_event_call *event_call); #endif diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3a7c73c40007..d09638706fe0 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1405,7 +1405,8 @@ static int unregister_uprobe_event(struct trace_uprobe *tu) #ifdef CONFIG_PERF_EVENTS struct trace_event_call * -create_local_trace_uprobe(char *name, unsigned long offs, bool is_return) +create_local_trace_uprobe(char *name, unsigned long offs, + unsigned long ref_ctr_offset, bool is_return) { struct trace_uprobe *tu; struct path path; @@ -1437,6 +1438,7 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return) tu->offset = offs; tu->path = path; + tu->ref_ctr_offset = ref_ctr_offset; tu->filename = kstrdup(name, GFP_KERNEL); init_trace_event_call(tu, &tu->tp.call); -- cgit v1.2.3 From 56de763052792669d61d79a087611da9a7f04d4e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:16:36 +0900 Subject: tracing: probeevent: Cleanup print argument functions Cleanup the print-argument function to decouple it into print-name and print-value, so that it can support more flexible expression, like array type. Link: http://lkml.kernel.org/r/152465859635.26224.13452846788717102315.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 20 ++++++-------------- kernel/trace/trace_probe.c | 12 +++++------- kernel/trace/trace_probe.h | 19 ++++++++++++++++--- kernel/trace/trace_uprobe.c | 9 ++------- 4 files changed, 29 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 508396edc56a..6326c71181aa 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1136,8 +1136,6 @@ print_kprobe_event(struct trace_iterator *iter, int flags, struct kprobe_trace_entry_head *field; struct trace_seq *s = &iter->seq; struct trace_probe *tp; - u8 *data; - int i; field = (struct kprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); @@ -1149,11 +1147,9 @@ print_kprobe_event(struct trace_iterator *iter, int flags, trace_seq_putc(s, ')'); - data = (u8 *)&field[1]; - for (i = 0; i < tp->nr_args; i++) - if (!tp->args[i].type->print(s, tp->args[i].name, - data + tp->args[i].offset, field)) - goto out; + if (print_probe_args(s, tp->args, tp->nr_args, + (u8 *)&field[1], field) < 0) + goto out; trace_seq_putc(s, '\n'); out: @@ -1167,8 +1163,6 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, struct kretprobe_trace_entry_head *field; struct trace_seq *s = &iter->seq; struct trace_probe *tp; - u8 *data; - int i; field = (struct kretprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); @@ -1185,11 +1179,9 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, trace_seq_putc(s, ')'); - data = (u8 *)&field[1]; - for (i = 0; i < tp->nr_args; i++) - if (!tp->args[i].type->print(s, tp->args[i].name, - data + tp->args[i].offset, field)) - goto out; + if (print_probe_args(s, tp->args, tp->nr_args, + (u8 *)&field[1], field) < 0) + goto out; trace_seq_putc(s, '\n'); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e99c3ce7aa65..e2c184eaa7db 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -26,10 +26,9 @@ const char *reserved_field_names[] = { /* Printing in basic type function template */ #define DEFINE_BASIC_PRINT_TYPE_FUNC(tname, type, fmt) \ -int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, const char *name, \ - void *data, void *ent) \ +int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, void *data, void *ent)\ { \ - trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ + trace_seq_printf(s, fmt, *(type *)data); \ return !trace_seq_has_overflowed(s); \ } \ const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \ @@ -49,15 +48,14 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x") DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx") /* Print type function for string type */ -int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name, - void *data, void *ent) +int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent) { int len = *(u32 *)data >> 16; if (!len) - trace_seq_printf(s, " %s=(fault)", name); + trace_seq_puts(s, "(fault)"); else - trace_seq_printf(s, " %s=\"%s\"", name, + trace_seq_printf(s, "\"%s\"", (const char *)get_loc_data(data, ent)); return !trace_seq_has_overflowed(s); } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 03b10f3201a5..8254a061ac35 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -82,7 +82,7 @@ static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) /* Data fetch function type */ typedef void (*fetch_func_t)(struct pt_regs *, void *, void *); /* Printing function type */ -typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *); +typedef int (*print_type_func_t)(struct trace_seq *, void *, void *); /* Fetch types */ enum { @@ -124,8 +124,7 @@ typedef u32 string_size; /* Printing in basic type function template */ #define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \ -int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ - void *data, void *ent); \ +int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, void *data, void *ent);\ extern const char PRINT_TYPE_FMT_NAME(type)[] DECLARE_BASIC_PRINT_TYPE_FUNC(u8); @@ -403,6 +402,20 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, } } +static inline int +print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args, + u8 *data, void *field) +{ + int i; + + for (i = 0; i < nr_args; i++) { + trace_seq_printf(s, " %s=", args[i].name); + if (!args[i].type->print(s, data + args[i].offset, field)) + return -ENOMEM; + } + return 0; +} + extern int set_print_fmt(struct trace_probe *tp, bool is_return); #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d09638706fe0..c55753e1079e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -892,7 +892,6 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e struct trace_seq *s = &iter->seq; struct trace_uprobe *tu; u8 *data; - int i; entry = (struct uprobe_trace_entry_head *)iter->ent; tu = container_of(event, struct trace_uprobe, tp.call.event); @@ -909,12 +908,8 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i < tu->tp.nr_args; i++) { - struct probe_arg *parg = &tu->tp.args[i]; - - if (!parg->type->print(s, parg->name, data + parg->offset, entry)) - goto out; - } + if (print_probe_args(s, tu->tp.args, tu->tp.nr_args, data, entry) < 0) + goto out; trace_seq_putc(s, '\n'); -- cgit v1.2.3 From eeb07b0615004bce145015b704de85fd3ac6cce0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:17:05 +0900 Subject: tracing: probeevent: Cleanup argument field definition Cleanup event argument definition code in one place for maintenancability. Link: http://lkml.kernel.org/r/152465862529.26224.9068605421476018902.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 32 ++++---------------------------- kernel/trace/trace_probe.c | 21 +++++++++++++++++++++ kernel/trace/trace_probe.h | 2 ++ kernel/trace/trace_uprobe.c | 15 ++------------- 4 files changed, 29 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6326c71181aa..1356927e32d0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1192,49 +1192,25 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, static int kprobe_event_define_fields(struct trace_event_call *event_call) { - int ret, i; + int ret; struct kprobe_trace_entry_head field; struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); - /* Set argument names as fields */ - for (i = 0; i < tk->tp.nr_args; i++) { - struct probe_arg *parg = &tk->tp.args[i]; - ret = trace_define_field(event_call, parg->type->fmttype, - parg->name, - sizeof(field) + parg->offset, - parg->type->size, - parg->type->is_signed, - FILTER_OTHER); - if (ret) - return ret; - } - return 0; + return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); } static int kretprobe_event_define_fields(struct trace_event_call *event_call) { - int ret, i; + int ret; struct kretprobe_trace_entry_head field; struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); - /* Set argument names as fields */ - for (i = 0; i < tk->tp.nr_args; i++) { - struct probe_arg *parg = &tk->tp.args[i]; - ret = trace_define_field(event_call, parg->type->fmttype, - parg->name, - sizeof(field) + parg->offset, - parg->type->size, - parg->type->is_signed, - FILTER_OTHER); - if (ret) - return ret; - } - return 0; + return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); } #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e2c184eaa7db..21af28ffba3a 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -668,3 +668,24 @@ int set_print_fmt(struct trace_probe *tp, bool is_return) return 0; } + +int traceprobe_define_arg_fields(struct trace_event_call *event_call, + size_t offset, struct trace_probe *tp) +{ + int ret, i; + + /* Set argument names as fields */ + for (i = 0; i < tp->nr_args; i++) { + struct probe_arg *parg = &tp->args[i]; + + ret = trace_define_field(event_call, parg->type->fmttype, + parg->name, + offset + parg->offset, + parg->type->size, + parg->type->is_signed, + FILTER_OTHER); + if (ret) + return ret; + } + return 0; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 8254a061ac35..a1df7763b797 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -429,3 +429,5 @@ create_local_trace_uprobe(char *name, unsigned long offs, unsigned long ref_ctr_offset, bool is_return); extern void destroy_local_trace_uprobe(struct trace_event_call *event_call); #endif +extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, + size_t offset, struct trace_probe *tp); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c55753e1079e..28a8f69cec89 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1018,7 +1018,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) static int uprobe_event_define_fields(struct trace_event_call *event_call) { - int ret, i, size; + int ret, size; struct uprobe_trace_entry_head field; struct trace_uprobe *tu = event_call->data; @@ -1030,19 +1030,8 @@ static int uprobe_event_define_fields(struct trace_event_call *event_call) DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0); size = SIZEOF_TRACE_ENTRY(false); } - /* Set argument names as fields */ - for (i = 0; i < tu->tp.nr_args; i++) { - struct probe_arg *parg = &tu->tp.args[i]; - - ret = trace_define_field(event_call, parg->type->fmttype, - parg->name, size + parg->offset, - parg->type->size, parg->type->is_signed, - FILTER_OTHER); - if (ret) - return ret; - } - return 0; + return traceprobe_define_arg_fields(event_call, size, &tu->tp); } #ifdef CONFIG_PERF_EVENTS -- cgit v1.2.3 From 7bfbc63eda08b8158c040d6882c807f62b0750bb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:17:34 +0900 Subject: tracing: probeevent: Remove NOKPROBE_SYMBOL from print functions Remove unneeded NOKPROBE_SYMBOL from print functions since the print functions are only used when printing out the trace data, and not from kprobe handler. Link: http://lkml.kernel.org/r/152465865422.26224.10111548170594014954.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 21af28ffba3a..5f3b5b3fd2cd 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -31,8 +31,7 @@ int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, void *data, void *ent)\ trace_seq_printf(s, fmt, *(type *)data); \ return !trace_seq_has_overflowed(s); \ } \ -const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \ -NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(tname)); +const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; DEFINE_BASIC_PRINT_TYPE_FUNC(u8, u8, "%u") DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "%u") @@ -59,7 +58,6 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent) (const char *)get_loc_data(data, ent)); return !trace_seq_has_overflowed(s); } -NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string)); const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; -- cgit v1.2.3 From 533059281ee594f9fbb9e58042aaec77083ef251 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 25 Apr 2018 21:18:03 +0900 Subject: tracing: probeevent: Introduce new argument fetching code Replace {k,u}probe event argument fetching framework with switch-case based. Currently that is implemented with structures, macros and chain of function-pointers, which is more complicated than necessary and may get a performance penalty by retpoline. This simplify that with an array of "fetch_insn" (opcode and oprands), and make process_fetch_insn() just interprets it. No function pointers are used. Link: http://lkml.kernel.org/r/152465868340.26224.2551120475197839464.stgit@devbox Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 291 +++++++++++++---------------- kernel/trace/trace_probe.c | 401 +++++++++++----------------------------- kernel/trace/trace_probe.h | 230 +++++------------------ kernel/trace/trace_probe_tmpl.h | 120 ++++++++++++ kernel/trace/trace_uprobe.c | 127 ++++++++----- 5 files changed, 491 insertions(+), 678 deletions(-) create mode 100644 kernel/trace/trace_probe_tmpl.h (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1356927e32d0..c024cc40d509 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -14,6 +14,7 @@ #include "trace_kprobe_selftest.h" #include "trace_probe.h" +#include "trace_probe_tmpl.h" #define KPROBE_EVENT_SYSTEM "kprobes" #define KRETPROBE_MAXACTIVE_MAX 4096 @@ -120,160 +121,6 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs); -/* Memory fetching by symbol */ -struct symbol_cache { - char *symbol; - long offset; - unsigned long addr; -}; - -unsigned long update_symbol_cache(struct symbol_cache *sc) -{ - sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); - - if (sc->addr) - sc->addr += sc->offset; - - return sc->addr; -} - -void free_symbol_cache(struct symbol_cache *sc) -{ - kfree(sc->symbol); - kfree(sc); -} - -struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) -{ - struct symbol_cache *sc; - - if (!sym || strlen(sym) == 0) - return NULL; - - sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); - if (!sc) - return NULL; - - sc->symbol = kstrdup(sym, GFP_KERNEL); - if (!sc->symbol) { - kfree(sc); - return NULL; - } - sc->offset = offset; - update_symbol_cache(sc); - - return sc; -} - -/* - * Kprobes-specific fetch functions - */ -#define DEFINE_FETCH_stack(type) \ -static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \ - void *offset, void *dest) \ -{ \ - *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ - (unsigned int)((unsigned long)offset)); \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type)); - -DEFINE_BASIC_FETCH_FUNCS(stack) -/* No string on the stack entry */ -#define fetch_stack_string NULL -#define fetch_stack_string_size NULL - -#define DEFINE_FETCH_memory(type) \ -static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \ - void *addr, void *dest) \ -{ \ - type retval; \ - if (probe_kernel_address(addr, retval)) \ - *(type *)dest = 0; \ - else \ - *(type *)dest = retval; \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type)); - -DEFINE_BASIC_FETCH_FUNCS(memory) -/* - * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max - * length and relative data location. - */ -static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, - void *addr, void *dest) -{ - int maxlen = get_rloc_len(*(u32 *)dest); - u8 *dst = get_rloc_data(dest); - long ret; - - if (!maxlen) - return; - - /* - * Try to get string again, since the string can be changed while - * probing. - */ - ret = strncpy_from_unsafe(dst, addr, maxlen); - - if (ret < 0) { /* Failed to fetch string */ - dst[0] = '\0'; - *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); - } else { - *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest)); - } -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string)); - -/* Return the length of string -- including null terminal byte */ -static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, - void *addr, void *dest) -{ - mm_segment_t old_fs; - int ret, len = 0; - u8 c; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - pagefault_disable(); - - do { - ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); - len++; - } while (c && ret == 0 && len < MAX_STRING_SIZE); - - pagefault_enable(); - set_fs(old_fs); - - if (ret < 0) /* Failed to check the length */ - *(u32 *)dest = 0; - else - *(u32 *)dest = len; -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size)); - -#define DEFINE_FETCH_symbol(type) \ -void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\ -{ \ - struct symbol_cache *sc = data; \ - if (sc->addr) \ - fetch_memory_##type(regs, (void *)sc->addr, dest); \ - else \ - *(type *)dest = 0; \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type)); - -DEFINE_BASIC_FETCH_FUNCS(symbol) -DEFINE_FETCH_symbol(string) -DEFINE_FETCH_symbol(string_size) - -/* kprobes don't support file_offset fetch methods */ -#define fetch_file_offset_u8 NULL -#define fetch_file_offset_u16 NULL -#define fetch_file_offset_u32 NULL -#define fetch_file_offset_u64 NULL -#define fetch_file_offset_string NULL -#define fetch_file_offset_string_size NULL - /* Fetch type information table */ static const struct fetch_type kprobes_fetch_type_table[] = { /* Special types */ @@ -529,7 +376,7 @@ static bool within_notrace_func(struct trace_kprobe *tk) /* Internal register function - just handle k*probes and flags */ static int __register_trace_kprobe(struct trace_kprobe *tk) { - int i, ret; + int ret; if (trace_probe_is_registered(&tk->tp)) return -EINVAL; @@ -540,9 +387,6 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) return -EINVAL; } - for (i = 0; i < tk->tp.nr_args; i++) - traceprobe_update_arg(&tk->tp.args[i]); - /* Set/clear disabled flag according to tp->flag */ if (trace_probe_is_enabled(&tk->tp)) tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; @@ -876,8 +720,8 @@ static int create_trace_kprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg, - is_return, true, - kprobes_fetch_type_table); + is_return, true, + kprobes_fetch_type_table); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; @@ -1031,6 +875,133 @@ static const struct file_operations kprobe_profile_ops = { .release = seq_release, }; +/* Kprobe specific fetch functions */ + +/* Return the length of string -- including null terminal byte */ +static nokprobe_inline void +fetch_store_strlen(unsigned long addr, void *dest) +{ + mm_segment_t old_fs; + int ret, len = 0; + u8 c; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + pagefault_disable(); + + do { + ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); + len++; + } while (c && ret == 0 && len < MAX_STRING_SIZE); + + pagefault_enable(); + set_fs(old_fs); + + if (ret < 0) /* Failed to check the length */ + *(u32 *)dest = 0; + else + *(u32 *)dest = len; +} + +/* + * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max + * length and relative data location. + */ +static nokprobe_inline void +fetch_store_string(unsigned long addr, void *dest) +{ + int maxlen = get_rloc_len(*(u32 *)dest); + u8 *dst = get_rloc_data(dest); + long ret; + + if (!maxlen) + return; + + /* + * Try to get string again, since the string can be changed while + * probing. + */ + ret = strncpy_from_unsafe(dst, (void *)addr, maxlen); + + if (ret < 0) { /* Failed to fetch string */ + dst[0] = '\0'; + *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); + } else { + *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest)); + } +} + +/* Note that we don't verify it, since the code does not come from user space */ +static int +process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, + bool pre) +{ + unsigned long val; + int ret; + + /* 1st stage: get value from context */ + switch (code->op) { + case FETCH_OP_REG: + val = regs_get_register(regs, code->param); + break; + case FETCH_OP_STACK: + val = regs_get_kernel_stack_nth(regs, code->param); + break; + case FETCH_OP_STACKP: + val = kernel_stack_pointer(regs); + break; + case FETCH_OP_RETVAL: + val = regs_return_value(regs); + break; + case FETCH_OP_IMM: + val = code->immediate; + break; + case FETCH_OP_COMM: + val = (unsigned long)current->comm; + break; + default: + return -EILSEQ; + } + code++; + + /* 2nd stage: dereference memory if needed */ + while (code->op == FETCH_OP_DEREF) { + ret = probe_kernel_read(&val, (void *)val + code->offset, + sizeof(val)); + if (ret) + return ret; + code++; + } + + /* 3rd stage: store value to buffer */ + switch (code->op) { + case FETCH_OP_ST_RAW: + fetch_store_raw(val, code, dest); + break; + case FETCH_OP_ST_MEM: + probe_kernel_read(dest, (void *)val + code->offset, code->size); + break; + case FETCH_OP_ST_STRING: + if (pre) + fetch_store_strlen(val + code->offset, dest); + else + fetch_store_string(val + code->offset, dest); + break; + default: + return -EILSEQ; + } + code++; + + /* 4th stage: modify stored value if needed */ + if (code->op == FETCH_OP_MOD_BF) { + fetch_apply_bitfield(code, dest); + code++; + } + + return code->op == FETCH_OP_END ? 0 : -EILSEQ; +} +NOKPROBE_SYMBOL(process_fetch_insn) + /* Kprobe handler */ static nokprobe_inline void __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 5f3b5b3fd2cd..c59c69cb2f2e 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -61,174 +61,6 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent) const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; -#define CHECK_FETCH_FUNCS(method, fn) \ - (((FETCH_FUNC_NAME(method, u8) == fn) || \ - (FETCH_FUNC_NAME(method, u16) == fn) || \ - (FETCH_FUNC_NAME(method, u32) == fn) || \ - (FETCH_FUNC_NAME(method, u64) == fn) || \ - (FETCH_FUNC_NAME(method, string) == fn) || \ - (FETCH_FUNC_NAME(method, string_size) == fn)) \ - && (fn != NULL)) - -/* Data fetch function templates */ -#define DEFINE_FETCH_reg(type) \ -void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \ -{ \ - *(type *)dest = (type)regs_get_register(regs, \ - (unsigned int)((unsigned long)offset)); \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type)); -DEFINE_BASIC_FETCH_FUNCS(reg) -/* No string on the register */ -#define fetch_reg_string NULL -#define fetch_reg_string_size NULL - -#define DEFINE_FETCH_retval(type) \ -void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \ - void *dummy, void *dest) \ -{ \ - *(type *)dest = (type)regs_return_value(regs); \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type)); -DEFINE_BASIC_FETCH_FUNCS(retval) -/* No string on the retval */ -#define fetch_retval_string NULL -#define fetch_retval_string_size NULL - -/* Dereference memory access function */ -struct deref_fetch_param { - struct fetch_param orig; - long offset; - fetch_func_t fetch; - fetch_func_t fetch_size; -}; - -#define DEFINE_FETCH_deref(type) \ -void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \ - void *data, void *dest) \ -{ \ - struct deref_fetch_param *dprm = data; \ - unsigned long addr; \ - call_fetch(&dprm->orig, regs, &addr); \ - if (addr) { \ - addr += dprm->offset; \ - dprm->fetch(regs, (void *)addr, dest); \ - } else \ - *(type *)dest = 0; \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type)); -DEFINE_BASIC_FETCH_FUNCS(deref) -DEFINE_FETCH_deref(string) - -void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, - void *data, void *dest) -{ - struct deref_fetch_param *dprm = data; - unsigned long addr; - - call_fetch(&dprm->orig, regs, &addr); - if (addr && dprm->fetch_size) { - addr += dprm->offset; - dprm->fetch_size(regs, (void *)addr, dest); - } else - *(string_size *)dest = 0; -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size)); - -static void update_deref_fetch_param(struct deref_fetch_param *data) -{ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - update_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - update_symbol_cache(data->orig.data); -} -NOKPROBE_SYMBOL(update_deref_fetch_param); - -static void free_deref_fetch_param(struct deref_fetch_param *data) -{ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - free_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - free_symbol_cache(data->orig.data); - kfree(data); -} -NOKPROBE_SYMBOL(free_deref_fetch_param); - -/* Bitfield fetch function */ -struct bitfield_fetch_param { - struct fetch_param orig; - unsigned char hi_shift; - unsigned char low_shift; -}; - -#define DEFINE_FETCH_bitfield(type) \ -void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \ - void *data, void *dest) \ -{ \ - struct bitfield_fetch_param *bprm = data; \ - type buf = 0; \ - call_fetch(&bprm->orig, regs, &buf); \ - if (buf) { \ - buf <<= bprm->hi_shift; \ - buf >>= bprm->low_shift; \ - } \ - *(type *)dest = buf; \ -} \ -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type)); -DEFINE_BASIC_FETCH_FUNCS(bitfield) -#define fetch_bitfield_string NULL -#define fetch_bitfield_string_size NULL - -static void -update_bitfield_fetch_param(struct bitfield_fetch_param *data) -{ - /* - * Don't check the bitfield itself, because this must be the - * last fetch function. - */ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - update_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - update_symbol_cache(data->orig.data); -} - -static void -free_bitfield_fetch_param(struct bitfield_fetch_param *data) -{ - /* - * Don't check the bitfield itself, because this must be the - * last fetch function. - */ - if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) - free_deref_fetch_param(data->orig.data); - else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) - free_symbol_cache(data->orig.data); - - kfree(data); -} - -void FETCH_FUNC_NAME(comm, string)(struct pt_regs *regs, - void *data, void *dest) -{ - int maxlen = get_rloc_len(*(u32 *)dest); - u8 *dst = get_rloc_data(dest); - long ret; - - if (!maxlen) - return; - - ret = strlcpy(dst, current->comm, maxlen); - *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest)); -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string)); - -void FETCH_FUNC_NAME(comm, string_size)(struct pt_regs *regs, - void *data, void *dest) -{ - *(u32 *)dest = strlen(current->comm) + 1; -} -NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string_size)); - static const struct fetch_type *find_fetch_type(const char *type, const struct fetch_type *ftbl) { @@ -272,37 +104,6 @@ fail: return NULL; } -/* Special function : only accept unsigned long */ -static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest) -{ - *(unsigned long *)dest = kernel_stack_pointer(regs); -} -NOKPROBE_SYMBOL(fetch_kernel_stack_address); - -static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest) -{ - *(unsigned long *)dest = user_stack_pointer(regs); -} -NOKPROBE_SYMBOL(fetch_user_stack_address); - -static fetch_func_t get_fetch_size_function(const struct fetch_type *type, - fetch_func_t orig_fn, - const struct fetch_type *ftbl) -{ - int i; - - if (type != &ftbl[FETCH_TYPE_STRING]) - return NULL; /* Only string type needs size function */ - - for (i = 0; i < FETCH_MTD_END; i++) - if (type->fetch[i] == orig_fn) - return ftbl[FETCH_TYPE_STRSIZE].fetch[i]; - - WARN_ON(1); /* This should not happen */ - - return NULL; -} - /* Split symbol and offset. */ int traceprobe_split_symbol_offset(char *symbol, long *offset) { @@ -327,7 +128,7 @@ int traceprobe_split_symbol_offset(char *symbol, long *offset) #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_param *f, bool is_return, + struct fetch_insn *code, bool is_return, bool is_kprobe) { int ret = 0; @@ -335,33 +136,24 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, if (strcmp(arg, "retval") == 0) { if (is_return) - f->fn = t->fetch[FETCH_MTD_retval]; + code->op = FETCH_OP_RETVAL; else ret = -EINVAL; } else if (strncmp(arg, "stack", 5) == 0) { if (arg[5] == '\0') { - if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR)) - return -EINVAL; - - if (is_kprobe) - f->fn = fetch_kernel_stack_address; - else - f->fn = fetch_user_stack_address; + code->op = FETCH_OP_STACKP; } else if (isdigit(arg[5])) { ret = kstrtoul(arg + 5, 10, ¶m); if (ret || (is_kprobe && param > PARAM_MAX_STACK)) ret = -EINVAL; else { - f->fn = t->fetch[FETCH_MTD_stack]; - f->data = (void *)param; + code->op = FETCH_OP_STACK; + code->param = (unsigned int)param; } } else ret = -EINVAL; } else if (strcmp(arg, "comm") == 0) { - if (strcmp(t->name, "string") != 0 && - strcmp(t->name, "string_size") != 0) - return -EINVAL; - f->fn = t->fetch[FETCH_MTD_comm]; + code->op = FETCH_OP_COMM; } else ret = -EINVAL; @@ -369,10 +161,13 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, } /* Recursive argument parser */ -static int parse_probe_arg(char *arg, const struct fetch_type *t, - struct fetch_param *f, bool is_return, bool is_kprobe, - const struct fetch_type *ftbl) +static int +parse_probe_arg(char *arg, const struct fetch_type *type, + struct fetch_insn **pcode, struct fetch_insn *end, + bool is_return, bool is_kprobe, + const struct fetch_type *ftbl) { + struct fetch_insn *code = *pcode; unsigned long param; long offset; char *tmp; @@ -380,14 +175,15 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, switch (arg[0]) { case '$': - ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe); + ret = parse_probe_vars(arg + 1, type, code, + is_return, is_kprobe); break; case '%': /* named register */ ret = regs_query_register_offset(arg + 1); if (ret >= 0) { - f->fn = t->fetch[FETCH_MTD_reg]; - f->data = (void *)(unsigned long)ret; + code->op = FETCH_OP_REG; + code->param = (unsigned int)ret; ret = 0; } break; @@ -397,9 +193,9 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, ret = kstrtoul(arg + 1, 0, ¶m); if (ret) break; - - f->fn = t->fetch[FETCH_MTD_memory]; - f->data = (void *)param; + /* load address */ + code->op = FETCH_OP_IMM; + code->immediate = param; } else if (arg[1] == '+') { /* kprobes don't support file offsets */ if (is_kprobe) @@ -409,8 +205,8 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, if (ret) break; - f->fn = t->fetch[FETCH_MTD_file_offset]; - f->data = (void *)offset; + code->op = FETCH_OP_FOFFS; + code->immediate = (unsigned long)offset; // imm64? } else { /* uprobes don't support symbols */ if (!is_kprobe) @@ -420,10 +216,19 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, if (ret) break; - f->data = alloc_symbol_cache(arg + 1, offset); - if (f->data) - f->fn = t->fetch[FETCH_MTD_symbol]; + code->op = FETCH_OP_IMM; + code->immediate = + (unsigned long)kallsyms_lookup_name(arg + 1); + if (!code->immediate) + return -ENOENT; + code->immediate += offset; } + /* These are fetching from memory */ + if (++code == end) + return -E2BIG; + *pcode = code; + code->op = FETCH_OP_DEREF; + code->offset = offset; break; case '+': /* deref memory */ @@ -431,11 +236,10 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, case '-': tmp = strchr(arg, '('); if (!tmp) - break; + return -EINVAL; *tmp = '\0'; ret = kstrtol(arg, 0, &offset); - if (ret) break; @@ -443,36 +247,29 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, tmp = strrchr(arg, ')'); if (tmp) { - struct deref_fetch_param *dprm; - const struct fetch_type *t2; + const struct fetch_type *t2; t2 = find_fetch_type(NULL, ftbl); *tmp = '\0'; - dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL); - - if (!dprm) - return -ENOMEM; - - dprm->offset = offset; - dprm->fetch = t->fetch[FETCH_MTD_memory]; - dprm->fetch_size = get_fetch_size_function(t, - dprm->fetch, ftbl); - ret = parse_probe_arg(arg, t2, &dprm->orig, is_return, - is_kprobe, ftbl); + ret = parse_probe_arg(arg, t2, &code, end, is_return, + is_kprobe, ftbl); if (ret) - kfree(dprm); - else { - f->fn = t->fetch[FETCH_MTD_deref]; - f->data = (void *)dprm; - } + break; + if (code->op == FETCH_OP_COMM) + return -EINVAL; + if (++code == end) + return -E2BIG; + *pcode = code; + + code->op = FETCH_OP_DEREF; + code->offset = offset; } break; } - if (!ret && !f->fn) { /* Parsed, but do not find fetch method */ - pr_info("%s type has no corresponding fetch method.\n", t->name); + if (!ret && code->op == FETCH_OP_NOP) { + /* Parsed, but do not find fetch method */ ret = -EINVAL; } - return ret; } @@ -481,22 +278,15 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, /* Bitfield type needs to be parsed into a fetch function */ static int __parse_bitfield_probe_arg(const char *bf, const struct fetch_type *t, - struct fetch_param *f) + struct fetch_insn **pcode) { - struct bitfield_fetch_param *bprm; + struct fetch_insn *code = *pcode; unsigned long bw, bo; char *tail; if (*bf != 'b') return 0; - bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); - if (!bprm) - return -ENOMEM; - - bprm->orig