From 923c42c19944da214d697e312a040384a0e33e78 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 22 Jul 2009 20:36:03 +0200 Subject: perf_counter tools: Fix/resurrect perf top annotation in a simple interactive form perf top used to have annotation support, but it has bitrotted and removed. This patch restores that: it allows the user to select any symbol in kernel space for source level annotation on the fly, switch between event counters and alter display variables. When symbol details are being displayed, stopping annotation reverts to normal. known keys: [d] select display delay. [e] select display entries (lines). [E] select annotation event counter. [f] select normal display count filter. [F] select annotation display count filter (percentage). [qQ] quit. [s] select annotation symbol and start annotation. [S] stop annotation, revert to normal display. [z] toggle event count zeroing. Sample: ------------------------------------------------------------------------------ PerfTop: 16719 irqs/sec kernel:78.7% [cache-misses/cache-references/instructions/cycles], (all, 4 CPUs) ------------------------------------------------------------------------------ Showing cache-misses for e1000_clean_rx_irq Events Pcnt (>=3%) 0 0.0% /* adjust length to remove Ethernet CRC */ 0 0.0% if (!(adapter->flags2 & FLAG2_CRC_STRIPPING)) 0 0.0% length -= 4; 436 5.0% f039: 41 f6 84 24 5c 29 00 testb $0x1,0x295c(%r12) 0 0.0% f089: 8b 4d 84 mov -0x7c(%rbp),%ecx 0 0.0% f08c: 48 83 ef 02 sub $0x2,%rdi 0 0.0% f090: 48 83 ee 02 sub $0x2,%rsi 811 9.3% f094: f3 a4 rep movsb %ds:(%rsi),%es:(%rdi) 0 0.0% 0 0.0% while (rx_desc->status & E1000_RXD_STAT_DD) { 0 0.0% f114: 41 f6 47 0c 01 testb $0x1,0xc(%r15) 7226 82.6% f119: 0f 85 24 fe ff ff jne ef43 Available events: 0 cache-misses 1 cache-references 2 instructions 3 cycles Enter details event counter: 2 ------------------------------------------------------------------------------ PerfTop: 15035 irqs/sec kernel:79.0% [cache-misses/cache-references/instructions/cycles], (all, 4 CPUs) ------------------------------------------------------------------------------ Showing instructions for e1000_clean_rx_irq Events Pcnt (>=3%) 0 0.0% int *work_done, int work_to_do) 0 0.0% { 175 0.9% eebf: 55 push %rbp 1898 9.8% eec0: 48 89 e5 mov %rsp,%rbp 0 0.0% 0 0.0% i = rx_ring->next_to_clean; 140 0.7% ef0a: 0f b7 41 1a movzwl 0x1a(%rcx),%eax 670 3.4% ef0e: 89 45 ac mov %eax,-0x54(%rbp) 0 0.0% { 0 0.0% memcpy(skb->data + offset, from, len); 91 0.5% f07b: 49 8b b6 e8 00 00 00 mov 0xe8(%r14),%rsi 1153 5.9% f082: 48 8b b8 e8 00 00 00 mov 0xe8(%rax),%rdi 42 0.2% f089: 8b 4d 84 mov -0x7c(%rbp),%ecx 14 0.1% f08c: 48 83 ef 02 sub $0x2,%rdi 0 0.0% f090: 48 83 ee 02 sub $0x2,%rsi 1618 8.3% f094: f3 a4 rep movsb %ds:(%rsi),%es:(%rdi) 0 0.0% 0 0.0% /* return some buffers to hardware, one at a time is too slow */ 0 0.0% if (cleaned_count >= E1000_RX_BUFFER_WRITE) { 867 4.5% f0e7: 83 7d b0 0f cmpl $0xf,-0x50(%rbp) 0 0.0% 0 0.0% while (rx_desc->status & E1000_RXD_STAT_DD) { 37 0.2% f114: 41 f6 47 0c 01 testb $0x1,0xc(%r15) 4047 20.8% f119: 0f 85 24 fe ff ff jne ef43 Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 496 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 456 insertions(+), 40 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f139f1ab9333..d58701346b1e 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -31,6 +31,8 @@ #include #include +#include +#include #include #include @@ -54,7 +56,7 @@ static int system_wide = 0; static int default_interval = 100000; -static u64 count_filter = 5; +static int count_filter = 5; static int print_entries = 15; static int target_pid = -1; @@ -69,14 +71,26 @@ static int freq = 0; static int verbose = 0; static char *vmlinux = NULL; -static char *sym_filter; -static unsigned long filter_start; -static unsigned long filter_end; - static int delay_secs = 2; static int zero; static int dump_symtab; +/* + * Source + */ + +struct source_line { + u64 eip; + unsigned long count[MAX_COUNTERS]; + char *line; + struct source_line *next; +}; + +static char *sym_filter = NULL; +struct sym_entry *sym_filter_entry = NULL; +static int sym_pcnt_filter = 5; +static int sym_counter = 0; + /* * Symbols */ @@ -91,9 +105,237 @@ struct sym_entry { unsigned long snap_count; double weight; int skip; + struct source_line *source; + struct source_line *lines; + struct source_line **lines_tail; + pthread_mutex_t source_lock; }; -struct sym_entry *sym_filter_entry; +/* + * Source functions + */ + +static void parse_source(struct sym_entry *syme) +{ + struct symbol *sym; + struct module *module; + struct section *section = NULL; + FILE *file; + char command[PATH_MAX*2], *path = vmlinux; + u64 start, end, len; + + if (!syme) + return; + + if (syme->lines) { + pthread_mutex_lock(&syme->source_lock); + goto out_assign; + } + + sym = (struct symbol *)(syme + 1); + module = sym->module; + + if (module) + path = module->path; + if (!path) + return; + + start = sym->obj_start; + if (!start) + start = sym->start; + + if (module) { + section = module->sections->find_section(module->sections, ".text"); + if (section) + start -= section->vma; + } + + end = start + sym->end - sym->start + 1; + len = sym->end - sym->start; + + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); + + file = popen(command, "r"); + if (!file) + return; + + pthread_mutex_lock(&syme->source_lock); + syme->lines_tail = &syme->lines; + while (!feof(file)) { + struct source_line *src; + size_t dummy = 0; + char *c; + + src = malloc(sizeof(struct source_line)); + assert(src != NULL); + memset(src, 0, sizeof(struct source_line)); + + if (getline(&src->line, &dummy, file) < 0) + break; + if (!src->line) + break; + + c = strchr(src->line, '\n'); + if (c) + *c = 0; + + src->next = NULL; + *syme->lines_tail = src; + syme->lines_tail = &src->next; + + if (strlen(src->line)>8 && src->line[8] == ':') { + src->eip = strtoull(src->line, NULL, 16); + if (section) + src->eip += section->vma; + } + if (strlen(src->line)>8 && src->line[16] == ':') { + src->eip = strtoull(src->line, NULL, 16); + if (section) + src->eip += section->vma; + } + } + pclose(file); +out_assign: + sym_filter_entry = syme; + pthread_mutex_unlock(&syme->source_lock); +} + +static void __zero_source_counters(struct sym_entry *syme) +{ + int i; + struct source_line *line; + + line = syme->lines; + while (line) { + for (i = 0; i < nr_counters; i++) + line->count[i] = 0; + line = line->next; + } +} + +static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) +{ + struct source_line *line; + + if (syme != sym_filter_entry) + return; + + if (pthread_mutex_trylock(&syme->source_lock)) + return; + + if (!syme->source) + goto out_unlock; + + for (line = syme->lines; line; line = line->next) { + if (line->eip == ip) { + line->count[counter]++; + break; + } + if (line->eip > ip) + break; + } +out_unlock: + pthread_mutex_unlock(&syme->source_lock); +} + +static void lookup_sym_source(struct sym_entry *syme) +{ + struct symbol *symbol = (struct symbol *)(syme + 1); + struct source_line *line; + char pattern[PATH_MAX]; + char *idx; + + sprintf(pattern, "<%s>:", symbol->name); + + if (symbol->module) { + idx = strstr(pattern, "\t"); + if (idx) + *idx = 0; + } + + pthread_mutex_lock(&syme->source_lock); + for (line = syme->lines; line; line = line->next) { + if (strstr(line->line, pattern)) { + syme->source = line; + break; + } + } + pthread_mutex_unlock(&syme->source_lock); +} + +static void show_lines(struct source_line *queue, int count, int total) +{ + int i; + struct source_line *line; + + line = queue; + for (i = 0; i < count; i++) { + float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; + + printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); + line = line->next; + } +} + +#define TRACE_COUNT 3 + +static void show_details(struct sym_entry *syme) +{ + struct symbol *symbol; + struct source_line *line; + struct source_line *line_queue = NULL; + int displayed = 0; + int line_queue_count = 0, total = 0, more = 0; + + if (!syme) + return; + + if (!syme->source) + lookup_sym_source(syme); + + if (!syme->source) + return; + + symbol = (struct symbol *)(syme + 1); + printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); + printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); + + pthread_mutex_lock(&syme->source_lock); + line = syme->source; + while (line) { + total += line->count[sym_counter]; + line = line->next; + } + + line = syme->source; + while (line) { + float pcnt = 0.0; + + if (!line_queue_count) + line_queue = line; + line_queue_count++; + + if (line->count[sym_counter]) + pcnt = 100.0 * line->count[sym_counter] / (float)total; + if (pcnt >= (float)sym_pcnt_filter) { + if (displayed <= print_entries) + show_lines(line_queue, line_queue_count, total); + else more++; + displayed += line_queue_count; + line_queue_count = 0; + line_queue = NULL; + } else if (line_queue_count > TRACE_COUNT) { + line_queue = line_queue->next; + line_queue_count--; + } + + line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; + line = line->next; + } + pthread_mutex_unlock(&syme->source_lock); + if (more) + printf("%d lines not displayed, maybe increase display entries [e]\n", more); +} struct dso *kernel_dso; @@ -228,6 +470,11 @@ static void print_sym_table(void) printf("------------------------------------------------------------------------------\n\n"); + if (sym_filter_entry) { + show_details(sym_filter_entry); + return; + } + if (nr_counters == 1) printf(" samples pcnt"); else @@ -242,7 +489,7 @@ static void print_sym_table(void) struct symbol *sym = (struct symbol *)(syme + 1); double pcnt; - if (++printed > print_entries || syme->snap_count < count_filter) + if (++printed > print_entries || (int)syme->snap_count < count_filter) continue; pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / @@ -261,19 +508,208 @@ static void print_sym_table(void) } } +static void prompt_integer(int *target, const char *msg) +{ + char *buf = malloc(0), *p; + size_t dummy = 0; + int tmp; + + fprintf(stdout, "\n%s: ", msg); + if (getline(&buf, &dummy, stdin) < 0) + return; + + p = strchr(buf, '\n'); + if (p) + *p = 0; + + p = buf; + while(*p) { + if (!isdigit(*p)) + goto out_free; + p++; + } + tmp = strtoul(buf, NULL, 10); + *target = tmp; +out_free: + free(buf); +} + +static void prompt_percent(int *target, const char *msg) +{ + int tmp = 0; + + prompt_integer(&tmp, msg); + if (tmp >= 0 && tmp <= 100) + *target = tmp; +} + +static void prompt_symbol(struct sym_entry **target, const char *msg) +{ + char *buf = malloc(0), *p; + struct sym_entry *syme = *target, *n, *found = NULL; + size_t dummy = 0; + + /* zero counters of active symbol */ + if (syme) { + pthread_mutex_lock(&syme->source_lock); + __zero_source_counters(syme); + *target = NULL; + pthread_mutex_unlock(&syme->source_lock); + } + + fprintf(stdout, "\n%s: ", msg); + if (getline(&buf, &dummy, stdin) < 0) + goto out_free; + + p = strchr(buf, '\n'); + if (p) + *p = 0; + + pthread_mutex_lock(&active_symbols_lock); + syme = list_entry(active_symbols.next, struct sym_entry, node); + pthread_mutex_unlock(&active_symbols_lock); + + list_for_each_entry_safe_from(syme, n, &active_symbols, node) { + struct symbol *sym = (struct symbol *)(syme + 1); + + if (!strcmp(buf, sym->name)) { + found = syme; + break; + } + } + + if (!found) { + fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); + sleep(1); + return; + } else + parse_source(found); + +out_free: + free(buf); +} + +static void print_known_keys(void) +{ + fprintf(stdout, "\nknown keys:\n"); + fprintf(stdout, "\t[d] select display delay.\n"); + fprintf(stdout, "\t[e] select display entries (lines).\n"); + fprintf(stdout, "\t[E] select annotation event counter.\n"); + fprintf(stdout, "\t[f] select normal display count filter.\n"); + fprintf(stdout, "\t[F] select annotation display count filter (percentage).\n"); + fprintf(stdout, "\t[qQ] quit.\n"); + fprintf(stdout, "\t[s] select annotation symbol and start annotation.\n"); + fprintf(stdout, "\t[S] stop annotation, revert to normal display.\n"); + fprintf(stdout, "\t[z] toggle event count zeroing.\n"); +} + +static void handle_keypress(int c) +{ + int once = 0; +repeat: + switch (c) { + case 'd': + prompt_integer(&delay_secs, "Enter display delay"); + break; + case 'e': + prompt_integer(&print_entries, "Enter display entries (lines)"); + break; + case 'E': + if (nr_counters > 1) { + int i; + + fprintf(stderr, "\nAvailable events:"); + for (i = 0; i < nr_counters; i++) + fprintf(stderr, "\n\t%d %s", i, event_name(i)); + + prompt_integer(&sym_counter, "Enter details event counter"); + + if (sym_counter >= nr_counters) { + fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); + sym_counter = 0; + sleep(1); + } + } else sym_counter = 0; + break; + case 'f': + prompt_integer(&count_filter, "Enter display event count filter"); + break; + case 'F': + prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); + break; + case 'q': + case 'Q': + printf("exiting.\n"); + exit(0); + case 's': + prompt_symbol(&sym_filter_entry, "Enter details symbol"); + break; + case 'S': + if (!sym_filter_entry) + break; + else { + struct sym_entry *syme = sym_filter_entry; + + pthread_mutex_lock(&syme->source_lock); + sym_filter_entry = NULL; + __zero_source_counters(syme); + pthread_mutex_unlock(&syme->source_lock); + } + break; + case 'z': + zero = ~zero; + break; + default: { + struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; + struct termios tc, save; + + if (!once) { + print_known_keys(); + once++; + } + + tcgetattr(0, &save); + tc = save; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; + tcsetattr(0, TCSANOW, &tc); + + poll(&stdin_poll, 1, -1); + c = getc(stdin); + + tcsetattr(0, TCSAFLUSH, &save); + goto repeat; + } + } +} + static void *display_thread(void *arg __used) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; - int delay_msecs = delay_secs * 1000; - - printf("PerfTop refresh period: %d seconds\n", delay_secs); + struct termios tc, save; + int delay_msecs, c; + + tcgetattr(0, &save); + tc = save; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; +repeat: + delay_msecs = delay_secs * 1000; + tcsetattr(0, TCSANOW, &tc); + /* trash return*/ + getc(stdin); do { print_sym_table(); } while (!poll(&stdin_poll, 1, delay_msecs) == 1); - printf("key pressed - exiting.\n"); - exit(0); + c = getc(stdin); + tcsetattr(0, TCSAFLUSH, &save); + + handle_keypress(c); + goto repeat; return NULL; } @@ -293,7 +729,6 @@ static const char *skip_symbols[] = { static int symbol_filter(struct dso *self, struct symbol *sym) { - static int filter_match; struct sym_entry *syme; const char *name = sym->name; int i; @@ -315,6 +750,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym) return 1; syme = dso__sym_priv(self, sym); + pthread_mutex_init(&syme->source_lock, NULL); + if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) + sym_filter_entry = syme; + for (i = 0; skip_symbols[i]; i++) { if (!strcmp(skip_symbols[i], name)) { syme->skip = 1; @@ -322,29 +761,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym) } } - if (filter_match == 1) { - filter_end = sym->start; - filter_match = -1; - if (filter_end - filter_start > 10000) { - fprintf(stderr, - "hm, too large filter symbol <%s> - skipping.\n", - sym_filter); - fprintf(stderr, "symbol filter start: %016lx\n", - filter_start); - fprintf(stderr, " end: %016lx\n", - filter_end); - filter_end = filter_start = 0; - sym_filter = NULL; - sleep(1); - } - } - - if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) { - filter_match = 1; - filter_start = sym->start; - } - - return 0; } @@ -380,8 +796,6 @@ out_delete_dso: return -1; } -#define TRACE_COUNT 3 - /* * Binary search in the histogram table and record the hit: */ @@ -394,6 +808,7 @@ static void record_ip(u64 ip, int counter) if (!syme->skip) { syme->count[counter]++; + record_precise_ip(syme, counter, ip); pthread_mutex_lock(&active_symbols_lock); if (list_empty(&syme->node) || !syme->node.next) __list_insert_active_sym(syme); @@ -690,8 +1105,8 @@ static const struct option options[] = { "put the counters into a counter group"), OPT_BOOLEAN('i', "inherit", &inherit, "child tasks inherit counters"), - OPT_STRING('s', "sym-filter", &sym_filter, "pattern", - "only display symbols matchig this pattern"), + OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", + "symbol to annotate - requires -k option"), OPT_BOOLEAN('z', "zero", &zero, "zero history across updates"), OPT_INTEGER('F', "freq", &freq, @@ -734,6 +1149,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) delay_secs = 1; parse_symbols(); + parse_source(sym_filter_entry); /* * Fill in the ones not specifically initialized via -c: -- cgit v1.2.3 From 46ab976443c6c566c8fe6fc72a6733a55ba9fbea Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 24 Jul 2009 10:09:50 +0200 Subject: perf_counter tools: Allow perf top top users to switch between weighted and individual counter display Add [w]eighted hotkey. Pressing [w] toggles between displaying weighted total of all counters, and the counter selected via [E]vent select key. ------------------------------------------------------------------------------ PerfTop: 90395 irqs/sec kernel:16.1% [cache-misses/cache-references/instructions], (all, 4 CPUs) ------------------------------------------------------------------------------ weight samples pcnt RIP kernel function ______ _______ _____ ________________ _______________ 1275408.6 10881 - 5.3% - ffffffff81146f70 : copy_page_c 553683.4 43569 - 21.3% - ffffffff81146f20 : clear_page_c 74075.0 6768 - 3.3% - ffffffff81147190 : copy_user_generic_string 40602.9 7538 - 3.7% - ffffffff81284ba2 : _spin_lock 26882.1 965 - 0.5% - ffffffff8109d280 : file_ra_state_init [w] ------------------------------------------------------------------------------ PerfTop: 91221 irqs/sec kernel:14.5% [10000Hz cache-misses], (all, 4 CPUs) ------------------------------------------------------------------------------ weight samples pcnt RIP kernel function ______ _______ _____ ________________ _______________ 47320.00 - 22.3% - ffffffff81146f20 : clear_page_c 14261.00 - 6.7% - ffffffff810992f5 : __rmqueue 11046.00 - 5.2% - ffffffff81146f70 : copy_page_c 7842.00 - 3.7% - ffffffff81284ba2 : _spin_lock 7234.00 - 3.4% - ffffffff810aa1d6 : unmap_vmas Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d58701346b1e..4eef3465e837 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -90,6 +90,7 @@ static char *sym_filter = NULL; struct sym_entry *sym_filter_entry = NULL; static int sym_pcnt_filter = 5; static int sym_counter = 0; +static int display_weighted = -1; /* * Symbols @@ -354,6 +355,9 @@ static double sym_weight(const struct sym_entry *sym) double weight = sym->snap_count; int counter; + if (!display_weighted) + return weight; + for (counter = 1; counter < nr_counters-1; counter++) weight *= sym->count[counter]; @@ -401,7 +405,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) static void print_sym_table(void) { int printed = 0, j; - int counter; + int counter, snap = !display_weighted ? sym_counter : 0; float samples_per_sec = samples/delay_secs; float ksamples_per_sec = (samples-userspace_samples)/delay_secs; float sum_ksamples = 0.0; @@ -417,7 +421,7 @@ static void print_sym_table(void) pthread_mutex_unlock(&active_symbols_lock); list_for_each_entry_safe_from(syme, n, &active_symbols, node) { - syme->snap_count = syme->count[0]; + syme->snap_count = syme->count[snap]; if (syme->snap_count != 0) { syme->weight = sym_weight(syme); rb_insert_active_sym(&tmp, syme); @@ -437,7 +441,7 @@ static void print_sym_table(void) samples_per_sec, 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); - if (nr_counters == 1) { + if (nr_counters == 1 || !display_weighted) { printf("%Ld", (u64)attrs[0].sample_period); if (freq) printf("Hz "); @@ -445,7 +449,9 @@ static void print_sym_table(void) printf(" "); } - for (counter = 0; counter < nr_counters; counter++) { + if (!display_weighted) + printf("%s", event_name(sym_counter)); + else for (counter = 0; counter < nr_counters; counter++) { if (counter) printf("/"); @@ -495,7 +501,7 @@ static void print_sym_table(void) pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / sum_ksamples)); - if (nr_counters == 1) + if (nr_counters == 1 || !display_weighted) printf("%20.2f - ", syme->weight); else printf("%9.1f %10ld - ", syme->weight, syme->snap_count); @@ -594,13 +600,14 @@ static void print_known_keys(void) fprintf(stdout, "\nknown keys:\n"); fprintf(stdout, "\t[d] select display delay.\n"); fprintf(stdout, "\t[e] select display entries (lines).\n"); - fprintf(stdout, "\t[E] select annotation event counter.\n"); + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); fprintf(stdout, "\t[f] select normal display count filter.\n"); fprintf(stdout, "\t[F] select annotation display count filter (percentage).\n"); fprintf(stdout, "\t[qQ] quit.\n"); fprintf(stdout, "\t[s] select annotation symbol and start annotation.\n"); fprintf(stdout, "\t[S] stop annotation, revert to normal display.\n"); - fprintf(stdout, "\t[z] toggle event count zeroing.\n"); + fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); } static void handle_keypress(int c) @@ -656,6 +663,9 @@ repeat: pthread_mutex_unlock(&syme->source_lock); } break; + case 'w': + display_weighted = ~display_weighted; + break; case 'z': zero = ~zero; break; -- cgit v1.2.3 From 7b4b6658e152ed4568cfff48175d93645df081d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jul 2009 09:29:32 +0200 Subject: perf_counter: Fix software counters for fast moving event sources Reimplement the software counters to deal with fast moving event sources (such as tracepoints). This means being able to generate multiple overflows from a single 'event' as well as support throttling. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 164 +++++++++++++++++++++++++++++--------------------- 1 file changed, 94 insertions(+), 70 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 868102172aa4..615440ab9295 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3344,87 +3344,81 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi, * Generic software counter infrastructure */ -static void perf_swcounter_update(struct perf_counter *counter) +/* + * We directly increment counter->count and keep a second value in + * counter->hw.period_left to count intervals. This period counter + * is kept in the range [-sample_period, 0] so that we can use the + * sign as trigger. + */ + +static u64 perf_swcounter_set_period(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; - u64 prev, now; - s64 delta; + u64 period = hwc->last_period; + u64 nr, offset; + s64 old, val; + + hwc->last_period = hwc->sample_period; again: - prev = atomic64_read(&hwc->prev_count); - now = atomic64_read(&hwc->count); - if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) - goto again; + old = val = atomic64_read(&hwc->period_left); + if (val < 0) + return 0; - delta = now - prev; + nr = div64_u64(period + val, period); + offset = nr * period; + val -= offset; + if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) + goto again; - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); + return nr; } -static void perf_swcounter_set_period(struct perf_counter *counter) +static void perf_swcounter_overflow(struct perf_counter *counter, + int nmi, struct perf_sample_data *data) { struct hw_perf_counter *hwc = &counter->hw; - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; + u64 overflow; - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - } + data->period = counter->hw.last_period; + overflow = perf_swcounter_set_period(counter); - if (unlikely(left <= 0)) { - left += period; - atomic64_add(period, &hwc->period_left); - hwc->last_period = period; - } + if (hwc->interrupts == MAX_INTERRUPTS) + return; - atomic64_set(&hwc->prev_count, -left); - atomic64_set(&hwc->count, -left); + for (; overflow; overflow--) { + if (perf_counter_overflow(counter, nmi, data)) { + /* + * We inhibit the overflow from happening when + * hwc->interrupts == MAX_INTERRUPTS. + */ + break; + } + } } -static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) +static void perf_swcounter_unthrottle(struct perf_counter *counter) { - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct perf_counter *counter; - u64 period; - - counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); - counter->pmu->read(counter); - - data.addr = 0; - data.regs = get_irq_regs(); /* - * In case we exclude kernel IPs or are somehow not in interrupt - * context, provide the next best thing, the user IP. + * Nothing to do, we already reset hwc->interrupts. */ - if ((counter->attr.exclude_kernel || !data.regs) && - !counter->attr.exclude_user) - data.regs = task_pt_regs(current); +} - if (data.regs) { - if (perf_counter_overflow(counter, 0, &data)) - ret = HRTIMER_NORESTART; - } +static void perf_swcounter_add(struct perf_counter *counter, u64 nr, + int nmi, struct perf_sample_data *data) +{ + struct hw_perf_counter *hwc = &counter->hw; - period = max_t(u64, 10000, counter->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); + atomic64_add(nr, &counter->count); - return ret; -} + if (!hwc->sample_period) + return; -static void perf_swcounter_overflow(struct perf_counter *counter, - int nmi, struct perf_sample_data *data) -{ - data->period = counter->hw.last_period; + if (!data->regs) + return; - perf_swcounter_update(counter); - perf_swcounter_set_period(counter); - if (perf_counter_overflow(counter, nmi, data)) - /* soft-disable the counter */ - ; + if (!atomic64_add_negative(nr, &hwc->period_left)) + perf_swcounter_overflow(counter, nmi, data); } static int perf_swcounter_is_counting(struct perf_counter *counter) @@ -3488,15 +3482,6 @@ static int perf_swcounter_match(struct perf_counter *counter, return 1; } -static void perf_swcounter_add(struct perf_counter *counter, u64 nr, - int nmi, struct perf_sample_data *data) -{ - int neg = atomic64_add_negative(nr, &counter->hw.count); - - if (counter->hw.sample_period && !neg && data->regs) - perf_swcounter_overflow(counter, nmi, data); -} - static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, enum perf_type_id type, u32 event, u64 nr, int nmi, @@ -3575,26 +3560,65 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi, static void perf_swcounter_read(struct perf_counter *counter) { - perf_swcounter_update(counter); } static int perf_swcounter_enable(struct perf_counter *counter) { - perf_swcounter_set_period(counter); + struct hw_perf_counter *hwc = &counter->hw; + + if (hwc->sample_period) { + hwc->last_period = hwc->sample_period; + perf_swcounter_set_period(counter); + } return 0; } static void perf_swcounter_disable(struct perf_counter *counter) { - perf_swcounter_update(counter); } static const struct pmu perf_ops_generic = { .enable = perf_swcounter_enable, .disable = perf_swcounter_disable, .read = perf_swcounter_read, + .unthrottle = perf_swcounter_unthrottle, }; +/* + * hrtimer based swcounter callback + */ + +static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) +{ + enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; + struct perf_counter *counter; + u64 period; + + counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); + counter->pmu->read(counter); + + data.addr = 0; + data.regs = get_irq_regs(); + /* + * In case we exclude kernel IPs or are somehow not in interrupt + * context, provide the next best thing, the user IP. + */ + if ((counter->attr.exclude_kernel || !data.regs) && + !counter->attr.exclude_user) + data.regs = task_pt_regs(current); + + if (data.regs) { + if (perf_counter_overflow(counter, 0, &data)) + ret = HRTIMER_NORESTART; + } + + period = max_t(u64, 10000, counter->hw.sample_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); + + return ret; +} + /* * Software counter: cpu wall time clock */ -- cgit v1.2.3 From 091bd2e993fcb1094a23e36157285b62bc87afdf Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 4 Aug 2009 10:21:23 +0200 Subject: perf top: Improve interactive key handling Pressing any key which is not currently mapped to functionality, based on startup command line options, displays currently mapped keys, and prompts for input. Pressing any unmapped key at the prompt returns the user to display mode with variables unchanged. eg, pressing ? etc displays currently available keys, the value of the variable associated with that key, and prompts. Pressing same again aborts input. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 108 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 35 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4eef3465e837..7de28ce9ca26 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -595,25 +595,84 @@ out_free: free(buf); } -static void print_known_keys(void) +static void print_mapped_keys(void) { - fprintf(stdout, "\nknown keys:\n"); - fprintf(stdout, "\t[d] select display delay.\n"); - fprintf(stdout, "\t[e] select display entries (lines).\n"); - fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); - fprintf(stdout, "\t[f] select normal display count filter.\n"); - fprintf(stdout, "\t[F] select annotation display count filter (percentage).\n"); - fprintf(stdout, "\t[qQ] quit.\n"); - fprintf(stdout, "\t[s] select annotation symbol and start annotation.\n"); - fprintf(stdout, "\t[S] stop annotation, revert to normal display.\n"); - fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + char *name = NULL; + + if (sym_filter_entry) { + struct symbol *sym = (struct symbol *)(sym_filter_entry+1); + name = sym->name; + } + + fprintf(stdout, "\nMapped keys:\n"); + fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); + fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); + + if (nr_counters > 1) + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); + + fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); + + if (vmlinux) { + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); + fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); + fprintf(stdout, "\t[S] stop annotation.\n"); + } + + if (nr_counters > 1) + fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); + fprintf(stdout, "\t[qQ] quit.\n"); +} + +static int key_mapped(int c) +{ + switch (c) { + case 'd': + case 'e': + case 'f': + case 'z': + case 'q': + case 'Q': + return 1; + case 'E': + case 'w': + return nr_counters > 1 ? 1 : 0; + case 'F': + case 's': + case 'S': + return vmlinux ? 1 : 0; + } + + return 0; } static void handle_keypress(int c) { - int once = 0; -repeat: + if (!key_mapped(c)) { + struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; + struct termios tc, save; + + print_mapped_keys(); + fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); + fflush(stdout); + + tcgetattr(0, &save); + tc = save; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; + tcsetattr(0, TCSANOW, &tc); + + poll(&stdin_poll, 1, -1); + c = getc(stdin); + + tcsetattr(0, TCSAFLUSH, &save); + if (!key_mapped(c)) + return; + } + switch (c) { case 'd': prompt_integer(&delay_secs, "Enter display delay"); @@ -669,28 +728,6 @@ repeat: case 'z': zero = ~zero; break; - default: { - struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; - struct termios tc, save; - - if (!once) { - print_known_keys(); - once++; - } - - tcgetattr(0, &save); - tc = save; - tc.c_lflag &= ~(ICANON | ECHO); - tc.c_cc[VMIN] = 0; - tc.c_cc[VTIME] = 0; - tcsetattr(0, TCSANOW, &tc); - - poll(&stdin_poll, 1, -1); - c = getc(stdin); - - tcsetattr(0, TCSAFLUSH, &save); - goto repeat; - } } } @@ -705,6 +742,7 @@ static void *display_thread(void *arg __used) tc.c_lflag &= ~(ICANON | ECHO); tc.c_cc[VMIN] = 0; tc.c_cc[VTIME] = 0; + repeat: delay_msecs = delay_secs * 1000; tcsetattr(0, TCSANOW, &tc); -- cgit v1.2.3 From 836179834833272f89098c6d1e1b89e8e69797c2 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 4 Aug 2009 10:24:41 +0200 Subject: perf top: Update man page perf_counter tools: update perf top manual page to reflect current implementation. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-top.txt | 112 ++++++++++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 13 deletions(-) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 539d01289725..4a7d558dc309 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -3,36 +3,122 @@ perf-top(1) NAME ---- -perf-top - Run a command and profile it +perf-top - System profiling tool. SYNOPSIS -------- [verse] -'perf top' [-e | --event=EVENT] [-l] [-a] +'perf top' [-e | --event=EVENT] [] DESCRIPTION ----------- -This command runs a command and gathers a performance counter profile -from it. +This command generates and displays a performance counter profile in realtime. OPTIONS ------- -...:: - Any command you can specify in a shell. +-a:: +--all-cpus:: + System-wide collection. (default) + +-c :: +--count=:: + Event period to sample. + +-C :: +--CPU=:: + CPU to profile. + +-d :: +--delay=:: + Number of seconds to delay between refreshes. --e:: ---event=:: +-e :: +--event=:: Select the PMU event. Selection can be a symbolic event name (use 'perf list' to list all events) or a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + hexadecimal event descriptor. --a:: - system-wide collection +-E :: +--entries=:: + Display this many functions. + +-f :: +--count-filter=:: + Only display functions with more events than this. + +-F :: +--freq=:: + Profile at this frequency. + +-i:: +--inherit:: + Child tasks inherit counters, only makes sens with -p option. + +-k :: +--vmlinux=:: + Path to vmlinux. Required for annotation functionality. + +-m :: +--mmap-pages=:: + Number of mmapped data pages. + +-p :: +--pid=:: + Profile events on existing pid. + +-r :: +--realtime=:: + Collect data with this RT SCHED_FIFO priority. + +-s :: +--sym-annotate=:: + Annotate this symbol. Requires -k option. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-z:: +--zero:: + Zero history across display updates. + +INTERACTIVE PROMPTING KEYS +-------------------------- + +[d]:: + Display refresh delay. + +[e]:: + Number of entries to display. + +[E]:: + Event to display when multiple counters are active. + +[f]:: + Profile display filter (>= hit count). + +[F]:: + Annotation display filter (>= % of total). + +[s]:: + Annotate symbol. + +[S]:: + Stop annotation, return to full profile display. + +[w]:: + Toggle between weighted sum and individual count[E]r profile. + +[z]:: + Toggle event count zeroing across display updates. + +[qQ]:: + Quit. + +Pressing any unmapped key displays a menu, and prompts for input. --l:: - scale counter values SEE ALSO -------- -- cgit v1.2.3 From 1953287bfe8afcbbd235bd6c42c9df06d52438dc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Aug 2009 07:11:05 +0200 Subject: perf tools: Fix call-chain cumul hit based sub-total (fractal mode) The callchain fractal mode builds each new total hits in a new branch of profiling by using the parent's hits of the current branch plus the hits of the children. This is wrong, the total hits of a branch should be made of the sum of every children hits, we must ignore the parent hits in this scope. This patch also fixes another mistake with the hit counting. Now the rates are correct. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Pekka Enberg Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 4 ++-- tools/perf/util/callchain.c | 27 ++++++++++++++++----------- tools/perf/util/callchain.h | 7 ++++++- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8cb58d68a006..da402e186561 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -901,7 +901,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, int i; if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = self->cumul_hit; + new_total = self->children_hit; else new_total = total_samples; @@ -930,7 +930,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, ret += ipchain__fprintf_graph(fp, chain, depth, new_depth_mask, i++, new_total, - child->cumul_hit); + cumul_hits(child)); } ret += callchain__fprintf_graph(fp, child, new_total, depth + 1, diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9d3c8141b8c1..98c5627f327b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -26,10 +26,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct callchain_node *rnode; + u64 chain_cumul = cumul_hits(chain); while (*p) { + u64 rnode_cumul; + parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node); + rnode_cumul = cumul_hits(rnode); switch (mode) { case CHAIN_FLAT: @@ -40,7 +44,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, break; case CHAIN_GRAPH_ABS: /* Falldown */ case CHAIN_GRAPH_REL: - if (rnode->cumul_hit < chain->cumul_hit) + if (rnode_cumul < chain_cumul) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -87,7 +91,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node, chain_for_each_child(child, node) { __sort_chain_graph_abs(child, min_hit); - if (child->cumul_hit >= min_hit) + if (cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_ABS); } @@ -108,11 +112,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node, u64 min_hit; node->rb_root = RB_ROOT; - min_hit = node->cumul_hit * min_percent / 100.0; + min_hit = node->children_hit * min_percent / 100.0; chain_for_each_child(child, node) { __sort_chain_graph_rel(child, min_percent); - if (child->cumul_hit >= min_hit) + if (cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_REL); } @@ -211,7 +215,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain, new = create_child(parent, false); fill_node(new, chain, start, syms); - new->cumul_hit = new->hit = 1; + new->children_hit = 0; + new->hit = 1; } /* @@ -241,7 +246,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, /* split the hits */ new->hit = parent->hit; - new->cumul_hit = parent->cumul_hit; + new->children_hit = parent->children_hit; + parent->children_hit = cumul_hits(new); new->val_nr = parent->val_nr - idx_local; parent->val_nr = idx_local; @@ -249,6 +255,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, if (idx_total < chain->nr) { parent->hit = 0; add_child(parent, chain, idx_total, syms); + parent->children_hit++; } else { parent->hit = 1; } @@ -269,13 +276,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, unsigned int ret = __append_chain(rnode, chain, start, syms); if (!ret) - goto cumul; + goto inc_children_hit; } /* nothing in children, add to the current node */ add_child(root, chain, start, syms); -cumul: - root->cumul_hit++; +inc_children_hit: + root->children_hit++; } static int @@ -317,8 +324,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, /* we match 100% of the path, increment the hit */ if (i - start == root->val_nr && i == chain->nr) { root->hit++; - root->cumul_hit++; - return 0; } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 7812122bea1d..b2d128e07c88 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -21,7 +21,7 @@ struct callchain_node { struct rb_root rb_root; /* sorted tree of children */ unsigned int val_nr; u64 hit; - u64 cumul_hit; /* hit + hits of children */ + u64 children_hit; }; struct callchain_param; @@ -48,6 +48,11 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->val); } +static inline u64 cumul_hits(struct callchain_node *node) +{ + return node->hit + node->children_hit; +} + int register_callchain_param(struct callchain_param *param); void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -- cgit v1.2.3 From 1c222bce7dd0cb9578b4c5cd3874a74f1db497c3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Aug 2009 20:57:41 +0200 Subject: perf tools: Fix multi-counter stat bug caused by incorrect reading of perf.data file header Brice Goglin reported that only the first result from a multi-counter perf record --stat run is accurate, the rest looks bogus. A silly mistake made us re-read the first attribute for every recorded attribute. Reported-by: Brice Goglin Signed-off-by: Peter Zijlstra Tested-by: Brice Goglin Cc: paulus@samba.org Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 450384b3bbe5..95a44bcfc2dc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -213,9 +213,10 @@ struct perf_header *perf_header__read(int fd) for (i = 0; i < nr_attrs; i++) { struct perf_header_attr *attr; - off_t tmp = lseek(fd, 0, SEEK_CUR); + off_t tmp; do_read(fd, &f_attr, sizeof(f_attr)); + tmp = lseek(fd, 0, SEEK_CUR); attr = perf_header_attr__new(&f_attr.attr); -- cgit v1.2.3 From 8f18aec535b5ca513dd13b531730177d35175ffa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Aug 2009 19:40:28 +0200 Subject: perf report: Fix per task mult-counter stat reporting Brice Goglin reported: > I can easily sort them by thread id, but I don't know how to match > my 4 events with each group of 4 lines. Also report the counter id and the time running/enabled stats (in case the counter got time-shared). Reported-by: Brice Goglin Signed-off-by: Peter Zijlstra Tested-by: Brice Goglin Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 31 +++++++++++++++++++++++++++---- tools/perf/util/parse-events.c | 8 +++++++- tools/perf/util/parse-events.h | 1 + 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index da402e186561..84205462e07b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -112,7 +112,9 @@ struct read_event { struct perf_event_header header; u32 pid,tid; u64 value; - u64 format[3]; + u64 time_enabled; + u64 time_running; + u64 id; }; typedef union event_union { @@ -1690,14 +1692,37 @@ static void trace_event(event_t *event) dprintf(".\n"); } +static struct perf_header *header; + +static struct perf_counter_attr *perf_header__find_attr(u64 id) +{ + int i; + + for (i = 0; i < header->attrs; i++) { + struct perf_header_attr *attr = header->attr[i]; + int j; + + for (j = 0; j < attr->ids; j++) { + if (attr->id[j] == id) + return &attr->attr; + } + } + + return NULL; +} + static int process_read_event(event_t *event, unsigned long offset, unsigned long head) { - dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", + struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); + + dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", (void *)(offset + head), (void *)(long)(event->header.size), event->read.pid, event->read.tid, + attr ? __event_name(attr->type, attr->config) + : "FAIL", event->read.value); return 0; @@ -1743,8 +1768,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static struct perf_header *header; - static u64 perf_header__sample_type(void) { u64 sample_type = 0; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7bdad8df22a6..f77407b5832e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -223,9 +223,15 @@ char *event_name(int counter) { u64 config = attrs[counter].config; int type = attrs[counter].type; + + return __event_name(type, config); +} + +char *__event_name(int type, u64 config) +{ static char buf[32]; - if (attrs[counter].type == PERF_TYPE_RAW) { + if (type == PERF_TYPE_RAW) { sprintf(buf, "raw 0x%llx", config); return buf; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1ea5d09b6eb1..192a962e3a0f 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -10,6 +10,7 @@ extern int nr_counters; extern struct perf_counter_attr attrs[MAX_COUNTERS]; extern char *event_name(int ctr); +extern char *__event_name(int type, u64 config); extern int parse_events(const struct option *opt, const char *str, int unset); -- cgit v1.2.3 From 94cb9e385d5b4d55a5ae389baa10ad2835ea39bb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Aug 2009 14:43:17 -0300 Subject: perf report: Add debug help for the finding of symbol bugs - show the symtab origin (DSO, build-id, kernel, etc) Used with perf report --verbose: [acme@doppio linux-2.6-tip]$ perf report -v | head -16 5.17% firefox /usr/lib64/xulrunner-1.9.1/libxul.so 0x00000000005d8eee f [.] imgContainer::DrawFrameTo(gfxIImageFrame*, gfxIImageFrame*, nsRect&) 2.56% firefox /lib64/libpthread-2.10.1.so 0x0000000000008e02 d [.] __pthread_mutex_lock_internal 1.94% firefox /usr/lib64/xulrunner-1.9.1/libxul.so 0x0000000000d0af8f f [.] SearchTable 1.75% firefox [kernel] 0xffffffffff60013b k [.] vread_hpet 1.63% firefox /lib64/libpthread-2.10.1.so 0x000000000000a404 d [.] __pthread_mutex_unlock 1.47% firefox /usr/lib64/xulrunner-1.9.1/libmozjs.so 0x00000000000482ea f [.] js_Interpret 1.42% firefox /usr/lib64/xulrunner-1.9.1/libmozjs.so 0x000000000003eda3 f [.] JS_CallTracer 1.24% firefox [kernel] 0xffffffff8102ca4a k [k] read_hpet 1.16% firefox [kernel] 0xffffffff810f3dd4 k [k] fget_light 1.11% firefox /usr/lib64/xulrunner-1.9.1/libmozjs.so 0x00000000000567ff f [.] js_TraceObject 0.98% firefox /usr/lib64/firefox-3.5.2/firefox 0x000000000000dd23 b [.] arena_ralloc [acme@doppio linux-2.6-tip]$ The new field is just after the symbol address. To help in figuring out symbol resolution bugs. Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 3 ++- tools/perf/util/symbol.c | 57 ++++++++++++++++++++++++++++++++++++--------- tools/perf/util/symbol.h | 2 ++ 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 84205462e07b..a5e2f8df411c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -700,7 +700,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) size_t ret = 0; if (verbose) - ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); + ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, + dso__symtab_origin(self->dso)); ret += repsep_fprintf(fp, "[%c] ", self->level); if (self->sym) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 16ddca202948..f1dcede14307 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -24,6 +24,16 @@ const char *sym_hist_filter; #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ #endif +enum dso_origin { + DSO__ORIG_KERNEL = 0, + DSO__ORIG_JAVA_JIT, + DSO__ORIG_FEDORA, + DSO__ORIG_UBUNTU, + DSO__ORIG_BUILDID, + DSO__ORIG_DSO, + DSO__ORIG_NOT_FOUND, +}; + static struct symbol *symbol__new(u64 start, u64 len, const char *name, unsigned int priv_size, u64 obj_start, int verbose) @@ -81,6 +91,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size) self->sym_priv_size = sym_priv_size; self->find_symbol = dso__find_symbol; self->slen_calculated = 0; + self->origin = DSO__ORIG_NOT_FOUND; } return self; @@ -710,7 +721,7 @@ static char *dso__read_build_id(struct dso *self, int verbose) ++raw; bid += 2; } - if (verbose) + if (verbose >= 2) printf("%s(%s): %s\n", __func__, self->name, build_id); out_elf_end: elf_end(elf); @@ -720,11 +731,26 @@ out: return build_id; } +char dso__symtab_origin(const struct dso *self) +{ + static const char origin[] = { + [DSO__ORIG_KERNEL] = 'k', + [DSO__ORIG_JAVA_JIT] = 'j', + [DSO__ORIG_FEDORA] = 'f', + [DSO__ORIG_UBUNTU] = 'u', + [DSO__ORIG_BUILDID] = 'b', + [DSO__ORIG_DSO] = 'd', + }; + + if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) + return '!'; + return origin[self->origin]; +} + int dso__load(struct dso *self, symbol_filter_t filter, int verbose) { int size = PATH_MAX; char *name = malloc(size), *build_id = NULL; - int variant = 0; int ret = -1; int fd; @@ -733,19 +759,26 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) self->adjust_symbols = 0; - if (strncmp(self->name, "/tmp/perf-", 10) == 0) - return dso__load_perf_map(self, filter, verbose); + if (strncmp(self->name, "/tmp/perf-", 10) == 0) { + ret = dso__load_perf_map(self, filter, verbose); + self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT : + DSO__ORIG_NOT_FOUND; + return ret; + } + + self->origin = DSO__ORIG_FEDORA - 1; more: do { - switch (variant) { - case 0: /* Fedora */ + self->origin++; + switch (self->origin) { + case DSO__ORIG_FEDORA: snprintf(name, size, "/usr/lib/debug%s.debug", self->name); break; - case 1: /* Ubuntu */ + case DSO__ORIG_UBUNTU: snprintf(name, size, "/usr/lib/debug%s", self->name); break; - case 2: + case DSO__ORIG_BUILDID: build_id = dso__read_build_id(self, verbose); if (build_id != NULL) { snprintf(name, size, @@ -754,16 +787,15 @@ more: free(build_id); break; } - variant++; + self->origin++; /* Fall thru */ - case 3: /* Sane people */ + case DSO__ORIG_DSO: snprintf(name, size, "%s", self->name); break; default: goto out; } - variant++; fd = open(name, O_RDONLY); } while (fd < 0); @@ -899,6 +931,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, if (err <= 0) err = dso__load_kallsyms(self, filter, verbose); + if (err > 0) + self->origin = DSO__ORIG_KERNEL; + return err; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2f92b21c712d..1e003ec2f4b1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -26,6 +26,7 @@ struct dso { unsigned int sym_priv_size; unsigned char adjust_symbols; unsigned char slen_calculated; + unsigned char origin; char name[0]; }; @@ -49,6 +50,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose); int dso__load(struct dso *self, symbol_filter_t filter, int verbose); size_t dso__fprintf(struct dso *self, FILE *fp); +char dso__symtab_origin(const struct dso *self); void symbol__init(void); #endif /* _PERF_SYMBOL_ */ -- cgit v1.2.3 From b26bc5a7f81474937e427b0c855eabee5ad56f89 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Fri, 7 Aug 2009 10:18:39 +0200 Subject: perf stat: Fix tool option consistency: rename -S/--scale to -c/--scale We want to use a coherent flag for -S/--stat across all tools, so free up -S in perf stat. Signed-off-by: Brice Goglin Cc: Peter Zijlstra Cc: paulus@samba.org Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-stat.txt | 2 +- tools/perf/builtin-stat.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 0d74346d21ab..484080dd5b6f 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -40,7 +40,7 @@ OPTIONS -a:: system-wide collection --S:: +-c:: scale counter values EXAMPLES diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f9510eeeb6c7..b4b06c7903e1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -496,7 +496,7 @@ static const struct option options[] = { "stat events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), - OPT_BOOLEAN('S', "scale", &scale, + OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), -- cgit v1.2.3 From f36a1a133a947973efb8e6a1fbdcc23e4a011437 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 7 Aug 2009 16:59:45 +1000 Subject: perf_counter/powerpc: Fix oops on cpus without perf_counter hardware support If we have the powerpc perf_counter backend compiled in, but the cpu we are running on is one where we don't support the PMU, we currently oops in hw_perf_group_sched_in if we try to use any counters, because ppmu is NULL in that case, and we unconditionally dereference ppmu. This fixes the problem by adding a check if ppmu is NULL at the beginning of hw_perf_group_sched_in, and also at the beginning of the other functions that get called from the perf_counter core, i.e. hw_perf_disable, hw_perf_enable, and hw_perf_counter_setup. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: benh@kernel.crashing.org Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 809fdf94b95f..70e1f57f7dd8 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -518,6 +518,8 @@ void hw_perf_disable(void) struct cpu_hw_counters *cpuhw; unsigned long flags; + if (!ppmu) + return; local_irq_save(flags); cpuhw = &__get_cpu_var(cpu_hw_counters); @@ -572,6 +574,8 @@ void hw_perf_enable(void) int n_lim; int idx; + if (!ppmu) + return; local_irq_save(flags); cpuhw = &__get_cpu_var(cpu_hw_counters); if (!cpuhw->disabled) { @@ -737,6 +741,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, long i, n, n0; struct perf_counter *sub; + if (!ppmu) + return 0; cpuhw = &__get_cpu_var(cpu_hw_counters); n0 = cpuhw->n_counters; n = collect_events(group_leader, ppmu->n_counter - n0, @@ -1281,6 +1287,8 @@ void hw_perf_counter_setup(int cpu) { struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); + if (!ppmu) + return; memset(cpuhw, 0, sizeof(*cpuhw)); cpuhw->mmcr[0] = MMCR0_FC; } -- cgit v1.2.3 From ae07b63f4b6728e1f98aa5c5416cfc1280f59f51 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Aug 2009 16:48:54 +0200 Subject: perf list: Fix the output to not include tracepoints without an id Stop perf list from displaying tracepoints without an id file, those are special tracepoints that are not interfaced to perfcounters so listing them is erroneous and passing them as events will produce no output. Signed-off-by: Peter Zijlstra Acked-by: Jason Baron Cc: Steven Rostedt Cc: Chris Mason Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f77407b5832e..4858d83b3b67 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = { (strcmp(sys_dirent.d_name, ".")) && \ (strcmp(sys_dirent.d_name, ".."))) +static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) +{ + char evt_path[MAXPATHLEN]; + int fd; + + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, + sys_dir->d_name, evt_dir->d_name); + fd = open(evt_path, O_RDONLY); + if (fd < 0) + return -EINVAL; + close(fd); + + return 0; +} + #define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ sys_dirent.d_name, evt_dirent.d_name) && \ (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ (strcmp(evt_dirent.d_name, ".")) && \ - (strcmp(evt_dirent.d_name, ".."))) + (strcmp(evt_dirent.d_name, "..")) && \ + (!tp_event_has_id(&sys_dirent, &evt_dirent))) #define MAX_EVENT_LENGTH 30 -- cgit v1.2.3 From 7eac7e9e726c1b136bd7e0ad6671ce315f48bb18 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Fri, 7 Aug 2009 14:16:00 +0200 Subject: perf util: Fix do_read() to fail on EOF instead of busy-looping While toying with perf, I've noticed that perf record can easily enter a busy loop when doing something as silly as: $ perf record -A ls Yeah, do_read here really wants to read a known size, not being able to should die(), not busy-loop ;) That was the cause for the bug. Signed-off-by: Pierre Habouzit Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 95a44bcfc2dc..b92a457ca32e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size) if (ret < 0) die("failed to read"); + if (ret == 0) + die("failed to read: missing data"); size -= ret; buf += ret; -- cgit v1.2.3 From 266e0e219888420a1a7cafc82e82891cf7b5a979 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Fri, 7 Aug 2009 14:16:01 +0200 Subject: perf record: Fix the -A UI for empty or non-existent perf.data 1. Ignore the -A argument if there is no perf.data file 2. Treat an empty file like a non existent file. Else, perf will try to read the perf.data header, and fail with an error. Treating an empty file like a non-existent file makes sense, since an interupted (as in SIGKILLed) perf could leave such files around, and you don't want to annoy the user with errors for files with no data in it. Signed-off-by: Pierre Habouzit Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 90c98082af10..0345aad8eba5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -525,10 +525,14 @@ static int __cmd_record(int argc