From cdcd1e6bd8a92f8353fc2f37003c6eae2d1e6903 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Jun 2014 16:00:18 -0300 Subject: perf trace: Fix up fd -> pathname resolution There was a brown paper bag bug in the patch that introduced a reference implementation on using 'perf probe' made wannabe tracepoints that broke fd -> pathname resolution, fix it: [root@zoo ~]# perf probe 'vfs_getname=getname_flags:65 pathname=result->name:string' Added new event: probe:vfs_getname (on getname_flags:65 with pathname=result->name:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 [root@zoo ~] Before: [acme@zoo linux]$ trace touch -e open,fstat /tmp/b 1.159 ( 0.007 ms): open(filename: 0x7fd73f2fe088, flags: CLOEXEC ) = 3 1.163 ( 0.002 ms): fstat(fd: 3, statbuf: 0x7fff1b25e610 ) = 0 1.192 ( 0.009 ms): open(filename: 0x7fd73f4fedb8, flags: CLOEXEC ) = 3 1.201 ( 0.002 ms): fstat(fd: 3, statbuf: 0x7fff1b25e660 ) = 0 1.501 ( 0.013 ms): open(filename: 0x7fd73f0a1610, flags: CLOEXEC ) = 3 1.505 ( 0.002 ms): fstat(fd: 3, statbuf: 0x7fd73f2ddb60 ) = 0 1.581 ( 0.011 ms): open(filename: 0x7fff1b2603da, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: 438) = 3 [acme@zoo linux]$ After: [acme@zoo linux]$ trace touch -e open,fstat,dup2,mmap,close /tmp/b 1.105 ( 0.004 ms): mmap(len: 4096, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS, fd: -1 ) = 0x2fbf000 1.136 ( 0.008 ms): open(filename: 0x7f8902dbc088, flags: CLOEXEC ) = 3 1.140 ( 0.002 ms): fstat(fd: 3, statbuf: 0x7fff19889ef0 ) = 0 1.146 ( 0.004 ms): mmap(len: 86079, prot: READ, flags: PRIVATE, fd: 3 ) = 0x2fa9000 1.149 ( 0.001 ms): close(fd: 3 ) = 0 1.170 ( 0.010 ms): open(filename: 0x7f8902fbcdb8, flags: CLOEXEC ) = 3 1.178 ( 0.002 ms): fstat(fd: 3, statbuf: 0x7fff19889f40 ) = 0 1.188 ( 0.006 ms): mmap(len: 3924576, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x29e2000 1.207 ( 0.007 ms): mmap(addr: 0x7f8902d96000, len: 24576, prot: READ|WRITE, flags: 
PRIVATE|DENYWRITE|FIXED, fd: 3, off: 1785856) = 0x2d96000 1.217 ( 0.004 ms): mmap(addr: 0x7f8902d9c000, len: 16992, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS|FIXED, fd: -1) = 0x2d9c000 1.228 ( 0.002 ms): close(fd: 3 ) = 0 1.243 ( 0.003 ms): mmap(len: 4096, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS, fd: -1 ) = 0x2fa8000 1.250 ( 0.003 ms): mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS, fd: -1 ) = 0x2fa6000 1.452 ( 0.010 ms): open(filename: 0x7f8902b5f610, flags: CLOEXEC ) = 3 1.455 ( 0.002 ms): fstat(fd: 3, statbuf: 0x7f8902d9bb60 ) = 0 1.461 ( 0.004 ms): mmap(len: 106070960, prot: READ, flags: PRIVATE, fd: 3) = 0xfc4b9000 1.469 ( 0.002 ms): close(fd: 3 ) = 0 1.528 ( 0.010 ms): open(filename: 0x7fff1988c3da, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: 438) = 3 1.532 ( 0.002 ms): dup2(oldfd: 3 ) = 0 1.535 ( 0.001 ms): close(fd: 3 ) = 0 1.544 ( 0.001 ms): close( ) = 0 1.555 ( 0.001 ms): close(fd: 1 ) = 0 1.558 ( 0.001 ms): close(fd: 2 ) = 0 [acme@zoo linux]$ Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-vcm22xpjxc3j4hbyuzjzf7ik@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f954c26de231..5ab2f674fed2 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1276,11 +1276,11 @@ static const char *thread__fd_path(struct thread *thread, int fd, if (fd < 0) return NULL; - if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) + if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { if (!trace->live) return NULL; ++trace->stats.proc_getname; - if (thread__read_fd_path(thread, fd)) { + if (thread__read_fd_path(thread, fd)) return NULL; } -- cgit v1.2.3 From 
5229e366ee6baeb58b77e09643d2e11cbbd29950 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Jun 2014 17:18:54 -0300 Subject: perf evlist: Add suggestion of how to set perf_event_paranoid sysctl Minor hint to speed up problem resolution and get 'trace' working for non root users. Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-abdqi8km4fj9osrn70q2zj9v@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 59ef2802fcf6..c51223ac25f4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1214,10 +1214,11 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, "For your workloads it needs to be <= 1\nHint:\t"); } printed += scnprintf(buf + printed, size - printed, - "For system wide tracing it needs to be set to -1"); + "For system wide tracing it needs to be set to -1.\n"); printed += scnprintf(buf + printed, size - printed, - ".\nHint:\tThe current value is %d.", value); + "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" + "Hint:\tThe current value is %d.", value); break; default: scnprintf(buf, size, "%s", emsg); -- cgit v1.2.3 From 774135344fa8aa044290d030068f92e9a3aab8cc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Jun 2014 17:21:59 -0300 Subject: perf trace: Remove needless reassignments The thread->priv value is already obtained a few lines earlier from the thread__trace() call. Leftovers from before thread__trace(). 
Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-96laa634vzfwlwxurevo40wp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5ab2f674fed2..28c86e21fad9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1629,7 +1629,6 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, return -1; args = perf_evsel__sc_tp_ptr(evsel, args, sample); - ttrace = thread->priv; if (ttrace->entry_str == NULL) { ttrace->entry_str = malloc(1024); @@ -1687,8 +1686,6 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ++trace->stats.vfs_getname; } - ttrace = thread->priv; - ttrace->exit_time = sample->time; if (ttrace->entry_time) { -- cgit v1.2.3 From 5089f20ee7104bd219cafefa62d83f53e75cc44c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Jun 2014 14:29:24 -0300 Subject: perf trace: Cache the is_exit syscall test No need to use two strcmp calls per syscall entry, do it just once, when reading the per syscall info. 
Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-lymtxhz0mg3adyt5e2pssn8f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 28c86e21fad9..5549cee61680 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1108,6 +1108,7 @@ struct syscall { struct event_format *tp_format; const char *name; bool filtered; + bool is_exit; struct syscall_fmt *fmt; size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); void **arg_parm; @@ -1473,6 +1474,8 @@ static int trace__read_syscall_info(struct trace *trace, int id) if (sc->tp_format == NULL) return -1; + sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); + return syscall__set_arg_fmts(sc); } @@ -1643,7 +1646,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args, trace, thread); - if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { + if (sc->is_exit) { if (!trace->duration_filter && !trace->summary_only) { trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); fprintf(trace->output, "%-70s\n", ttrace->entry_str); -- cgit v1.2.3 From dd00d486ddb7f181cf9487f6aceb1066bc6b0b6a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 19 Jun 2014 13:41:13 +0200 Subject: perf hists browser: Remove ev_name argument from perf_evsel__hists_browse Removing ev_name argument from perf_evsel__hists_browse function, because it's not needed. We can get the name out of the 'struct perf_evsel' which is passed as argument as well. 
Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1403178076-14072-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 52c03fbbba17..1bd35e8ed9f1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -32,8 +32,7 @@ struct hist_browser { extern void hist_browser__init_hpp(void); -static int hists__browser_title(struct hists *hists, char *bf, size_t size, - const char *ev_name); +static int hists__browser_title(struct hists *hists, char *bf, size_t size); static void hist_browser__update_nr_entries(struct hist_browser *hb); static struct rb_node *hists__filter_entries(struct rb_node *nd, @@ -345,7 +344,7 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser) "Or reduce the sampling frequency."); } -static int hist_browser__run(struct hist_browser *browser, const char *ev_name, +static int hist_browser__run(struct hist_browser *browser, struct hist_browser_timer *hbt) { int key; @@ -356,7 +355,7 @@ static int hist_browser__run(struct hist_browser *browser, const char *ev_name, browser->b.nr_entries = hist_browser__nr_entries(browser); hist_browser__refresh_dimensions(browser); - hists__browser_title(browser->hists, title, sizeof(title), ev_name); + hists__browser_title(browser->hists, title, sizeof(title)); if (ui_browser__show(&browser->b, title, "Press '?' 
for help on key bindings") < 0) @@ -383,7 +382,7 @@ static int hist_browser__run(struct hist_browser *browser, const char *ev_name, ui_browser__warn_lost_events(&browser->b); } - hists__browser_title(browser->hists, title, sizeof(title), ev_name); + hists__browser_title(browser->hists, title, sizeof(title)); ui_browser__show_title(&browser->b, title); continue; } @@ -1212,8 +1211,7 @@ static struct thread *hist_browser__selected_thread(struct hist_browser *browser return browser->he_selection->thread; } -static int hists__browser_title(struct hists *hists, char *bf, size_t size, - const char *ev_name) +static int hists__browser_title(struct hists *hists, char *bf, size_t size) { char unit; int printed; @@ -1222,6 +1220,7 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size, unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; u64 nr_events = hists->stats.total_period; struct perf_evsel *evsel = hists_to_evsel(hists); + const char *ev_name = perf_evsel__name(evsel); char buf[512]; size_t buflen = sizeof(buf); @@ -1389,7 +1388,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb) } static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, - const char *helpline, const char *ev_name, + const char *helpline, bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, @@ -1464,7 +1463,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, nr_options = 0; - key = hist_browser__run(browser, ev_name, hbt); + key = hist_browser__run(browser, hbt); if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); @@ -1832,7 +1831,7 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu, { struct perf_evlist *evlist = menu->b.priv; struct perf_evsel *pos; - const char *ev_name, *title = "Available samples"; + const char *title = "Available samples"; int delay_secs = hbt ? 
hbt->refresh : 0; int key; @@ -1865,9 +1864,8 @@ browse_hists: */ if (hbt) hbt->timer(hbt->arg); - ev_name = perf_evsel__name(pos); key = perf_evsel__hists_browse(pos, nr_events, help, - ev_name, true, hbt, + true, hbt, menu->min_pcnt, menu->env); ui_browser__show_title(&menu->b, title); @@ -1971,10 +1969,9 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, single_entry: if (nr_entries == 1) { struct perf_evsel *first = perf_evlist__first(evlist); - const char *ev_name = perf_evsel__name(first); return perf_evsel__hists_browse(first, nr_entries, help, - ev_name, false, hbt, min_pcnt, + false, hbt, min_pcnt, env); } -- cgit v1.2.3 From 89632972e2c56356d1e227aac151cf4e7c2f30d6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 19 Jun 2014 13:41:14 +0200 Subject: perf ui browser: Fix scrollbar refresh row index The ui_browser__gotorc function needs an offset from the 'y' member, so the row index has to begin with 0, which happens by accident in the current code, because we display only one header line. The bug shows when we want to display more than one header line, such as column headers, in the following patches. 
Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1403178076-14072-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 3ccf6e14f89b..9d2294efc00c 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -279,7 +279,7 @@ static void ui_browser__scrollbar_set(struct ui_browser *browser) { int height = browser->height, h = 0, pct = 0, col = browser->width, - row = browser->y - 1; + row = 0; if (browser->nr_entries > 1) { pct = ((browser->index * (browser->height - 1)) / -- cgit v1.2.3 From b094c99e8e284cff839400a3b61fda1fa53962fc Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 16 Jun 2014 11:14:22 -0700 Subject: perf bench sched-messaging: Plug memleak Explicitly free the thread array ('pth_tab'). 
Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Hitoshi Mitake Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1402942467-10671-5-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-messaging.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index cc1190a0849b..fc4fe91ee098 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -332,5 +332,7 @@ int bench_sched_messaging(int argc, const char **argv, break; } + free(pth_tab); + return 0; } -- cgit v1.2.3 From b6f0629a94f7ed6089560be7f0561be19f934fc4 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 16 Jun 2014 11:14:19 -0700 Subject: perf bench: Add --repeat option There are a number of benchmarks that do single runs and as a result do not really help users gain a general idea of how the workload performs. So the user must either manually do multiple runs or just use single bogus results. This option will enable users to specify the number of runs (arbitrarily defaulted to 10, to use the existing benchmarks default) through the '--repeat' option. Add it to perf-bench instead of implementing it always in each specific benchmark. 
Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Hitoshi Mitake Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1402942467-10671-2-git-send-email-davidlohr@hp.com [ Kept the existing default of 10, changing it to something else should be done on separate patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 4 ++++ tools/perf/bench/bench.h | 1 + tools/perf/builtin-bench.c | 7 +++++++ 3 files changed, 12 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index 4464ad770d51..f6480cbf309b 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -16,6 +16,10 @@ This 'perf bench' command is a general framework for benchmark suites. COMMON OPTIONS -------------- +-r:: +--repeat=:: +Specify amount of times to repeat the run (default 10). + -f:: --format=:: Specify format style. diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index eba46709b279..3c4dd44d45cb 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -43,5 +43,6 @@ extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_UNKNOWN -1 extern int bench_format; +extern unsigned int bench_repeat; #endif diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 1e6e77710545..b9a56fa83330 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -104,9 +104,11 @@ static const char *bench_format_str; /* Output/formatting style, exported to benchmark modules: */ int bench_format = BENCH_FORMAT_DEFAULT; +unsigned int bench_repeat = 10; /* default number of times to repeat the run */ static const struct option bench_options[] = { OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"), + OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify amount of times to repeat the run"), OPT_END() }; @@ -226,6 +228,11 @@ 
int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) goto end; } + if (bench_repeat == 0) { + printf("Invalid repeat option: Must specify a positive value\n"); + goto end; + } + if (argc < 1) { print_usage(); goto end; -- cgit v1.2.3 From d9de84afd1f3a464135abe2b26e66aa86be5af8d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 16 Jun 2014 11:14:23 -0700 Subject: perf bench futex: Use global --repeat option This option is available through perf-bench, use it instead and free the local option. Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Hitoshi Mitake Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1402942467-10671-6-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-requeue.c | 10 +--------- tools/perf/bench/futex-wake.c | 12 ++---------- 2 files changed, 3 insertions(+), 19 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index a16255876f1d..732403bfd31a 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -29,13 +29,6 @@ static u_int32_t futex1 = 0, futex2 = 0; */ static unsigned int nrequeue = 1; -/* - * There can be significant variance from run to run, - * the more repeats, the more exact the overall avg and - * the better idea of the futex latency. 
- */ -static unsigned int repeat = 10; - static pthread_t *worker; static bool done = 0, silent = 0; static pthread_mutex_t thread_lock; @@ -46,7 +39,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0; static const struct option options[] = { OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"), - OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"), OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), OPT_END() }; @@ -146,7 +138,7 @@ int bench_futex_requeue(int argc, const char **argv, pthread_cond_init(&thread_parent, NULL); pthread_cond_init(&thread_worker, NULL); - for (j = 0; j < repeat && !done; j++) { + for (j = 0; j < bench_repeat && !done; j++) { unsigned int nrequeued = 0; struct timeval start, end, runtime; diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index d096169b161e..50022cbce87e 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -30,15 +30,8 @@ static u_int32_t futex1 = 0; */ static unsigned int nwakes = 1; -/* - * There can be significant variance from run to run, - * the more repeats, the more exact the overall avg and - * the better idea of the futex latency. 
- */ -static unsigned int repeat = 10; - pthread_t *worker; -static bool done = 0, silent = 0; +static bool done = false, silent = false; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats waketime_stats, wakeup_stats; @@ -47,7 +40,6 @@ static unsigned int ncpus, threads_starting, nthreads = 0; static const struct option options[] = { OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"), - OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"), OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), OPT_END() }; @@ -149,7 +141,7 @@ int bench_futex_wake(int argc, const char **argv, pthread_cond_init(&thread_parent, NULL); pthread_cond_init(&thread_worker, NULL); - for (j = 0; j < repeat && !done; j++) { + for (j = 0; j < bench_repeat && !done; j++) { unsigned int nwoken = 0; struct timeval start, end, runtime; -- cgit v1.2.3 From 424e9634887842ac59c1d06d3264aaeb18853c0b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 16 Jun 2014 11:14:25 -0700 Subject: perf bench mem: The -o and -n options are mutually exclusive -o, --only-prefault Show only the result with page faults before mem* -n, --no-prefault Show only the result without page faults before mem* Makes no sense to call together. Applies to both memset and memcpy. 
Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Hitoshi Mitake Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1402942467-10671-8-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memcpy.c | 5 +++++ tools/perf/bench/mem-memset.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 5ce71d3b72cf..e622c3e96efc 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -189,6 +189,11 @@ int bench_mem_memcpy(int argc, const char **argv, argc = parse_options(argc, argv, options, bench_mem_memcpy_usage, 0); + if (no_prefault && only_prefault) { + fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); + return 1; + } + if (use_cycle) init_cycle(); diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index 9af79d2b18e5..2a65468619f0 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -181,6 +181,11 @@ int bench_mem_memset(int argc, const char **argv, argc = parse_options(argc, argv, options, bench_mem_memset_usage, 0); + if (no_prefault && only_prefault) { + fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); + return 1; + } + if (use_cycle) init_cycle(); -- cgit v1.2.3 From ecdac96899e3db3f428e4d2e978f25e3f8d35a6c Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 16 Jun 2014 11:14:26 -0700 Subject: perf bench sched-messaging: Drop barf() Instead of reinventing the wheel, we can use err(3) when dealing with fatal errors. Exit code is now always EXIT_FAILURE (1). 
Signed-off-by: Davidlohr Bueso Cc: Aswin Chandramouleeswaran Cc: Hitoshi Mitake Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1402942467-10671-9-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-messaging.c | 45 ++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 26 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index fc4fe91ee098..52a56599a543 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -28,6 +28,7 @@ #include #include #include +#include #define DATASIZE 100 @@ -50,12 +51,6 @@ struct receiver_context { int wakefd; }; -static void barf(const char *msg) -{ - fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno)); - exit(1); -} - static void fdpair(int fds[2]) { if (use_pipes) { @@ -66,7 +61,7 @@ static void fdpair(int fds[2]) return; } - barf(use_pipes ? "pipe()" : "socketpair()"); + err(EXIT_FAILURE, use_pipes ? "pipe()" : "socketpair()"); } /* Block until we're ready to go */ @@ -77,11 +72,11 @@ static void ready(int ready_out, int wakefd) /* Tell them we're ready. 
*/ if (write(ready_out, &dummy, 1) != 1) - barf("CLIENT: ready write"); + err(EXIT_FAILURE, "CLIENT: ready write"); /* Wait for "GO" signal */ if (poll(&pollfd, 1, -1) != 1) - barf("poll"); + err(EXIT_FAILURE, "poll"); } /* Sender sprays loops messages down each file descriptor */ @@ -101,7 +96,7 @@ again: ret = write(ctx->out_fds[j], data + done, sizeof(data)-done); if (ret < 0) - barf("SENDER: write"); + err(EXIT_FAILURE, "SENDER: write"); done += ret; if (done < DATASIZE) goto again; @@ -131,7 +126,7 @@ static void *receiver(struct receiver_context* ctx) again: ret = read(ctx->in_fds[0], data + done, DATASIZE - done); if (ret < 0) - barf("SERVER: read"); + err(EXIT_FAILURE, "SERVER: read"); done += ret; if (done < DATASIZE) goto again; @@ -144,14 +139,14 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *)) { pthread_attr_t attr; pthread_t childid; - int err; + int ret; if (!thread_mode) { /* process mode */ /* Fork the receiver. */ switch (fork()) { case -1: - barf("fork()"); + err(EXIT_FAILURE, "fork()"); break; case 0: (*func) (ctx); @@ -165,19 +160,17 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *)) } if (pthread_attr_init(&attr) != 0) - barf("pthread_attr_init:"); + err(EXIT_FAILURE, "pthread_attr_init:"); #ifndef __ia64__ if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0) - barf("pthread_attr_setstacksize"); + err(EXIT_FAILURE, "pthread_attr_setstacksize"); #endif - err = pthread_create(&childid, &attr, func, ctx); - if (err != 0) { - fprintf(stderr, "pthread_create failed: %s (%d)\n", - strerror(err), err); - exit(-1); - } + ret = pthread_create(&childid, &attr, func, ctx); + if (ret != 0) + err(EXIT_FAILURE, "pthread_create failed"); + return childid; } @@ -207,14 +200,14 @@ static unsigned int group(pthread_t *pth, + num_fds * sizeof(int)); if (!snd_ctx) - barf("malloc()"); + err(EXIT_FAILURE, "malloc()"); for (i = 0; i < num_fds; i++) { int fds[2]; struct receiver_context *ctx = malloc(sizeof(*ctx)); if 
(!ctx) - barf("malloc()"); + err(EXIT_FAILURE, "malloc()"); /* Create the pipe between client and server */ @@ -281,7 +274,7 @@ int bench_sched_messaging(int argc, const char **argv, pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t)); if (!pth_tab) - barf("main:malloc()"); + err(EXIT_FAILURE, "main:malloc()"); fdpair(readyfds); fdpair(wakefds); @@ -294,13 +287,13 @@ int bench_sched_messaging(int argc, const char **argv, /* Wait for everyone to be ready */ for (i = 0; i < total_children; i++) if (read(readyfds[0], &dummy, 1) != 1) - barf("Reading for readyfds"); + err(EXIT_FAILURE, "Reading for readyfds"); gettimeofday(&start, NULL); /* Kick them off */ if (write(wakefds[1], &dummy, 1) != 1) - barf("Writing to start them"); + err(EXIT_FAILURE, "Writing to start them"); /* Reap them all */ for (i = 0; i < total_children; i++) -- cgit v1.2.3 From 07100877ea8fd9b2feabb4dd78f3322892f6bd77 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 11 Jun 2014 16:09:08 -0300 Subject: perf scripts: Fallback to syscalls:* when raw_syscalls:* is not available Older kernels (e.g., RHEL6) do system call tracing via the syscalls:sys_{enter,exit} tracepoints rather than using raw_syscalls:*. Update perf python and perl scripts to fallback to syscalls:* when raw_syscalls:* isn't available. Signed-off-by: Daniel Bristot de Oliveira Cc: Ingo Molnar Cc: Luis Claudio R. 
Goncalves Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5a6c64081a3375bc3bc66351b14559678ef4d71e.1402507908.git.bristot@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/perl/bin/failed-syscalls-record | 3 ++- tools/perf/scripts/perl/failed-syscalls.pl | 5 +++++ tools/perf/scripts/python/bin/failed-syscalls-by-pid-record | 3 ++- tools/perf/scripts/python/bin/sctop-record | 3 ++- tools/perf/scripts/python/bin/syscall-counts-by-pid-record | 3 ++- tools/perf/scripts/python/bin/syscall-counts-record | 3 ++- tools/perf/scripts/python/failed-syscalls-by-pid.py | 5 +++++ tools/perf/scripts/python/sctop.py | 5 +++++ tools/perf/scripts/python/syscall-counts-by-pid.py | 5 +++++ tools/perf/scripts/python/syscall-counts.py | 5 +++++ 10 files changed, 35 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-record b/tools/perf/scripts/perl/bin/failed-syscalls-record index 8104895a7b67..74685f318379 100644 --- a/tools/perf/scripts/perl/bin/failed-syscalls-record +++ b/tools/perf/scripts/perl/bin/failed-syscalls-record @@ -1,2 +1,3 @@ #!/bin/bash -perf record -e raw_syscalls:sys_exit $@ +(perf record -e raw_syscalls:sys_exit $@ || \ + perf record -e syscalls:sys_exit $@) 2> /dev/null diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl index 94bc25a347eb..55e7ae4c5c88 100644 --- a/tools/perf/scripts/perl/failed-syscalls.pl +++ b/tools/perf/scripts/perl/failed-syscalls.pl @@ -26,6 +26,11 @@ sub raw_syscalls::sys_exit } } +sub syscalls::sys_exit +{ + raw_syscalls::sys_exit(@_) +} + sub trace_end { printf("\nfailed syscalls by comm:\n\n"); diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record index 8104895a7b67..74685f318379 100644 --- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record +++ 
b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record @@ -1,2 +1,3 @@ #!/bin/bash -perf record -e raw_syscalls:sys_exit $@ +(perf record -e raw_syscalls:sys_exit $@ || \ + perf record -e syscalls:sys_exit $@) 2> /dev/null diff --git a/tools/perf/scripts/python/bin/sctop-record b/tools/perf/scripts/python/bin/sctop-record index 4efbfaa7f6a5..d6940841e54f 100644 --- a/tools/perf/scripts/python/bin/sctop-record +++ b/tools/perf/scripts/python/bin/sctop-record @@ -1,2 +1,3 @@ #!/bin/bash -perf record -e raw_syscalls:sys_enter $@ +(perf record -e raw_syscalls:sys_enter $@ || \ + perf record -e syscalls:sys_enter $@) 2> /dev/null diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record index 4efbfaa7f6a5..d6940841e54f 100644 --- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record +++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record @@ -1,2 +1,3 @@ #!/bin/bash -perf record -e raw_syscalls:sys_enter $@ +(perf record -e raw_syscalls:sys_enter $@ || \ + perf record -e syscalls:sys_enter $@) 2> /dev/null diff --git a/tools/perf/scripts/python/bin/syscall-counts-record b/tools/perf/scripts/python/bin/syscall-counts-record index 4efbfaa7f6a5..d6940841e54f 100644 --- a/tools/perf/scripts/python/bin/syscall-counts-record +++ b/tools/perf/scripts/python/bin/syscall-counts-record @@ -1,2 +1,3 @@ #!/bin/bash -perf record -e raw_syscalls:sys_enter $@ +(perf record -e raw_syscalls:sys_enter $@ || \ + perf record -e syscalls:sys_enter $@) 2> /dev/null diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py index 85805fac4116..266a8364bce5 100644 --- a/tools/perf/scripts/python/failed-syscalls-by-pid.py +++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py @@ -50,6 +50,11 @@ def raw_syscalls__sys_exit(event_name, context, common_cpu, except TypeError: syscalls[common_comm][common_pid][id][ret] = 1 +def 
syscalls__sys_exit(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, ret): + raw_syscalls__sys_exit(**locals()) + def print_error_totals(): if for_comm is not None: print "\nsyscall errors for %s:\n\n" % (for_comm), diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py index 42c267e292fa..c9f3058b7dd4 100644 --- a/tools/perf/scripts/python/sctop.py +++ b/tools/perf/scripts/python/sctop.py @@ -53,6 +53,11 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu, except TypeError: syscalls[id] = 1 +def syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + raw_syscalls__sys_enter(**locals()) + def print_syscall_totals(interval): while 1: clear_term() diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py index c64d1c55d745..cf2054c529c9 100644 --- a/tools/perf/scripts/python/syscall-counts-by-pid.py +++ b/tools/perf/scripts/python/syscall-counts-by-pid.py @@ -48,6 +48,11 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu, except TypeError: syscalls[common_comm][common_pid][id] = 1 +def syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + raw_syscalls__sys_enter(**locals()) + def print_syscall_totals(): if for_comm is not None: print "\nsyscall events for %s:\n\n" % (for_comm), diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py index b435d3f188e8..92b29381bd39 100644 --- a/tools/perf/scripts/python/syscall-counts.py +++ b/tools/perf/scripts/python/syscall-counts.py @@ -44,6 +44,11 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu, except TypeError: syscalls[id] = 1 +def syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + 
raw_syscalls__sys_enter(**locals()) + def print_syscall_totals(): if for_comm is not None: print "\nsyscall events for %s:\n\n" % (for_comm), -- cgit v1.2.3 From 0c82adcf141935b6312593a53f87342dbb12b704 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 26 Jun 2014 20:14:24 +0400 Subject: perf trace: Add perf_event parameter to tracepoint_handler It will be used by next pagefault tracing patches in the series. Signed-off-by: Stanislav Fomichev Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1403799268-1367-2-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5549cee61680..4a9e26b731fe 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1538,6 +1538,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, } typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event, struct perf_sample *sample); static struct syscall *trace__syscall_info(struct trace *trace, @@ -1610,6 +1611,7 @@ static void thread__update_stats(struct thread_trace *ttrace, } static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, struct perf_sample *sample) { char *msg; @@ -1658,6 +1660,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, struct perf_sample *sample) { int ret; @@ -1735,6 +1738,7 @@ out: } static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, struct perf_sample *sample) { trace->last_vfs_getname = 
perf_evsel__rawptr(evsel, sample, "pathname"); @@ -1742,6 +1746,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, } static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, struct perf_sample *sample) { u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); @@ -1781,7 +1786,7 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample) } static int trace__process_sample(struct perf_tool *tool, - union perf_event *event __maybe_unused, + union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __maybe_unused) @@ -1799,7 +1804,7 @@ static int trace__process_sample(struct perf_tool *tool, if (handler) { ++trace->nr_events; - handler(trace, evsel, sample); + handler(trace, evsel, event, sample); } return err; @@ -1990,7 +1995,7 @@ again: } handler = evsel->handler; - handler(trace, evsel, &sample); + handler(trace, evsel, event, &sample); next_event: perf_evlist__mmap_consume(evlist, i); -- cgit v1.2.3 From 598d02c5a07b60e5c824184cdaf697b70f3c452a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 26 Jun 2014 20:14:25 +0400 Subject: perf trace: Add support for pagefault tracing This patch adds optional pagefault tracing support to 'perf trace'. Using -F/--pf option user can specify whether he wants minor, major or all pagefault events to be traced. This patch adds only live mode; record and replay will come in a separate patch. Example output: 1756272.905 ( 0.000 ms): curl/5937 majfault [0x7fa7261978b6] => /usr/lib/x86_64-linux-gnu/libkrb5.so.26.0.0@0x85288 (d.) 
1862866.036 ( 0.000 ms): wget/8460 majfault [__clear_user+0x3f] => 0x659cb4 (?k) Signed-off-by: Stanislav Fomichev Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1403799268-1367-3-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 39 ++++++++++ tools/perf/builtin-trace.c | 125 +++++++++++++++++++++++++++++++- 2 files changed, 163 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index fae38d9a44a4..72397d9aa2ec 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -107,6 +107,45 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Show tool stats such as number of times fd->pathname was discovered thru hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. +-F=[all|min|maj]:: +--pf=[all|min|maj]:: + Trace pagefaults. Optionally, you can specify whether you want minor, + major or all pagefaults. Default value is maj. + +PAGEFAULTS +---------- + +When tracing pagefaults, the format of the trace is as follows: + +<min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>). + +- min/maj indicates whether fault event is minor or major; +- ip.symbol shows symbol for instruction pointer (the code that generated the + fault); if no debug symbols available, perf trace will print raw IP; +- addr.dso shows DSO for the faulted address; +- map type is either 'd' for non-executable maps or 'x' for executable maps; +- addr level is either 'k' for kernel dso or '.' for user dso. + +For symbols resolution you may need to install debugging symbols. + +Please be aware that duration is currently always 0 and doesn't reflect actual +time it took for fault to be handled! 
+ +When --verbose specified, perf trace tries to print all available information +for both IP and fault address in the form of dso@symbol+offset. + +EXAMPLES +-------- + +Trace syscalls, major and minor pagefaults: + + $ perf trace -F all + + 1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.) + + As you can see, there was major pagefault in python process, from + CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4a9e26b731fe..1985c3b8cc06 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1178,6 +1178,9 @@ fail: return NULL; } +#define TRACE_PFMAJ (1 << 0) +#define TRACE_PFMIN (1 << 1) + struct trace { struct perf_tool tool; struct { @@ -1212,6 +1215,7 @@ struct trace { bool summary_only; bool show_comm; bool show_tool_stats; + int trace_pgfaults; }; static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) @@ -1773,6 +1777,68 @@ out_dump: return 0; } +static void print_location(FILE *f, struct perf_sample *sample, + struct addr_location *al, + bool print_dso, bool print_sym) +{ + + if ((verbose || print_dso) && al->map) + fprintf(f, "%s@", al->map->dso->long_name); + + if ((verbose || print_sym) && al->sym) + fprintf(f, "%s+0x%lx", al->sym->name, + al->addr - al->sym->start); + else if (al->map) + fprintf(f, "0x%lx", al->addr); + else + fprintf(f, "0x%lx", sample->addr); +} + +static int trace__pgfault(struct trace *trace, + struct perf_evsel *evsel, + union perf_event *event, + struct perf_sample *sample) +{ + struct thread *thread; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct addr_location al; + char map_type = 'd'; + + thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + + thread__find_addr_location(thread, trace->host, 
cpumode, MAP__FUNCTION, + sample->ip, &al); + + trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); + + fprintf(trace->output, "%sfault [", + evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? + "maj" : "min"); + + print_location(trace->output, sample, &al, false, true); + + fprintf(trace->output, "] => "); + + thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE, + sample->addr, &al); + + if (!al.map) { + thread__find_addr_location(thread, trace->host, cpumode, + MAP__FUNCTION, sample->addr, &al); + + if (al.map) + map_type = 'x'; + else + map_type = '?'; + } + + print_location(trace->output, sample, &al, true, false); + + fprintf(trace->output, " (%c%c)\n", map_type, al.level); + + return 0; +} + static bool skip_sample(struct trace *trace, struct perf_sample *sample) { if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || @@ -1887,6 +1953,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) perf_evlist__add(evlist, evsel); } +static int perf_evlist__add_pgfault(struct perf_evlist *evlist, + u64 config) +{ + struct perf_evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .mmap_data = 1, + .sample_period = 1, + }; + + attr.config = config; + + event_attr_init(&attr); + + evsel = perf_evsel__new(&attr); + if (!evsel) + return -ENOMEM; + + evsel->handler = trace__pgfault; + perf_evlist__add(evlist, evsel); + + return 0; +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = perf_evlist__new(); @@ -1907,6 +1997,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__add_vfs_getname(evlist); + if ((trace->trace_pgfaults & TRACE_PFMAJ) && + perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) + goto out_error_tp; + + if ((trace->trace_pgfaults & TRACE_PFMIN) && + perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) + goto out_error_tp; + if 
(trace->sched && perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) @@ -1987,7 +2085,8 @@ again: goto next_event; } - if (sample.raw_data == NULL) { + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && + sample.raw_data == NULL) { fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", perf_evsel__name(evsel), sample.tid, sample.cpu, sample.raw_size); @@ -2269,6 +2368,23 @@ static int trace__open_output(struct trace *trace, const char *filename) return trace->output == NULL ? -errno : 0; } +static int parse_pagefaults(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + int *trace_pgfaults = opt->value; + + if (strcmp(str, "all") == 0) + *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; + else if (strcmp(str, "maj") == 0) + *trace_pgfaults |= TRACE_PFMAJ; + else if (strcmp(str, "min") == 0) + *trace_pgfaults |= TRACE_PFMIN; + else + return -1; + + return 0; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const trace_usage[] = { @@ -2335,6 +2451,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) "Show only syscall summary with statistics"), OPT_BOOLEAN('S', "with-summary", &trace.summary, "Show all syscalls and summary with statistics"), + OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", + "Trace pagefaults", parse_pagefaults, "maj"), OPT_END() }; int err; @@ -2349,6 +2467,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) if (trace.summary_only) trace.summary = trace.summary_only; + if (trace.trace_pgfaults) { + trace.opts.sample_address = true; + trace.opts.sample_time = true; + } + if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { -- cgit v1.2.3 From 1e28fe0a4ff8680d5a0fb84995fd2444dac19cc4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 26 Jun 2014 20:14:26 +0400 Subject: perf 
trace: Add pagefaults record and replay support Previous commit added live pagefault trace support, this one adds record and replay support. Example: [root@zoo /]# echo 1 > /proc/sys/vm/drop_caches ; trace -F all record -a sleep 10 [ perf record: Woken up 0 times to write data ] [ perf record: Captured and wrote 1029.722 MB perf.data (~44989242 samples) ] [root@zoo /]# ls -la perf.data -rw-------. 1 root root 1083921722 Jun 26 17:44 perf.data [root@zoo /]# perf evlist raw_syscalls:sys_enter raw_syscalls:sys_exit major-faults minor-faults [root@zoo /]# trace -i perf.data | grep -v trace\/ | tail -15 156.137 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.) 156.139 ( 0.000 ms): perl/18476 minfault [Perl_sv_clear+0x123] => 0x0 (?.) 156.140 ( 0.000 ms): perl/18476 minfault [Perl_sv_clear+0xc4] => 0x0 (?.) 156.144 ( 0.000 ms): perl/18476 minfault [_int_free+0xda] => 0x0 (?.) 156.151 ( 0.000 ms): perl/18476 minfault [_int_free+0x1df] => 0x0 (?.) 156.158 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.) 156.161 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.) 156.168 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.) 156.172 ( 0.000 ms): perl/18476 minfault [0xb4243] => 0x0 (?.) 156.173 ( 0.000 ms): perl/18476 minfault [_int_free+0xda] => 0x0 (?.) 156.183 ( 0.000 ms): perl/18476 minfault [Perl_hfree_next_entry+0xb4] => 0x0 (?.) 156.197 ( 0.000 ms): perl/18476 minfault [_int_free+0x1df] => 0x0 (?.) 156.216 ( 0.000 ms): perl/18476 minfault [Perl_sv_clear+0x123] => 0x0 (?.) 156.221 ( 0.000 ms): perl/18476 minfault [Perl_sv_clear+0x123] => 0x0 (?.) 
[root@zoo /]# Signed-off-by: Stanislav Fomichev Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1403799268-1367-4-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 63 +++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 18 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1985c3b8cc06..0b58e24c7ccb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1897,7 +1897,7 @@ static int parse_target_str(struct trace *trace) return 0; } -static int trace__record(int argc, const char **argv) +static int trace__record(struct trace *trace, int argc, const char **argv) { unsigned int rec_argc, i, j; const char **rec_argv; @@ -1906,34 +1906,52 @@ static int trace__record(int argc, const char **argv) "-R", "-m", "1024", "-c", "1", - "-e", }; + const char * const sc_args[] = { "-e", }; + unsigned int sc_args_nr = ARRAY_SIZE(sc_args); + const char * const majpf_args[] = { "-e", "major-faults" }; + unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); + const char * const minpf_args[] = { "-e", "minor-faults" }; + unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); + /* +1 is for the event string below */ - rec_argc = ARRAY_SIZE(record_args) + 1 + argc; + rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + + majpf_args_nr + minpf_args_nr + argc; rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) return -ENOMEM; + j = 0; for (i = 0; i < ARRAY_SIZE(record_args); i++) - rec_argv[i] = record_args[i]; + rec_argv[j++] = record_args[i]; + + for (i = 0; i < sc_args_nr; i++) + rec_argv[j++] = sc_args[i]; /* event string may be different for older kernels - e.g., RHEL6 */ if (is_valid_tracepoint("raw_syscalls:sys_enter")) - rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; + rec_argv[j++] = 
"raw_syscalls:sys_enter,raw_syscalls:sys_exit"; else if (is_valid_tracepoint("syscalls:sys_enter")) - rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit"; + rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; else { pr_err("Neither raw_syscalls nor syscalls events exist.\n"); return -1; } - i++; - for (j = 0; j < (unsigned int)argc; j++, i++) - rec_argv[i] = argv[j]; + if (trace->trace_pgfaults & TRACE_PFMAJ) + for (i = 0; i < majpf_args_nr; i++) + rec_argv[j++] = majpf_args[i]; + + if (trace->trace_pgfaults & TRACE_PFMIN) + for (i = 0; i < minpf_args_nr; i++) + rec_argv[j++] = minpf_args[i]; + + for (i = 0; i < (unsigned int)argc; i++) + rec_argv[j++] = argv[i]; - return cmd_record(i, rec_argv, NULL); + return cmd_record(j, rec_argv, NULL); } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); @@ -2224,6 +2242,14 @@ static int trace__replay(struct trace *trace) goto out; } + evlist__for_each(session->evlist, evsel) { + if (evsel->attr.type == PERF_TYPE_SOFTWARE && + (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || + evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || + evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS)) + evsel->handler = trace__pgfault; + } + err = parse_target_str(trace); if (err != 0) goto out; @@ -2458,20 +2484,21 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) int err; char bf[BUFSIZ]; - if ((argc > 1) && (strcmp(argv[1], "record") == 0)) - return trace__record(argc-2, &argv[2]); - - argc = parse_options(argc, argv, trace_options, trace_usage, 0); - - /* summary_only implies summary option, but don't overwrite summary if set */ - if (trace.summary_only) - trace.summary = trace.summary_only; + argc = parse_options(argc, argv, trace_options, trace_usage, + PARSE_OPT_STOP_AT_NON_OPTION); if (trace.trace_pgfaults) { trace.opts.sample_address = true; trace.opts.sample_time = true; } + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) + return trace__record(&trace, argc-1, 
&argv[1]); + + /* summary_only implies summary option, but don't overwrite summary if set */ + if (trace.summary_only) + trace.summary = trace.summary_only; + if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { -- cgit v1.2.3 From e281a9606d7073c517f2571e83faaff029ddc1cf Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 26 Jun 2014 20:14:28 +0400 Subject: perf trace: Add possibility to switch off syscall events Currently, we may either trace syscalls or syscalls+pagefaults. We'd like to be able to trace *only* pagefaults and this commit implements this feature. Example: [root@zoo /]# echo 1 > /proc/sys/vm/drop_caches ; trace --no-syscalls -F -p `pidof xchat` 0.000 ( 0.000 ms): xchat/4574 majfault [g_unichar_get_script+0x11] => /usr/lib64/libglib-2.0.so.0.3800.2@0xc403b (x.) 0.202 ( 0.000 ms): xchat/4574 majfault [_cairo_hash_table_lookup+0x53] => 0x2280ff0 (?.) 20.854 ( 0.000 ms): xchat/4574 majfault [gdk_cairo_set_source_pixbuf+0x110] => /usr/bin/xchat@0x6da1f (x.) 1022.000 ( 0.000 ms): xchat/4574 majfault [__memcpy_sse2_unaligned+0x29] => 0x7ff5a8ca0400 (?.) ^C[root@zoo /]# Below we can see malloc calls, 'trace' reading symbol tables in libraries to resolve symbols, etc. [root@zoo /]# echo 1 > /proc/sys/vm/drop_caches ; trace --no-syscalls -F all --cpu 1 sleep 10 0.000 ( 0.000 ms): chrome/26589 minfault [0x1b53129] => /tmp/perf-26589.map@0x33cbcbf7f000 (x.) 96.477 ( 0.000 ms): libvirtd/947 minfault [copy_user_enhanced_fast_string+0x5] => 0x7f7685bba000 (?k) 113.164 ( 0.000 ms): Xorg/1063 minfault [0x786da] => 0x7fce52882a3c (?.) 7162.801 ( 0.000 ms): chrome/3747 minfault [0x8e1a89] => 0xfcaefed0008 (?.) 7773.138 ( 0.000 ms): chrome/3886 minfault [0x8e1a89] => 0xfcb0ce28008 (?.) 7992.022 ( 0.000 ms): chrome/26574 minfault [0x1b5a708] => 0x3de7b5fc5000 (?.) 8108.949 ( 0.000 ms): qemu-system-x8/4537 majfault [_int_malloc+0xee] => 0x7faffc466d60 (?.) 
8108.975 ( 0.000 ms): qemu-system-x8/4537 minfault [_int_malloc+0x102] => 0x7faffc466d60 (?.) 8148.174 ( 0.000 ms): qemu-system-x8/4537 minfault [_int_malloc+0x102] => 0x7faffc4eb500 (?.) 8270.855 ( 0.000 ms): chrome/26245 minfault [do_bo_emit_reloc+0xdb] => 0x45d092bc004 (?.) 8270.869 ( 0.000 ms): chrome/26245 minfault [do_bo_emit_reloc+0x108] => 0x45d09150000 (?.) no symbols found in /usr/lib64/libspice-server.so.1.9.0, maybe install a debug package? 8273.831 ( 0.000 ms): trace/20198 majfault [__memcmp_sse4_1+0xbc6] => /usr/lib64/libspice-server.so.1.9.0@0xdf000 (d.) 8275.121 ( 0.000 ms): trace/20198 minfault [dso__load+0x38] => 0x14fe756 (?.) no symbols found in /usr/lib64/libelf-0.158.so, maybe install a debug package? 8275.142 ( 0.000 ms): trace/20198 minfault [__memcmp_sse4_1+0xbc6] => /usr/lib64/libelf-0.158.so@0x0 (d.) [root@zoo /]# Signed-off-by: Stanislav Fomichev Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1403799268-1367-6-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 7 ++++ tools/perf/builtin-trace.c | 58 ++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 26 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 72397d9aa2ec..02aac831bdd9 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -112,6 +112,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Trace pagefaults. Optionally, you can specify whether you want minor, major or all pagefaults. Default value is maj. +--syscalls:: + Trace system calls. This option is enabled by default. + PAGEFAULTS ---------- @@ -137,6 +140,10 @@ for both IP and fault address in the form of dso@symbol+offset. 
EXAMPLES -------- +Trace only major pagefaults: + + $ perf trace --no-syscalls -F + Trace syscalls, major and minor pagefaults: $ perf trace -F all diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0b58e24c7ccb..dc7a694b61fe 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1215,6 +1215,7 @@ struct trace { bool summary_only; bool show_comm; bool show_tool_stats; + bool trace_syscalls; int trace_pgfaults; }; @@ -1927,17 +1928,19 @@ static int trace__record(struct trace *trace, int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[j++] = record_args[i]; - for (i = 0; i < sc_args_nr; i++) - rec_argv[j++] = sc_args[i]; - - /* event string may be different for older kernels - e.g., RHEL6 */ - if (is_valid_tracepoint("raw_syscalls:sys_enter")) - rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; - else if (is_valid_tracepoint("syscalls:sys_enter")) - rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; - else { - pr_err("Neither raw_syscalls nor syscalls events exist.\n"); - return -1; + if (trace->trace_syscalls) { + for (i = 0; i < sc_args_nr; i++) + rec_argv[j++] = sc_args[i]; + + /* event string may be different for older kernels - e.g., RHEL6 */ + if (is_valid_tracepoint("raw_syscalls:sys_enter")) + rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; + else if (is_valid_tracepoint("syscalls:sys_enter")) + rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; + else { + pr_err("Neither raw_syscalls nor syscalls events exist.\n"); + return -1; + } } if (trace->trace_pgfaults & TRACE_PFMAJ) @@ -2010,10 +2013,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out; } - if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) + if (trace->trace_syscalls && + perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, + trace__sys_exit)) goto out_error_tp; - perf_evlist__add_vfs_getname(evlist); + if 
(trace->trace_syscalls) + perf_evlist__add_vfs_getname(evlist); if ((trace->trace_pgfaults & TRACE_PFMAJ) && perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) @@ -2215,13 +2221,10 @@ static int trace__replay(struct trace *trace) if (evsel == NULL) evsel = perf_evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter"); - if (evsel == NULL) { - pr_err("Data file does not have raw_syscalls:sys_enter event\n"); - goto out; - } - if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || - perf_evsel__init_sc_tp_ptr_field(evsel, args)) { + if (evsel && + (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || + perf_evsel__init_sc_tp_ptr_field(evsel, args))) { pr_err("Error during initialize raw_syscalls:sys_enter event\n"); goto out; } @@ -2231,13 +2234,9 @@ static int trace__replay(struct trace *trace) if (evsel == NULL) evsel = perf_evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit"); - if (evsel == NULL) { - pr_err("Data file does not have raw_syscalls:sys_exit event\n"); - goto out; - } - - if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || - perf_evsel__init_sc_tp_uint_field(evsel, ret)) { + if (evsel && + (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || + perf_evsel__init_sc_tp_uint_field(evsel, ret))) { pr_err("Error during initialize raw_syscalls:sys_exit event\n"); goto out; } @@ -2440,6 +2439