From a701d28e2d997705ae4376753af6e35b20029cef Mon Sep 17 00:00:00 2001 From: Dengcheng Zhu Date: Mon, 19 Oct 2020 15:21:24 +0800 Subject: perf annotate mips: Add perf arch instructions annotate handlers Support the MIPS architecture using the ins_ops association method. With this patch, perf-annotate can work well on MIPS. Testing it with a perf.data file collected on a mips machine: $./perf annotate -i perf.data : Disassembly of section .text: : : 00000000000be6a0 : : get_next_seq(): 0.00 : be6a0: lw v0,0(a0) 0.00 : be6a4: daddiu sp,sp,-128 0.00 : be6a8: ld a7,72(a0) 0.00 : be6ac: gssq s5,s4,80(sp) 0.00 : be6b0: gssq s1,s0,48(sp) 0.00 : be6b4: gssq s8,gp,112(sp) 0.00 : be6b8: gssq s7,s6,96(sp) 0.00 : be6bc: gssq s3,s2,64(sp) 0.00 : be6c0: sd a3,0(sp) 0.00 : be6c4: move s0,a0 0.00 : be6c8: sd v0,32(sp) 0.00 : be6cc: sd a5,8(sp) 0.00 : be6d0: sd zero,8(a0) 0.00 : be6d4: sd a6,16(sp) 0.00 : be6d8: ld s2,48(a0) 8.53 : be6dc: ld s1,40(a0) 9.42 : be6e0: ld v1,32(a0) 0.00 : be6e4: nop 0.00 : be6e8: ld s4,24(a0) 0.00 : be6ec: ld s5,16(a0) 0.00 : be6f0: sd a7,40(sp) 10.11 : be6f4: ld s6,64(a0) ... 
The original patch link: https://lore.kernel.org/patchwork/patch/1180480/ Signed-off-by: Dengcheng Zhu Cc: Dengcheng Zhu Cc: Jiaxun Yang Cc: Peter Zijlstra Cc: Xuefeng Li Cc: linux-mips@vger.kernel.org [ fanpeng@loongson.cn: Add missing "bgtzl", "bltzl", "bgezl", "blezl", "beql" and "bnel" for pre-R6processors ] Signed-off-by: Peng Fan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/mips/Build | 2 +- tools/perf/arch/mips/annotate/instructions.c | 46 ++++++++++++++++++++++++++++ tools/perf/util/annotate.c | 8 +++++ 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/mips/annotate/instructions.c (limited to 'tools') diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build index 1bb8bf6d7fd4..e4e5f33c84d8 100644 --- a/tools/perf/arch/mips/Build +++ b/tools/perf/arch/mips/Build @@ -1 +1 @@ -# empty +perf-y += util/ diff --git a/tools/perf/arch/mips/annotate/instructions.c b/tools/perf/arch/mips/annotate/instructions.c new file mode 100644 index 000000000000..340993f2a897 --- /dev/null +++ b/tools/perf/arch/mips/annotate/instructions.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +static +struct ins_ops *mips__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strncmp(name, "bal", 3) || + !strncmp(name, "bgezal", 6) || + !strncmp(name, "bltzal", 6) || + !strncmp(name, "bgtzal", 6) || + !strncmp(name, "blezal", 6) || + !strncmp(name, "beqzal", 6) || + !strncmp(name, "bnezal", 6) || + !strncmp(name, "bgtzl", 5) || + !strncmp(name, "bltzl", 5) || + !strncmp(name, "bgezl", 5) || + !strncmp(name, "blezl", 5) || + !strncmp(name, "jialc", 5) || + !strncmp(name, "beql", 4) || + !strncmp(name, "bnel", 4) || + !strncmp(name, "jal", 3)) + ops = &call_ops; + else if (!strncmp(name, "jr", 2)) + ops = &ret_ops; + else if (name[0] == 'j' || name[0] == 'b') + ops = &jump_ops; + else + return NULL; + + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static 
+int mips__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + arch->associate_instruction_ops = mips__associate_ins_ops; + arch->initialized = true; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 6c8575e182ed..e52053a6ad42 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -152,6 +152,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/arm/annotate/instructions.c" #include "arch/arm64/annotate/instructions.c" #include "arch/csky/annotate/instructions.c" +#include "arch/mips/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" @@ -174,6 +175,13 @@ static struct arch architectures[] = { .name = "csky", .init = csky__annotate_init, }, + { + .name = "mips", + .init = mips__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, { .name = "x86", .init = x86__annotate_init, -- cgit v1.2.3 From a7c77c4f52c80fffc53b4c616a95f96d57170933 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 19 Oct 2020 16:25:45 -0700 Subject: perf version: Add a feature for libpfm4 If perf is built with libpfm4 (LIBPFM4=1) then advertise it in perf -vv. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201019232545.4047264-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-version.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index d09ec2f03071..9cd074a3d825 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -80,6 +80,7 @@ static void library_status(void) STATUS(HAVE_LIBBPF_SUPPORT, bpf); STATUS(HAVE_AIO_SUPPORT, aio); STATUS(HAVE_ZSTD_SUPPORT, zstd); + STATUS(HAVE_LIBPFM, libpfm4); } int cmd_version(int argc, const char **argv) -- cgit v1.2.3 From 0ee281e1e4e12f8c09b99f80a2482a55cd7d6bca Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 Oct 2020 08:36:13 +0800 Subject: perf mem2node: Improve warning if detected no memory nodes Some archs (e.g. x86 and Arm64) don't enable the configuration CONFIG_MEMORY_HOTPLUG by default; if this configuration is not enabled when building the kernel image, the SysFS entries for memory nodes will be missing. As a result, the perf tool has no chance to capture the memory node information; when the perf tool reports the result and detects no memory nodes, it outputs "assertion failed at util/mem2node.c:99". The output log doesn't give the reason for the failure and users have no clue how to fix it. This patch switches to an explicit warning: it tells the user that no memory nodes were detected and suggests enabling CONFIG_MEMORY_HOTPLUG when building the kernel. 
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201019003613.8399-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem2node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c index c84f5841c7ab..03a7d7b27737 100644 --- a/tools/perf/util/mem2node.c +++ b/tools/perf/util/mem2node.c @@ -96,7 +96,8 @@ int mem2node__init(struct mem2node *map, struct perf_env *env) /* Cut unused entries, due to merging. */ tmp_entries = realloc(entries, sizeof(*entries) * j); - if (tmp_entries || WARN_ON_ONCE(j == 0)) + if (tmp_entries || + WARN_ONCE(j == 0, "No memory nodes, is CONFIG_MEMORY_HOTPLUG enabled?\n")) entries = tmp_entries; for (i = 0; i < j; i++) { -- cgit v1.2.3 From 3989bbf9607d6716900d9df91c46a2ce8a504b93 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 Oct 2020 18:02:35 +0800 Subject: perf tests tsc: Make tsc testing as a common testing x86 arch provides the testing for conversion between tsc and perf time, the testing is located in x86 arch folder. Move this testing out from x86 arch folder and place it into the common testing folder, so allows to execute tsc testing on other architectures (e.g. Arm64). This patch removes the inclusion of "arch-tests.h" from the testing code, this can avoid building failure if any arch has no this header file. 
Committer testing: $ perf test -v tsc Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc 70: Convert perf time to TSC : --- start --- test child forked, pid 4032834 mmap size 528384B 1st event perf time 165409788843605 tsc 336578703793868 rdtsc time 165409788854986 tsc 336578703837038 2nd event perf time 165409788855487 tsc 336578703838935 test child finished with 0 ---- end ---- Convert perf time to TSC: Ok $ Signed-off-by: Leo Yan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201019100236.23675-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 - tools/perf/arch/x86/tests/Build | 1 - tools/perf/arch/x86/tests/arch-tests.c | 4 - tools/perf/arch/x86/tests/perf-time-to-tsc.c | 173 --------------------------- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 + tools/perf/tests/perf-time-to-tsc.c | 171 ++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 8 files changed, 177 insertions(+), 179 deletions(-) delete mode 100644 tools/perf/arch/x86/tests/perf-time-to-tsc.c create mode 100644 tools/perf/tests/perf-time-to-tsc.c (limited to 'tools') diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index c41c5affe4be..6a54b94f1c25 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -7,7 +7,6 @@ struct test; /* Tests */ int test__rdpmc(struct test *test __maybe_unused, int subtest); -int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); int test__intel_pt_pkt_decoder(struct test *test, int subtest); int test__bp_modify(struct test *test, int subtest); diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 2997c506550c..36d4f248b51d 100644 --- a/tools/perf/arch/x86/tests/Build +++ 
b/tools/perf/arch/x86/tests/Build @@ -3,6 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += rdpmc.o -perf-y += perf-time-to-tsc.o perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 6763135aec17..bc25d727b4e9 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -8,10 +8,6 @@ struct test arch_tests[] = { .desc = "x86 rdpmc", .func = test__rdpmc, }, - { - .desc = "Convert perf time to TSC", - .func = test__perf_time_to_tsc, - }, #ifdef HAVE_DWARF_UNWIND_SUPPORT { .desc = "DWARF unwind", diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c deleted file mode 100644 index 026d32ed078e..000000000000 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ /dev/null @@ -1,173 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "debug.h" -#include "parse-events.h" -#include "evlist.h" -#include "evsel.h" -#include "thread_map.h" -#include "record.h" -#include "tsc.h" -#include "util/mmap.h" -#include "tests/tests.h" - -#include "arch-tests.h" - -#define CHECK__(x) { \ - while ((x) < 0) { \ - pr_debug(#x " failed!\n"); \ - goto out_err; \ - } \ -} - -#define CHECK_NOT_NULL__(x) { \ - while ((x) == NULL) { \ - pr_debug(#x " failed!\n"); \ - goto out_err; \ - } \ -} - -/** - * test__perf_time_to_tsc - test converting perf time to TSC. - * - * This function implements a test that checks that the conversion of perf time - * to and from TSC is consistent with the order of events. If the test passes - * %0 is returned, otherwise %-1 is returned. If TSC conversion is not - * supported then then the test passes but " (not supported)" is printed. 
- */ -int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused) -{ - struct record_opts opts = { - .mmap_pages = UINT_MAX, - .user_freq = UINT_MAX, - .user_interval = ULLONG_MAX, - .target = { - .uses_mmap = true, - }, - .sample_time = true, - }; - struct perf_thread_map *threads = NULL; - struct perf_cpu_map *cpus = NULL; - struct evlist *evlist = NULL; - struct evsel *evsel = NULL; - int err = -1, ret, i; - const char *comm1, *comm2; - struct perf_tsc_conversion tc; - struct perf_event_mmap_page *pc; - union perf_event *event; - u64 test_tsc, comm1_tsc, comm2_tsc; - u64 test_time, comm1_time = 0, comm2_time = 0; - struct mmap *md; - - threads = thread_map__new(-1, getpid(), UINT_MAX); - CHECK_NOT_NULL__(threads); - - cpus = perf_cpu_map__new(NULL); - CHECK_NOT_NULL__(cpus); - - evlist = evlist__new(); - CHECK_NOT_NULL__(evlist); - - perf_evlist__set_maps(&evlist->core, cpus, threads); - - CHECK__(parse_events(evlist, "cycles:u", NULL)); - - perf_evlist__config(evlist, &opts, NULL); - - evsel = evlist__first(evlist); - - evsel->core.attr.comm = 1; - evsel->core.attr.disabled = 1; - evsel->core.attr.enable_on_exec = 0; - - CHECK__(evlist__open(evlist)); - - CHECK__(evlist__mmap(evlist, UINT_MAX)); - - pc = evlist->mmap[0].core.base; - ret = perf_read_tsc_conversion(pc, &tc); - if (ret) { - if (ret == -EOPNOTSUPP) { - fprintf(stderr, " (not supported)"); - return 0; - } - goto out_err; - } - - evlist__enable(evlist); - - comm1 = "Test COMM 1"; - CHECK__(prctl(PR_SET_NAME, (unsigned long)comm1, 0, 0, 0)); - - test_tsc = rdtsc(); - - comm2 = "Test COMM 2"; - CHECK__(prctl(PR_SET_NAME, (unsigned long)comm2, 0, 0, 0)); - - evlist__disable(evlist); - - for (i = 0; i < evlist->core.nr_mmaps; i++) { - md = &evlist->mmap[i]; - if (perf_mmap__read_init(&md->core) < 0) - continue; - - while ((event = perf_mmap__read_event(&md->core)) != NULL) { - struct perf_sample sample; - - if (event->header.type != PERF_RECORD_COMM || - 
(pid_t)event->comm.pid != getpid() || - (pid_t)event->comm.tid != getpid()) - goto next_event; - - if (strcmp(event->comm.comm, comm1) == 0) { - CHECK__(evsel__parse_sample(evsel, event, &sample)); - comm1_time = sample.time; - } - if (strcmp(event->comm.comm, comm2) == 0) { - CHECK__(evsel__parse_sample(evsel, event, &sample)); - comm2_time = sample.time; - } -next_event: - perf_mmap__consume(&md->core); - } - perf_mmap__read_done(&md->core); - } - - if (!comm1_time || !comm2_time) - goto out_err; - - test_time = tsc_to_perf_time(test_tsc, &tc); - comm1_tsc = perf_time_to_tsc(comm1_time, &tc); - comm2_tsc = perf_time_to_tsc(comm2_time, &tc); - - pr_debug("1st event perf time %"PRIu64" tsc %"PRIu64"\n", - comm1_time, comm1_tsc); - pr_debug("rdtsc time %"PRIu64" tsc %"PRIu64"\n", - test_time, test_tsc); - pr_debug("2nd event perf time %"PRIu64" tsc %"PRIu64"\n", - comm2_time, comm2_tsc); - - if (test_time <= comm1_time || - test_time >= comm2_time) - goto out_err; - - if (test_tsc <= comm1_tsc || - test_tsc >= comm2_tsc) - goto out_err; - - err = 0; - -out_err: - evlist__delete(evlist); - return err; -} diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 4d15bf6041fb..aa4dc4f5abde 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -62,6 +62,7 @@ perf-y += pfm.o perf-y += parse-metric.o perf-y += pe-file-parsing.o perf-y += expand-cgroup.o +perf-y += perf-time-to-tsc.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 132bdb3e6c31..02e7bbf70419 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -349,6 +349,10 @@ static struct test generic_tests[] = { .desc = "Event expansion for cgroups", .func = test__expand_cgroup_events, }, + { + .desc = "Convert perf time to TSC", + .func = test__perf_time_to_tsc, + }, { .func = NULL, }, diff --git a/tools/perf/tests/perf-time-to-tsc.c 
b/tools/perf/tests/perf-time-to-tsc.c new file mode 100644 index 000000000000..aee97c16c0d9 --- /dev/null +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "parse-events.h" +#include "evlist.h" +#include "evsel.h" +#include "thread_map.h" +#include "record.h" +#include "tsc.h" +#include "mmap.h" +#include "tests.h" + +#define CHECK__(x) { \ + while ((x) < 0) { \ + pr_debug(#x " failed!\n"); \ + goto out_err; \ + } \ +} + +#define CHECK_NOT_NULL__(x) { \ + while ((x) == NULL) { \ + pr_debug(#x " failed!\n"); \ + goto out_err; \ + } \ +} + +/** + * test__perf_time_to_tsc - test converting perf time to TSC. + * + * This function implements a test that checks that the conversion of perf time + * to and from TSC is consistent with the order of events. If the test passes + * %0 is returned, otherwise %-1 is returned. If TSC conversion is not + * supported then then the test passes but " (not supported)" is printed. 
+ */ +int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + struct record_opts opts = { + .mmap_pages = UINT_MAX, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .target = { + .uses_mmap = true, + }, + .sample_time = true, + }; + struct perf_thread_map *threads = NULL; + struct perf_cpu_map *cpus = NULL; + struct evlist *evlist = NULL; + struct evsel *evsel = NULL; + int err = -1, ret, i; + const char *comm1, *comm2; + struct perf_tsc_conversion tc; + struct perf_event_mmap_page *pc; + union perf_event *event; + u64 test_tsc, comm1_tsc, comm2_tsc; + u64 test_time, comm1_time = 0, comm2_time = 0; + struct mmap *md; + + threads = thread_map__new(-1, getpid(), UINT_MAX); + CHECK_NOT_NULL__(threads); + + cpus = perf_cpu_map__new(NULL); + CHECK_NOT_NULL__(cpus); + + evlist = evlist__new(); + CHECK_NOT_NULL__(evlist); + + perf_evlist__set_maps(&evlist->core, cpus, threads); + + CHECK__(parse_events(evlist, "cycles:u", NULL)); + + perf_evlist__config(evlist, &opts, NULL); + + evsel = evlist__first(evlist); + + evsel->core.attr.comm = 1; + evsel->core.attr.disabled = 1; + evsel->core.attr.enable_on_exec = 0; + + CHECK__(evlist__open(evlist)); + + CHECK__(evlist__mmap(evlist, UINT_MAX)); + + pc = evlist->mmap[0].core.base; + ret = perf_read_tsc_conversion(pc, &tc); + if (ret) { + if (ret == -EOPNOTSUPP) { + fprintf(stderr, " (not supported)"); + return 0; + } + goto out_err; + } + + evlist__enable(evlist); + + comm1 = "Test COMM 1"; + CHECK__(prctl(PR_SET_NAME, (unsigned long)comm1, 0, 0, 0)); + + test_tsc = rdtsc(); + + comm2 = "Test COMM 2"; + CHECK__(prctl(PR_SET_NAME, (unsigned long)comm2, 0, 0, 0)); + + evlist__disable(evlist); + + for (i = 0; i < evlist->core.nr_mmaps; i++) { + md = &evlist->mmap[i]; + if (perf_mmap__read_init(&md->core) < 0) + continue; + + while ((event = perf_mmap__read_event(&md->core)) != NULL) { + struct perf_sample sample; + + if (event->header.type != PERF_RECORD_COMM || + 
(pid_t)event->comm.pid != getpid() || + (pid_t)event->comm.tid != getpid()) + goto next_event; + + if (strcmp(event->comm.comm, comm1) == 0) { + CHECK__(evsel__parse_sample(evsel, event, &sample)); + comm1_time = sample.time; + } + if (strcmp(event->comm.comm, comm2) == 0) { + CHECK__(evsel__parse_sample(evsel, event, &sample)); + comm2_time = sample.time; + } +next_event: + perf_mmap__consume(&md->core); + } + perf_mmap__read_done(&md->core); + } + + if (!comm1_time || !comm2_time) + goto out_err; + + test_time = tsc_to_perf_time(test_tsc, &tc); + comm1_tsc = perf_time_to_tsc(comm1_time, &tc); + comm2_tsc = perf_time_to_tsc(comm2_time, &tc); + + pr_debug("1st event perf time %"PRIu64" tsc %"PRIu64"\n", + comm1_time, comm1_tsc); + pr_debug("rdtsc time %"PRIu64" tsc %"PRIu64"\n", + test_time, test_tsc); + pr_debug("2nd event perf time %"PRIu64" tsc %"PRIu64"\n", + comm2_time, comm2_tsc); + + if (test_time <= comm1_time || + test_time >= comm2_time) + goto out_err; + + if (test_tsc <= comm1_tsc || + test_tsc >= comm2_tsc) + goto out_err; + + err = 0; + +out_err: + evlist__delete(evlist); + return err; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c85a2c08e407..c9b180e640e5 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -124,6 +124,7 @@ int test__pfm_subtest_get_nr(void); int test__parse_metric(struct test *test, int subtest); int test__pe_file_parsing(struct test *test, int subtest); int test__expand_cgroup_events(struct test *test, int subtest); +int test__perf_time_to_tsc(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); -- cgit v1.2.3 From 248dd9b591db5bc5fb46a0e015753cfcfe60a345 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 Oct 2020 18:02:36 +0800 Subject: perf tests tsc: Add checking helper is_supported() So far tsc is enabled on x86_64, i386 and Arm64 architectures, add checking helper to skip this testing for other architectures. 
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201019100236.23675-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/perf-time-to-tsc.c | 13 +++++++++++++ tools/perf/tests/tests.h | 1 + 3 files changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 02e7bbf70419..a185904c47f3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -352,6 +352,7 @@ static struct test generic_tests[] = { { .desc = "Convert perf time to TSC", .func = test__perf_time_to_tsc, + .is_supported = test__tsc_is_supported, }, { .func = NULL, diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index aee97c16c0d9..a9560e0f6360 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -169,3 +169,16 @@ out_err: evlist__delete(evlist); return err; } + +bool test__tsc_is_supported(void) +{ + /* + * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. + * Just enable the test for x86_64/i386 and Arm64 archs. 
+ */ +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) + return true; +#else + return false; +#endif +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c9b180e640e5..b1f2aac93b33 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -129,6 +129,7 @@ int test__perf_time_to_tsc(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); bool test__wp_is_supported(void); +bool test__tsc_is_supported(void); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT -- cgit v1.2.3 From cc3b964d5eb49d0c9da08760f8760bb6945f1df5 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 16 Oct 2020 16:16:50 +0300 Subject: perf test: Implement skip_reason callback for watchpoint tests Currently reason for skipping the read only watchpoint test is only seen when running in verbose mode: $ perf test watchpoint 23: Watchpoint : 23.1: Read Only Watchpoint : Skip 23.2: Write Only Watchpoint : Ok 23.3: Read / Write Watchpoint : Ok 23.4: Modify Watchpoint : Ok $ perf test -v watchpoint 23: Watchpoint : 23.1: Read Only Watchpoint : --- start --- test child forked, pid 60204 Hardware does not support read only watchpoints. 
test child finished with -2 Implement skip_reason callback for the watchpoint tests, so that it's easy to see reason why the test is skipped: $ perf test watchpoint 23: Watchpoint : 23.1: Read Only Watchpoint : Skip (missing hardware support) 23.2: Write Only Watchpoint : Ok 23.3: Read / Write Watchpoint : Ok 23.4: Modify Watchpoint : Ok Signed-off-by: Tommi Rantala Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201016131650.72476-1-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/tests.h | 1 + tools/perf/tests/wp.c | 21 +++++++++++++++------ 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index a185904c47f3..7273823d0d02 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -142,6 +142,7 @@ static struct test generic_tests[] = { .skip_if_fail = false, .get_nr = test__wp_subtest_get_nr, .get_desc = test__wp_subtest_get_desc, + .skip_reason = test__wp_subtest_skip_reason, }, }, { diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b1f2aac93b33..8e24a61fe4c2 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -66,6 +66,7 @@ int test__bp_signal_overflow(struct test *test, int subtest); int test__bp_accounting(struct test *test, int subtest); int test__wp(struct test *test, int subtest); const char *test__wp_subtest_get_desc(int subtest); +const char *test__wp_subtest_skip_reason(int subtest); int test__wp_subtest_get_nr(void); int test__task_exit(struct test *test, int subtest); int test__mem(struct test *test, int subtest); diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index d262d6639829..9387fa76faa5 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -174,10 +174,12 @@ static bool wp_ro_supported(void) #endif } -static void wp_ro_skip_msg(void) 
+static const char *wp_ro_skip_msg(void) { #if defined (__x86_64__) || defined (__i386__) - pr_debug("Hardware does not support read only watchpoints.\n"); + return "missing hardware support"; +#else + return NULL; #endif } @@ -185,7 +187,7 @@ static struct { const char *desc; int (*target_func)(void); bool (*is_supported)(void); - void (*skip_msg)(void); + const char *(*skip_msg)(void); } wp_testcase_table[] = { { .desc = "Read Only Watchpoint", @@ -219,16 +221,23 @@ const char *test__wp_subtest_get_desc(int i) return wp_testcase_table[i].desc; } +const char *test__wp_subtest_skip_reason(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) + return NULL; + if (!wp_testcase_table[i].skip_msg) + return NULL; + return wp_testcase_table[i].skip_msg(); +} + int test__wp(struct test *test __maybe_unused, int i) { if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) return TEST_FAIL; if (wp_testcase_table[i].is_supported && - !wp_testcase_table[i].is_supported()) { - wp_testcase_table[i].skip_msg(); + !wp_testcase_table[i].is_supported()) return TEST_SKIP; - } return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL; } -- cgit v1.2.3 From c18cf78d7969db89934587fa476220eefe7bd4bd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 14:12:37 -0300 Subject: perf bpf: Enclose libbpf.h include within HAVE_LIBBPF_SUPPORT As it uses the 'deprecated' attribute in a way that breaks the build with old gcc compilers, so to continue being able to build in such systems where NO_LIBBPF=1 is being used, enclose it under HAVE_LIBBPF_SUPPORT. 
1 centos:6 : FAIL gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23) 2 oraclelinux:6 : FAIL gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23.0.1) CC /tmp/build/perf/builtin-record.o In file included from util/bpf-loader.h:11, from builtin-record.c:39: /git/linux/tools/lib/bpf/libbpf.h:203: error: wrong number of arguments specified for 'deprecated' attribute Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.h | 3 +++ tools/perf/util/parse-events.c | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 25251d63164c..5d1c725cea29 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -8,6 +8,8 @@ #include #include + +#ifdef HAVE_LIBBPF_SUPPORT #include enum bpf_loader_errno { @@ -38,6 +40,7 @@ enum bpf_loader_errno { BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */ __BPF_LOADER_ERRNO__END, }; +#endif // HAVE_LIBBPF_SUPPORT struct evsel; struct evlist; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3b273580fb84..3b581d7b3213 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -668,6 +668,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, return ret; } +#ifdef HAVE_LIBBPF_SUPPORT struct __add_bpf_event_param { struct parse_events_state *parse_state; struct list_head *list; @@ -900,6 +901,30 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, list_splice_tail(&obj_head_config, head_config); return err; } +#else // HAVE_LIBBPF_SUPPORT +int parse_events_load_bpf_obj(struct parse_events_state *parse_state, + struct list_head *list __maybe_unused, + struct bpf_object *obj __maybe_unused, + struct list_head *head_config __maybe_unused) +{ + parse_events__handle_error(parse_state->error, 0, + strdup("BPF support is not compiled"), + strdup("Make sure 
libbpf-devel is available at build time.")); + return -ENOTSUP; +} + +int parse_events_load_bpf(struct parse_events_state *parse_state, + struct list_head *list __maybe_unused, + char *bpf_file_name __maybe_unused, + bool source __maybe_unused, + struct list_head *head_config __maybe_unused) +{ + parse_events__handle_error(parse_state->error, 0, + strdup("BPF support is not compiled"), + strdup("Make sure libbpf-devel is available at build time.")); + return -ENOTSUP; +} +#endif // HAVE_LIBBPF_SUPPORT static int parse_breakpoint_type(const char *type, struct perf_event_attr *attr) -- cgit v1.2.3 From 38219f24116ace9b0e604f2ced9c7dbef3041058 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 15:00:51 -0300 Subject: perf tests: Skip the llvm and bpf tests if HAVE_LIBBPF_SUPPORT isn't defined If either NO_LIBBPF=1 is passed, explicitly disabling it, or if libbpf is not available due to some missing dependency, skip its tests, telling the user the feature isn't available. 
# perf test 40: LLVM search and compile : Skip (not compiled in) 41: Session topology : Ok 42: BPF filter : Skip (not compiled in) Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 4 ++-- tools/perf/tests/llvm.c | 30 +++++++++++++++++++----------- 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index cd77e334e577..d880c588a951 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -9,12 +9,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include "tests.h" #include "llvm.h" @@ -25,6 +23,8 @@ #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test" #ifdef HAVE_LIBBPF_SUPPORT +#include +#include static int epoll_pwait_loop(void) { diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index ae6cda81c209..98da8a8757ab 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -2,13 +2,13 @@ #include #include #include -#include -#include -#include "llvm.h" #include "tests.h" #include "debug.h" #ifdef HAVE_LIBBPF_SUPPORT +#include +#include +#include "llvm.h" static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { struct bpf_object *obj; @@ -19,14 +19,6 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) bpf_object__close(obj); return TEST_OK; } -#else -static int test__bpf_parsing(void *obj_buf __maybe_unused, - size_t obj_buf_sz __maybe_unused) -{ - pr_debug("Skip bpf parsing\n"); - return TEST_OK; -} -#endif static struct { const char *source; @@ -170,3 +162,19 @@ const char *test__llvm_subtest_get_desc(int subtest) return bpf_source_table[subtest].desc; } +#else //HAVE_LIBBPF_SUPPORT +int test__llvm(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + return TEST_SKIP; +} + +int test__llvm_subtest_get_nr(void) +{ + return 0; +} + +const char *test__llvm_subtest_get_desc(int subtest 
__maybe_unused) +{ + return NULL; +} +#endif // HAVE_LIBBPF_SUPPORT -- cgit v1.2.3 From 20e88c6076fc50ebf0560e730349000ff2da94fd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 15:48:23 -0300 Subject: perf annotate: Move bpf header inclusion to inside HAVE_LIBBPF_SUPPORT No need to include it otherwise. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e52053a6ad42..ce8c07bc8c56 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -10,10 +10,6 @@ #include #include #include -#include -#include -#include -#include #include "util.h" // hex_width() #include "ui/ui.h" #include "sort.h" @@ -1684,6 +1680,10 @@ fallback: #define PACKAGE "perf" #include #include +#include +#include +#include +#include static int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args) -- cgit v1.2.3 From ef0580ecd8b0306acf09b7a7508d72cafc67896d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2020 15:57:21 -0300 Subject: perf env: Conditionally compile BPF support code on having HAVE_LIBBPF_SUPPORT If libbpf isn't selected, no need for a bunch of related code, that were not even being used, as code using these perf_env methods was also enclosed in HAVE_LIBBPF_SUPPORT. 
Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 14 ++++++++++---- tools/perf/util/env.c | 15 ++++++++++++--- tools/perf/util/env.h | 4 ++-- tools/perf/util/header.c | 21 ++++++++------------- 4 files changed, 32 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 55c11e854fe4..89b5fd2b5de3 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -11,8 +11,10 @@ #include #include #include +#ifdef HAVE_LIBBPF_SUPPORT #include #include "bpf-event.h" +#endif #include "compress.h" #include "env.h" #include "namespaces.h" @@ -728,6 +730,7 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by) return false; } +#ifdef HAVE_LIBBPF_SUPPORT static ssize_t bpf_read(struct dso *dso, u64 offset, char *data) { struct bpf_prog_info_node *node; @@ -765,6 +768,7 @@ static int bpf_size(struct dso *dso) dso->data.file_size = node->info_linear->info.jited_prog_len; return 0; } +#endif // HAVE_LIBBPF_SUPPORT static void dso_cache__free(struct dso *dso) @@ -894,10 +898,12 @@ static struct dso_cache *dso_cache__populate(struct dso *dso, *ret = -ENOMEM; return NULL; } - +#ifdef HAVE_LIBBPF_SUPPORT if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) *ret = bpf_read(dso, cache_offset, cache->data); - else if (dso->binary_type == DSO_BINARY_TYPE__OOL) + else +#endif + if (dso->binary_type == DSO_BINARY_TYPE__OOL) *ret = DSO__DATA_CACHE_SIZE; else *ret = file_read(dso, machine, cache_offset, cache->data); @@ -1018,10 +1024,10 @@ int dso__data_file_size(struct dso *dso, struct machine *machine) if (dso->data.status == DSO_DATA_STATUS_ERROR) return -1; - +#ifdef HAVE_LIBBPF_SUPPORT if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) return bpf_size(dso); - +#endif return file_size(dso, machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index fadc59708ece..9130f6fad8d5 100644 --- 
a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -5,16 +5,18 @@ #include "util/header.h" #include #include -#include "bpf-event.h" #include "cgroup.h" #include #include -#include #include #include struct perf_env perf_env; +#ifdef HAVE_LIBBPF_SUPPORT +#include "bpf-event.h" +#include + void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { @@ -163,6 +165,11 @@ static void perf_env__purge_bpf(struct perf_env *env) up_write(&env->bpf_progs.lock); } +#else // HAVE_LIBBPF_SUPPORT +static void perf_env__purge_bpf(struct perf_env *env __maybe_unused) +{ +} +#endif // HAVE_LIBBPF_SUPPORT void perf_env__exit(struct perf_env *env) { @@ -197,11 +204,13 @@ void perf_env__exit(struct perf_env *env) zfree(&env->memory_nodes); } -void perf_env__init(struct perf_env *env) +void perf_env__init(struct perf_env *env __maybe_unused) { +#ifdef HAVE_LIBBPF_SUPPORT env->bpf_progs.infos = RB_ROOT; env->bpf_progs.btfs = RB_ROOT; init_rwsem(&env->bpf_progs.lock); +#endif } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index a12972652006..ca249bf5e984 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -77,7 +77,7 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; - +#ifdef HAVE_LIBBPF_SUPPORT /* * bpf_info_lock protects bpf rbtrees. 
This is needed because the * trees are accessed by different threads in perf-top @@ -89,7 +89,7 @@ struct perf_env { struct rb_root btfs; u32 btfs_cnt; } bpf_progs; - +#endif // HAVE_LIBBPF_SUPPORT /* same reason as above (for perf-top) */ struct { struct rw_semaphore lock; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index be850e9f8852..598285a21dad 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -19,7 +19,9 @@ #include #include #include +#ifdef HAVE_LIBBPF_SUPPORT #include +#endif #include #include "dso.h" @@ -987,13 +989,6 @@ out: up_read(&env->bpf_progs.lock); return ret; } -#else // HAVE_LIBBPF_SUPPORT -static int write_bpf_prog_info(struct feat_fd *ff __maybe_unused, - struct evlist *evlist __maybe_unused) -{ - return 0; -} -#endif // HAVE_LIBBPF_SUPPORT static int write_bpf_btf(struct feat_fd *ff, struct evlist *evlist __maybe_unused) @@ -1027,6 +1022,7 @@ out: up_read(&env->bpf_progs.lock); return ret; } +#endif // HAVE_LIBBPF_SUPPORT static int cpu_cache_level__sort(const void *a, const void *b) { @@ -1638,6 +1634,7 @@ static void print_dir_format(struct feat_fd *ff, FILE *fp) fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version); } +#ifdef HAVE_LIBBPF_SUPPORT static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) { struct perf_env *env = &ff->ph->env; @@ -1683,6 +1680,7 @@ static void print_bpf_btf(struct feat_fd *ff, FILE *fp) up_read(&env->bpf_progs.lock); } +#endif // HAVE_LIBBPF_SUPPORT static void free_event_desc(struct evsel *events) { @@ -2938,12 +2936,6 @@ out: up_write(&env->bpf_progs.lock); return err; } -#else // HAVE_LIBBPF_SUPPORT -static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data __maybe_unused) -{ - return 0; -} -#endif // HAVE_LIBBPF_SUPPORT static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) { @@ -2990,6 +2982,7 @@ out: free(node); return err; } +#endif // HAVE_LIBBPF_SUPPORT static int process_compressed(struct feat_fd 
*ff, void *data __maybe_unused) @@ -3120,8 +3113,10 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), FEAT_OPR(CLOCKID, clockid, false), FEAT_OPN(DIR_FORMAT, dir_format, false), +#ifdef HAVE_LIBBPF_SUPPORT FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), FEAT_OPR(BPF_BTF, bpf_btf, false), +#endif FEAT_OPR(COMPRESSED, compressed, false), FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), FEAT_OPR(CLOCK_DATA, clock_data, false), -- cgit v1.2.3 From 1218838d68f5e9cc195685f17375be96a54832c7 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Oct 2020 15:24:21 +0900 Subject: perf kvm: Add kvm-stat for arm64 Add support for 'perf kvm stat' on arm64 platform. Example: # perf kvm stat report Analyze events for all VMs, all VCPUs: VM-EXIT Samples Samples% Time% Min Time Max Time Avg time DABT_LOW 661867 98.91% 40.45% 2.19us 3364.65us 6.24us ( +- 0.34% ) IRQ 4598 0.69% 57.44% 2.89us 3397.59us 1276.27us ( +- 1.61% ) WFx 1475 0.22% 1.71% 2.22us 3388.63us 118.31us ( +- 8.69% ) IABT_LOW 1018 0.15% 0.38% 2.22us 2742.07us 38.29us ( +- 12.55% ) SYS64 180 0.03% 0.01% 2.07us 112.91us 6.57us ( +- 14.95% ) HVC64 17 0.00% 0.01% 2.19us 322.35us 42.95us ( +- 58.98% ) Total Samples:669155, Total events handled time:10216387.86us. 
Signed-off-by: Sergey Senozhatsky Reviewed-by: Leo Yan Tested-by: Leo Yan Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: Suleiman Souhlal Link: http://lore.kernel.org/lkml/20201027062421.463355-1-sergey.senozhatsky@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/Makefile | 1 + tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/arm64_exception_types.h | 92 ++++++++++++++++++++++ tools/perf/arch/arm64/util/kvm-stat.c | 85 ++++++++++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 tools/perf/arch/arm64/util/arm64_exception_types.h create mode 100644 tools/perf/arch/arm64/util/kvm-stat.c (limited to 'tools') diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index dbef716a1913..fab3095fb5d0 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_JITDUMP := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +HAVE_KVM_STAT_SUPPORT := 1 # # Syscall table generation for perf diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index b53294d74b01..8d2b9bcfffca 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -2,6 +2,7 @@ perf-y += header.o perf-y += machine.o perf-y += perf_regs.o perf-y += tsc.o +perf-y += kvm-stat.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h new file mode 100644 index 000000000000..27c981ebe401 --- /dev/null +++ b/tools/perf/arch/arm64/util/arm64_exception_types.h @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef ARCH_PERF_ARM64_EXCEPTION_TYPES_H +#define ARCH_PERF_ARM64_EXCEPTION_TYPES_H + +/* Per 
asm/virt.h */ +#define HVC_STUB_ERR 0xbadca11 + +/* Per asm/kvm_asm.h */ +#define ARM_EXCEPTION_IRQ 0 +#define ARM_EXCEPTION_EL1_SERROR 1 +#define ARM_EXCEPTION_TRAP 2 +#define ARM_EXCEPTION_IL 3 +/* The hyp-stub will return this for any kvm_call_hyp() call */ +#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR + +#define kvm_arm_exception_type \ + {ARM_EXCEPTION_IRQ, "IRQ" }, \ + {ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \ + {ARM_EXCEPTION_TRAP, "TRAP" }, \ + {ARM_EXCEPTION_IL, "ILLEGAL" }, \ + {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" } + +/* Per asm/esr.h */ +#define ESR_ELx_EC_UNKNOWN (0x00) +#define ESR_ELx_EC_WFx (0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 (0x03) +#define ESR_ELx_EC_CP15_64 (0x04) +#define ESR_ELx_EC_CP14_MR (0x05) +#define ESR_ELx_EC_CP14_LS (0x06) +#define ESR_ELx_EC_FP_ASIMD (0x07) +#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ +/* Unallocated EC: 0x0A - 0x0B */ +#define ESR_ELx_EC_CP14_64 (0x0C) +/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_ILL (0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 (0x11) +#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */ +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 (0x15) +#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 (0x18) +#define ESR_ELx_EC_SVE (0x19) +#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +/* Unallocated EC: 0x1b - 0x1E */ +#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW (0x20) +#define ESR_ELx_EC_IABT_CUR (0x21) +#define ESR_ELx_EC_PC_ALIGN (0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW (0x24) +#define ESR_ELx_EC_DABT_CUR (0x25) +#define ESR_ELx_EC_SP_ALIGN (0x26) +/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_FP_EXC32 (0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 (0x2C) +/* Unallocated EC: 0x2D - 0x2E */ 
+#define ESR_ELx_EC_SERROR (0x2F) +#define ESR_ELx_EC_BREAKPT_LOW (0x30) +#define ESR_ELx_EC_BREAKPT_CUR (0x31) +#define ESR_ELx_EC_SOFTSTP_LOW (0x32) +#define ESR_ELx_EC_SOFTSTP_CUR (0x33) +#define ESR_ELx_EC_WATCHPT_LOW (0x34) +#define ESR_ELx_EC_WATCHPT_CUR (0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 (0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ +/* Unallocated EC: 0x3B */ +#define ESR_ELx_EC_BRK64 (0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX (0x3F) + +#define ECN(x) { ESR_ELx_EC_##x, #x } + +#define kvm_arm_exception_class \ + ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \ + ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \ + ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \ + ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ + ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ + ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ + ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ + ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) + +#endif /* ARCH_PERF_ARM64_EXCEPTION_TYPES_H */ diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c new file mode 100644 index 000000000000..50376b9062c1 --- /dev/null +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "../../util/evsel.h" +#include "../../util/kvm-stat.h" +#include "arm64_exception_types.h" +#include "debug.h" + +define_exit_reasons_table(arm64_exit_reasons, kvm_arm_exception_type); +define_exit_reasons_table(arm64_trap_exit_reasons, kvm_arm_exception_class); + +const char *kvm_trap_exit_reason = "esr_ec"; +const char *vcpu_id_str = "id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "ret"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + +const char 
*kvm_events_tp[] = { + "kvm:kvm_entry", + "kvm:kvm_exit", + NULL, +}; + +static void event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = evsel__intval(evsel, sample, kvm_exit_reason); + key->exit_reasons = arm64_exit_reasons; + + /* + * TRAP exceptions carry exception class info in esr_ec field + * and, hence, we need to use a different exit_reasons table to + * properly decode event's est_ec. + */ + if (key->key == ARM_EXCEPTION_TRAP) { + key->key = evsel__intval(evsel, sample, kvm_trap_exit_reason); + key->exit_reasons = arm64_trap_exit_reasons; + } +} + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return !strcmp(evsel->name, kvm_entry_trace); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (!strcmp(evsel->name, kvm_exit_trace)) { + event_get_key(evsel, sample, key); + return true; + } + return false; +} + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { + .name = "vmexit", + .ops = &exit_events, + }, + { NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "arm64"; + return 0; +} -- cgit v1.2.3 From 9b0a7836359443227c9af101f7aea8412e739458 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 27 Oct 2020 16:28:54 +0900 Subject: perf test: Use generic event for expand_libpfm_events() I found that the UNHALTED_CORE_CYCLES event is only available in the Intel machines and it makes other vendors/archs fail on the test. As libpfm4 can parse the generic events like cycles, let's use them. 
Fixes: 40b74c30ffb9 ("perf test: Add expand cgroup event test") Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201027072855.655449-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expand-cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index d5771e4d094f..4c59f3ae438f 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -145,7 +145,7 @@ static int expand_libpfm_events(void) int ret; struct evlist *evlist; struct rblist metric_events; - const char event_str[] = "UNHALTED_CORE_CYCLES"; + const char event_str[] = "CYCLES"; struct option opt = { .value = &evlist, }; -- cgit v1.2.3 From bb1c15b60b981d1065d7766ccf9de6c32beedfa3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 27 Oct 2020 16:28:55 +0900 Subject: perf stat: Support regex pattern in --for-each-cgroup To make the command line even more compact with cgroups, support regex pattern matching in cgroup names. 
$ perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1 3,000.73 msec cpu-clock foo # 2.998 CPUs utilized 12,530,992,699 cycles foo # 7.517 GHz (100.00%) 1,000.61 msec cpu-clock foo/bar # 1.000 CPUs utilized 4,178,529,579 cycles foo/bar # 2.506 GHz (100.00%) 1,000.03 msec cpu-clock foo/baz # 0.999 CPUs utilized 4,176,104,315 cycles foo/baz # 2.505 GHz (100.00%) 1.000892614 seconds time elapsed Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201027072855.655449-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 5 +- tools/perf/builtin-stat.c | 5 +- tools/perf/util/cgroup.c | 198 +++++++++++++++++++++++++++++---- 3 files changed, 182 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 9f9f29025e49..2b44c08b3b23 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -168,8 +168,9 @@ command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'. --for-each-cgroup name:: Expand event list for each cgroup in "name" (allow multiple cgroups separated -by comma). This has same effect that repeating -e option and -G option for -each event x name. This option cannot be used with -G/--cgroup option. +by comma). It also support regex patterns to match multiple groups. This has same +effect that repeating -e option and -G option for each event x name. This option +cannot be used with -G/--cgroup option. 
-o file:: --output file:: diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b01af171d94f..6709578128c9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2235,8 +2235,11 @@ int cmd_stat(int argc, const char **argv) } if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, - &stat_config.metric_events, true) < 0) + &stat_config.metric_events, true) < 0) { + parse_options_usage(stat_usage, stat_options, + "for-each-cgroup", 0); goto out; + } } target__validate(&target); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index b81324a13a2b..704333748549 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -13,9 +13,19 @@ #include #include #include +#include +#include int nr_cgroups; +/* used to match cgroup name with patterns */ +struct cgroup_name { + struct list_head list; + bool used; + char name[]; +}; +static LIST_HEAD(cgroup_list); + static int open_cgroup(const char *name) { char path[PATH_MAX + 1]; @@ -149,6 +159,137 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup) evsel__set_default_cgroup(evsel, cgroup); } +/* helper function for ftw() in match_cgroups and list_cgroups */ +static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused, + int typeflag) +{ + struct cgroup_name *cn; + + if (typeflag != FTW_D) + return 0; + + cn = malloc(sizeof(*cn) + strlen(fpath) + 1); + if (cn == NULL) + return -1; + + cn->used = false; + strcpy(cn->name, fpath); + + list_add_tail(&cn->list, &cgroup_list); + return 0; +} + +static void release_cgroup_list(void) +{ + struct cgroup_name *cn; + + while (!list_empty(&cgroup_list)) { + cn = list_first_entry(&cgroup_list, struct cgroup_name, list); + list_del(&cn->list); + free(cn); + } +} + +/* collect given cgroups only */ +static int list_cgroups(const char *str) +{ + const char *p, *e, *eos = str + strlen(str); + struct cgroup_name *cn; + char *s; + + /* use given name as is - for testing 
purpose */ + for (;;) { + p = strchr(str, ','); + e = p ? p : eos; + + if (e - str) { + int ret; + + s = strndup(str, e - str); + if (!s) + return -1; + /* pretend if it's added by ftw() */ + ret = add_cgroup_name(s, NULL, FTW_D); + free(s); + if (ret) + return -1; + } else { + if (add_cgroup_name("", NULL, FTW_D) < 0) + return -1; + } + + if (!p) + break; + str = p+1; + } + + /* these groups will be used */ + list_for_each_entry(cn, &cgroup_list, list) + cn->used = true; + + return 0; +} + +/* collect all cgroups first and then match with the pattern */ +static int match_cgroups(const char *str) +{ + char mnt[PATH_MAX]; + const char *p, *e, *eos = str + strlen(str); + struct cgroup_name *cn; + regex_t reg; + int prefix_len; + char *s; + + if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event")) + return -1; + + /* cgroup_name will have a full path, skip the root directory */ + prefix_len = strlen(mnt); + + /* collect all cgroups in the cgroup_list */ + if (ftw(mnt, add_cgroup_name, 20) < 0) + return -1; + + for (;;) { + p = strchr(str, ','); + e = p ? 
p : eos; + + /* allow empty cgroups, i.e., skip */ + if (e - str) { + /* termination added */ + s = strndup(str, e - str); + if (!s) + return -1; + if (regcomp(&reg, s, REG_NOSUB)) { + free(s); + return -1; + } + + /* check cgroup name with the pattern */ + list_for_each_entry(cn, &cgroup_list, list) { + char *name = cn->name + prefix_len; + + if (name[0] == '/' && name[1]) + name++; + if (!regexec(&reg, name, 0, NULL, 0)) + cn->used = true; + } + regfree(&reg); + free(s); + } else { + /* first entry to root cgroup */ + cn = list_first_entry(&cgroup_list, struct cgroup_name, + list); + cn->used = true; + } + + if (!p) + break; + str = p+1; + } + return prefix_len; +} + int parse_cgroups(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -201,6 +342,11 @@ int parse_cgroups(const struct option *opt, const char *str, return 0; } +static bool has_pattern_string(const char *str) +{ + return !!strpbrk(str, "{}[]()|*+?^$"); +} + int evlist__expand_cgroup(struct evlist *evlist, const char *str, struct rblist *metric_events, bool open_cgroup) { @@ -208,8 +354,9 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, struct evsel *pos, *evsel, *leader; struct rblist orig_metric_events; struct cgroup *cgrp = NULL; - const char *p, *e, *eos = str + strlen(str); + struct cgroup_name *cn; int ret = -1; + int prefix_len; if (evlist->core.nr_entries == 0) { fprintf(stderr, "must define events before cgroups\n"); @@ -234,24 +381,27 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, rblist__init(&orig_metric_events); } - for (;;) { - p = strchr(str, ','); - e = p ?
p : eos; + if (has_pattern_string(str)) + prefix_len = match_cgroups(str); + else + prefix_len = list_cgroups(str); - /* allow empty cgroups, i.e., skip */ - if (e - str) { - /* termination added */ - char *name = strndup(str, e - str); - if (!name) - goto out_err; + if (prefix_len < 0) + goto out_err; - cgrp = cgroup__new(name, open_cgroup); - free(name); - if (cgrp == NULL) - goto out_err; - } else { - cgrp = NULL; - } + list_for_each_entry(cn, &cgroup_list, list) { + char *name; + + if (!cn->used) + continue; + + /* cgroup_name might have a full path, skip the prefix */ + name = cn->name + prefix_len; + if (name[0] == '/' && name[1]) + name++; + cgrp = cgroup__new(name, open_cgroup); + if (cgrp == NULL) + goto out_err; leader = NULL; evlist__for_each_entry(orig_list, pos) { @@ -277,23 +427,25 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, if (metricgroup__copy_metric_events(tmp_list, cgrp, metric_events, &orig_metric_events) < 0) - break; + goto out_err; } perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries); tmp_list->core.nr_entries = 0; + } - if (!p) { - ret = 0; - break; - } - str = p+1; + if (list_empty(&evlist->core.entries)) { + fprintf(stderr, "no cgroup matched: %s\n", str); + goto out_err; } + ret = 0; + out_err: evlist__delete(orig_list); evlist__delete(tmp_list); rblist__exit(&orig_metric_events); + release_cgroup_list(); return ret; } -- cgit v1.2.3 From 55a4de94c64bacffbcd802c954764e0de2ab217f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Oct 2020 17:27:36 -0700 Subject: perf stat: Add --quiet option Add a new --quiet option to 'perf stat'. This is useful with 'perf stat record' to write the data only to the perf.data file, which can lower measurement overhead because the data doesn't need to be formatted. On my 4C desktop: % time ./perf stat record -e $(python -c 'print ",".join(["cycles"]*1000)') -a -I 1000 sleep 5 ... 
real 0m5.377s user 0m0.238s sys 0m0.452s % time ./perf stat record --quiet -e $(python -c 'print ",".join(["cycles"]*1000)') -a -I 1000 sleep 5 real 0m5.452s user 0m0.183s sys 0m0.423s In this example it cuts the user time by 20%. On systems with more cores the savings are higher. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Alexey Budankov Link: http://lore.kernel.org/lkml/20201027002737.30942-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 ++++ tools/perf/builtin-stat.c | 6 +++++- tools/perf/util/stat.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 2b44c08b3b23..5d4a673d7621 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -317,6 +317,10 @@ small group that need not have multiplexing is lowered. This option forbids the event merging logic from sharing events between groups and may be used to increase accuracy in this case. +--quiet:: +Don't print output. This is useful with perf stat record below to only +