summary refs log tree commit diff stats
path: root/tools/perf
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-01-06 16:30:14 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-01-06 16:30:14 -0800
commit ac5eed2b41776b05cf03aac761d3bb5e64eea24c (patch)
tree c9bf703ffaf0265fa1135f0dd6f65485184a3570 /tools/perf
parent 574823bfab82d9d8fa47f422778043fbb4b4f50e (diff)
parent 2573be22e5b6f24a0cabc97715c808c47e29eaaf (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf tooling updates from Ingo Molnar: "A final batch of perf tooling changes: mostly fixes and small improvements" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (29 commits) perf session: Add comment for perf_session__register_idle_thread() perf thread-stack: Fix thread stack processing for the idle task perf thread-stack: Allocate an array of thread stacks perf thread-stack: Factor out thread_stack__init() perf thread-stack: Allow for a thread stack array perf thread-stack: Avoid direct reference to the thread's stack perf thread-stack: Tidy thread_stack__bottom() usage perf thread-stack: Simplify some code in thread_stack__process() tools gpio: Allow overriding CFLAGS tools power turbostat: Override CFLAGS assignments and add LDFLAGS to build command tools thermal tmon: Allow overriding CFLAGS assignments tools power x86_energy_perf_policy: Override CFLAGS assignments and add LDFLAGS to build command perf c2c: Increase the HITM ratio limit for displayed cachelines perf c2c: Change the default coalesce setup perf trace beauty ioctl: Beautify USBDEVFS_ commands perf trace beauty: Export function to get the files for a thread perf trace: Wire up ioctl's USBDEBFS_ cmd table generator perf beauty ioctl: Add generator for USBDEVFS_ ioctl commands tools headers uapi: Grab a copy of usbdevice_fs.h perf trace: Store the major number for a file when storing its pathname ...
Diffstat (limited to 'tools/perf')
-rw-r--r-- tools/perf/Makefile.config 44
-rw-r--r-- tools/perf/Makefile.perf 8
-rw-r--r-- tools/perf/builtin-c2c.c 4
-rw-r--r-- tools/perf/builtin-script.c 21
-rw-r--r-- tools/perf/builtin-trace.c 142
-rwxr-xr-x tools/perf/check-headers.sh 1
-rw-r--r-- tools/perf/trace/beauty/beauty.h 7
-rw-r--r-- tools/perf/trace/beauty/ioctl.c 22
-rw-r--r-- tools/perf/trace/beauty/mmap.c 2
-rw-r--r-- tools/perf/trace/beauty/seccomp.c 4
-rwxr-xr-x tools/perf/trace/beauty/usbdevfs_ioctl.sh 19
-rw-r--r-- tools/perf/util/dump-insn.c 8
-rw-r--r-- tools/perf/util/dump-insn.h 2
-rw-r--r-- tools/perf/util/intel-bts.c 4
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c 8
-rw-r--r-- tools/perf/util/intel-pt.c 6
-rw-r--r-- tools/perf/util/python.c 3
-rw-r--r-- tools/perf/util/session.c 7
-rw-r--r-- tools/perf/util/thread-stack.c 227
-rw-r--r-- tools/perf/util/thread-stack.h 8
20 files changed, 400 insertions, 147 deletions
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 07c1857c3d7a..b441c88cafa1 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -702,18 +702,20 @@ endif
ifeq ($(feature-libbfd), 1)
EXTLIBS += -lbfd
+else
+ # we are on a system that requires -liberty and (maybe) -lz
+ # to link against -lbfd; test each case individually here
# call all detections now so we get correct
# status in VF output
- $(call feature_check,liberty)
- $(call feature_check,liberty-z)
- $(call feature_check,cplus-demangle)
+ $(call feature_check,libbfd-liberty)
+ $(call feature_check,libbfd-liberty-z)
- ifeq ($(feature-liberty), 1)
- EXTLIBS += -liberty
+ ifeq ($(feature-libbfd-liberty), 1)
+ EXTLIBS += -lbfd -liberty
else
- ifeq ($(feature-liberty-z), 1)
- EXTLIBS += -liberty -lz
+ ifeq ($(feature-libbfd-liberty-z), 1)
+ EXTLIBS += -lbfd -liberty -lz
endif
endif
endif
@@ -723,24 +725,24 @@ ifdef NO_DEMANGLE
else
ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
EXTLIBS += -liberty
- CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
else
- ifneq ($(feature-libbfd), 1)
- ifneq ($(feature-liberty), 1)
- ifneq ($(feature-liberty-z), 1)
- # we dont have neither HAVE_CPLUS_DEMANGLE_SUPPORT
- # or any of 'bfd iberty z' trinity
- ifeq ($(feature-cplus-demangle), 1)
- EXTLIBS += -liberty
- CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
- else
- msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling)
- CFLAGS += -DNO_DEMANGLE
- endif
- endif
+ ifeq ($(filter -liberty,$(EXTLIBS)),)
+ $(call feature_check,cplus-demangle)
+
+ # we dont have neither HAVE_CPLUS_DEMANGLE_SUPPORT
+ # or any of 'bfd iberty z' trinity
+ ifeq ($(feature-cplus-demangle), 1)
+ EXTLIBS += -liberty
+ else
+ msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling)
+ CFLAGS += -DNO_DEMANGLE
endif
endif
endif
+
+ ifneq ($(filter -liberty,$(EXTLIBS)),)
+ CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
+ endif
endif
ifneq ($(filter -lbfd,$(EXTLIBS)),)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index bd23e3f30895..ff29c3372ec3 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -497,6 +497,12 @@ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
$(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl)
$(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@
+usbdevfs_ioctl_array := $(beauty_ioctl_outdir)/usbdevfs_ioctl_array.c
+usbdevfs_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/usbdevfs_ioctl.sh
+
+$(usbdevfs_ioctl_array): $(linux_uapi_dir)/usbdevice_fs.h $(usbdevfs_ioctl_tbl)
+ $(Q)$(SHELL) '$(usbdevfs_ioctl_tbl)' $(linux_uapi_dir) > $@
+
x86_arch_prctl_code_array := $(beauty_outdir)/x86_arch_prctl_code_array.c
x86_arch_prctl_code_tbl := $(srctree)/tools/perf/trace/beauty/x86_arch_prctl.sh
@@ -624,6 +630,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(mount_flags_array) \
$(perf_ioctl_array) \
$(prctl_option_array) \
+ $(usbdevfs_ioctl_array) \
$(x86_arch_prctl_code_array) \
$(rename_flags_array) \
$(arch_errno_name_array)
@@ -923,6 +930,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
+ $(OUTPUT)$(usbdevfs_ioctl_array) \
$(OUTPUT)$(x86_arch_prctl_code_array) \
$(OUTPUT)$(rename_flags_array) \
$(OUTPUT)$(arch_errno_name_array)
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index f3aa9d02a5ab..d340d2e42776 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -68,7 +68,7 @@ struct c2c_hist_entry {
struct hist_entry he;
};
-static char const *coalesce_default = "pid,iaddr";
+static char const *coalesce_default = "iaddr";
struct perf_c2c {
struct perf_tool tool;
@@ -1878,7 +1878,7 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
return hpp_list__parse(&c2c_hists->list, output, sort);
}
-#define DISPLAY_LINE_LIMIT 0.0005
+#define DISPLAY_LINE_LIMIT 0.001
static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
{
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3728b50e52e2..d079f36d342d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1073,9 +1073,18 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
/*
* Print final block upto sample
+ *
+ * Due to pipeline delays the LBRs might be missing a branch
+ * or two, which can result in very large or negative blocks
+ * between final branch and sample. When this happens just
+ * continue walking after the last TO until we hit a branch.
*/
start = br->entries[0].to;
end = sample->ip;
+ if (end < start) {
+ /* Missing jump. Scan 128 bytes for the next branch */
+ end = start + 128;
+ }
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (len <= 0) {
@@ -1084,7 +1093,6 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
machine, thread, &x.is64bit, &x.cpumode, false);
if (len <= 0)
goto out;
-
printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
dump_insn(&x, sample->ip, buffer, len, NULL));
if (PRINT_FIELD(SRCCODE))
@@ -1096,6 +1104,13 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
dump_insn(&x, start + off, buffer + off, len - off, &ilen));
if (ilen == 0)
break;
+ if (arch_is_branch(buffer + off, len - off, x.is64bit) && start + off != sample->ip) {
+ /*
+ * Hit a missing branch. Just stop.
+ */
+ printed += fprintf(fp, "\t... not reaching sample ...\n");
+ break;
+ }
if (PRINT_FIELD(SRCCODE))
print_srccode(thread, x.cpumode, start + off);
}
@@ -1167,7 +1182,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
struct addr_location *al, FILE *fp)
{
struct perf_event_attr *attr = &evsel->attr;
- size_t depth = thread_stack__depth(thread);
+ size_t depth = thread_stack__depth(thread, sample->cpu);
const char *name = NULL;
static int spacing;
int len = 0;
@@ -1701,7 +1716,7 @@ static bool show_event(struct perf_sample *sample,
struct thread *thread,
struct addr_location *al)
{
- int depth = thread_stack__depth(thread);
+ int depth = thread_stack__depth(thread, sample->cpu);
if (!symbol_conf.graph_function)
return true;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ebde59e61133..adbf28183560 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -60,6 +60,7 @@
#include <linux/stringify.h>
#include <linux/time64.h>
#include <fcntl.h>
+#include <sys/sysmacros.h>
#include "sane_ctype.h"
@@ -112,8 +113,9 @@ struct trace {
} stats;
unsigned int max_stack;
unsigned int min_stack;
- bool sort_events;
+ int raw_augmented_syscalls_args_size;
bool raw_augmented_syscalls;
+ bool sort_events;
bool not_ev_qualifier;
bool live;
bool full_time;
@@ -283,12 +285,17 @@ out_delete:
return -ENOENT;
}
-static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
+static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel, struct perf_evsel *tp)
{
struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
- if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */
- if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
+ if (evsel->priv != NULL) {
+ struct tep_format_field *syscall_id = perf_evsel__field(tp, "id");
+ if (syscall_id == NULL)
+ syscall_id = perf_evsel__field(tp, "__syscall_nr");
+ if (syscall_id == NULL)
+ goto out_delete;
+ if (__tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
goto out_delete;
return 0;
@@ -974,9 +981,9 @@ struct thread_trace {
char *name;
} filename;
struct {
- int max;
- char **table;
- } paths;
+ int max;
+ struct file *table;
+ } files;
struct intlist *syscall_stats;
};
@@ -986,7 +993,7 @@ static struct thread_trace *thread_trace__new(void)
struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
if (ttrace)
- ttrace->paths.max = -1;
+ ttrace->files.max = -1;
ttrace->syscall_stats = intlist__new(NULL);
@@ -1030,30 +1037,48 @@ void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
static const size_t trace__entry_str_size = 2048;
-static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
+static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
{
- struct thread_trace *ttrace = thread__priv(thread);
-
- if (fd > ttrace->paths.max) {
- char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
+ if (fd > ttrace->files.max) {
+ struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
- if (npath == NULL)
- return -1;
+ if (nfiles == NULL)
+ return NULL;
- if (ttrace->paths.max != -1) {
- memset(npath + ttrace->paths.max + 1, 0,
- (fd - ttrace->paths.max) * sizeof(char *));
+ if (ttrace->files.max != -1) {
+ memset(nfiles + ttrace->files.max + 1, 0,
+ (fd - ttrace->files.max) * sizeof(struct file));
} else {
- memset(npath, 0, (fd + 1) * sizeof(char *));
+ memset(nfiles, 0, (fd + 1) * sizeof(struct file));
}
- ttrace->paths.table = npath;
- ttrace->paths.max = fd;
+ ttrace->files.table = nfiles;
+ ttrace->files.max = fd;
}
- ttrace->paths.table[fd] = strdup(pathname);
+ return ttrace->files.table + fd;
+}
- return ttrace->paths.table[fd] != NULL ? 0 : -1;
+struct file *thread__files_entry(struct thread *thread, int fd)
+{
+ return thread_trace__files_entry(thread__priv(thread), fd);
+}
+
+static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
+{
+ struct thread_trace *ttrace = thread__priv(thread);
+ struct file *file = thread_trace__files_entry(ttrace, fd);
+
+ if (file != NULL) {
+ struct stat st;
+ if (stat(pathname, &st) == 0)
+ file->dev_maj = major(st.st_rdev);
+ file->pathname = strdup(pathname);
+ if (file->pathname)
+ return 0;
+ }
+
+ return -1;
}
static int thread__read_fd_path(struct thread *thread, int fd)
@@ -1093,7 +1118,7 @@ static const char *thread__fd_path(struct thread *thread, int fd,
if (fd < 0)
return NULL;
- if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
+ if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
if (!trace->live)
return NULL;
++trace->stats.proc_getname;
@@ -1101,7 +1126,7 @@ static const char *thread__fd_path(struct thread *thread, int fd,
return NULL;
}
- return ttrace->paths.table[fd];
+ return ttrace->files.table[fd].pathname;
}
size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
@@ -1140,8 +1165,8 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
struct thread_trace *ttrace = thread__priv(arg->thread);
- if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
- zfree(&ttrace->paths.table[fd]);
+ if (ttrace && fd >= 0 && fd <= ttrace->files.max)
+ zfree(&ttrace->files.table[fd].pathname);
return printed;
}
@@ -1768,16 +1793,16 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
return printed;
}
-static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented)
+static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
{
void *augmented_args = NULL;
/*
* For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
- * and there we get all 6 syscall args plus the tracepoint common
- * fields (sizeof(long)) and the syscall_nr (another long). So we check
- * if that is the case and if so don't look after the sc->args_size,
- * but always after the full raw_syscalls:sys_enter payload, which is
- * fixed.
+ * and there we get all 6 syscall args plus the tracepoint common fields
+ * that gets calculated at the start and the syscall_nr (another long).
+ * So we check if that is the case and if so don't look after the
+ * sc->args_size but always after the full raw_syscalls:sys_enter payload,
+ * which is fixed.
*
* We'll revisit this later to pass s->args_size to the BPF augmenter
* (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
@@ -1785,7 +1810,7 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
* use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
* traffic to just what is needed for each syscall.
*/
- int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size;
+ int args_size = raw_augmented_args_size ?: sc->args_size;
*augmented_args_size = sample->raw_size - args_size;
if (*augmented_args_size > 0)
@@ -1839,7 +1864,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
* here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
*/
if (evsel != trace->syscalls.events.sys_enter)
- augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
ttrace->entry_time = sample->time;
msg = ttrace->entry_str;
printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
@@ -1897,7 +1922,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
goto out_put;
args = perf_evsel__sc_tp_ptr(evsel, args, sample);
- augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
fprintf(trace->output, "%s", msg);
err = 0;
@@ -2686,7 +2711,9 @@ static int trace__set_ev_qualifier_filter(struct trace *trace)
{
if (trace->syscalls.map)
return trace__set_ev_qualifier_bpf_filter(trace);
- return trace__set_ev_qualifier_tp_filter(trace);
+ if (trace->syscalls.events.sys_enter)
+ return trace__set_ev_qualifier_tp_filter(trace);
+ return 0;
}
static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
@@ -3812,13 +3839,6 @@ int cmd_trace(int argc, const char **argv)
* syscall.
*/
if (trace.syscalls.events.augmented) {
- evsel = trace.syscalls.events.augmented;
-
- if (perf_evsel__init_augmented_syscall_tp(evsel) ||
- perf_evsel__init_augmented_syscall_tp_args(evsel))
- goto out;
- evsel->handler = trace__sys_enter;
-
evlist__for_each_entry(trace.evlist, evsel) {
bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
@@ -3827,9 +3847,41 @@ int cmd_trace(int argc, const char **argv)
goto init_augmented_syscall_tp;
}
+ if (strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_enter") == 0) {
+ struct perf_evsel *augmented = trace.syscalls.events.augmented;
+ if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
+ perf_evsel__init_augmented_syscall_tp_args(augmented))
+ goto out;
+ augmented->handler = trace__sys_enter;
+ }
+
if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+ struct syscall_tp *sc;
init_augmented_syscall_tp:
- perf_evsel__init_augmented_syscall_tp(evsel);
+ if (perf_evsel__init_augmented_syscall_tp(evsel, evsel))
+ goto out;
+ sc = evsel->priv;
+ /*
+ * For now with BPF raw_augmented we hook into
+ * raw_syscalls:sys_enter and there we get all
+ * 6 syscall args plus the tracepoint common
+ * fields and the syscall_nr (another long).
+ * So we check if that is the case and if so
+ * don't look after the sc->args_size but
+ * always after the full raw_syscalls:sys_enter
+ * payload, which is fixed.
+ *
+ * We'll revisit this later to pass
+ * s->args_size to the BPF augmenter (now
+ * tools/perf/examples/bpf/augmented_raw_syscalls.c,
+ * so that it copies only what we need for each
+ * syscall, like what happens when we use
+ * syscalls:sys_enter_NAME, so that we reduce
+ * the kernel/userspace traffic to just what is
+ * needed for each syscall.
+ */
+ if (trace.raw_augmented_syscalls)
+ trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
perf_evsel__init_augmented_syscall_tp_ret(evsel);
evsel->handler = trace__sys_exit;
}
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 8e811ea0cf85..6cb98f8570a2 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -14,6 +14,7 @@ include/uapi/linux/perf_event.h
include/uapi/linux/prctl.h
include/uapi/linux/sched.h
include/uapi/linux/stat.h
+include/uapi/linux/usbdevice_fs.h
include/uapi/linux/vhost.h
include/uapi/sound/asound.h
include/linux/bits.h
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 83c5b202e00e..139d485a6f16 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -32,6 +32,13 @@ size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, boo
struct trace;
struct thread;
+struct file {
+ char *pathname;
+ int dev_maj;
+};
+
+struct file *thread__files_entry(struct thread *thread, int fd);
+
struct strarrays {
int nr_entries;
struct strarray **entries;
diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c
index 9efeb6a936c2..620350d41209 100644
--- a/tools/perf/trace/beauty/ioctl.c
+++ b/tools/perf/trace/beauty/ioctl.c
@@ -112,6 +112,17 @@ static size_t ioctl__scnprintf_perf_cmd(int nr, int dir, char *bf, size_t size)
return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAE, nr, dir);
}
+static size_t ioctl__scnprintf_usbdevfs_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/usbdevfs_ioctl_array.c"
+ static DEFINE_STRARRAY(usbdevfs_ioctl_cmds, "");
+
+ if (nr < strarray__usbdevfs_ioctl_cmds.nr_entries && strarray__usbdevfs_ioctl_cmds.entries[nr] != NULL)
+ return scnprintf(bf, size, "USBDEVFS_%s", strarray__usbdevfs_ioctl_cmds.entries[nr]);
+
+ return scnprintf(bf, size, "(%c, %#x, %#x)", 'U', nr, dir);
+}
+
static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, bool show_prefix)
{
const char *prefix = "_IOC_";
@@ -157,9 +168,20 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, boo
return printed + scnprintf(bf + printed, size - printed, ", %#x, %#x, %#x)", type, nr, sz);
}
+#ifndef USB_DEVICE_MAJOR
+#define USB_DEVICE_MAJOR 189
+#endif // USB_DEVICE_MAJOR
+
size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg)
{
unsigned long cmd = arg->val;
+ unsigned int fd = syscall_arg__val(arg, 0);
+ struct file *file = thread__files_entry(arg->thread, fd);
+
+ if (file != NULL) {
+ if (file->dev_maj == USB_DEVICE_MAJOR)
+ return ioctl__scnprintf_usbdevfs_cmd(_IOC_NR(cmd), _IOC_DIR(cmd), bf, size);
+ }
return ioctl__scnprintf_cmd(cmd, bf, size, arg->show_string_prefix);
}
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index eb31089790e3..859a8a9db2c6 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -18,8 +18,8 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
}
P_MMAP_PROT(READ);
- P_MMAP_PROT(EXEC);
P_MMAP_PROT(WRITE);
+ P_MMAP_PROT(EXEC);
P_MMAP_PROT(SEM);
P_MMAP_PROT(GROWSDOWN);
P_MMAP_PROT(GROWSUP);
diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c
index 4600c28a3cfe..637722e2796b 100644
--- a/tools/perf/trace/beauty/seccomp.c
+++ b/tools/perf/trace/beauty/seccomp.c
@@ -9,7 +9,7 @@
static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
{
bool show_prefix = arg->show_string_prefix;
- const char *prefix = "SECOMP_SET_MODE_";
+ const char *prefix = "SECCOMP_SET_MODE_";
int op = arg->val;
size_t printed = 0;
@@ -34,7 +34,7 @@ static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
bool show_prefix = arg->show_string_prefix;
- const char *prefix = "SECOMP_FILTER_FLAG_";
+ const char *prefix = "SECCOMP_FILTER_FLAG_";
int printed = 0, flags = arg->val;
#define P_FLAG(n) \
diff --git a/tools/perf/trace/beauty/usbdevfs_ioctl.sh b/tools/perf/trace/beauty/usbdevfs_ioctl.sh
new file mode 100755
index 000000000000..930b80f422e8
--- /dev/null
+++ b/tools/perf/trace/beauty/usbdevfs_ioctl.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *usbdevfs_ioctl_cmds[] = {\n"
+regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)[[:space:]]+_IO[WR]{0,2}\([[:space:]]*'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*"
+egrep $regex ${header_dir}/usbdevice_fs.h | egrep -v 'USBDEVFS_\w+32[[:space:]]' | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n\n"
+printf "#if 0\n"
+printf "static const char *usbdevfs_ioctl_32_cmds[] = {\n"
+regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)[[:space:]]+_IO[WR]{0,2}\([[:space:]]*'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*"
+egrep $regex ${header_dir}/usbdevice_fs.h | egrep 'USBDEVFS_\w+32[[:space:]]' | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
+printf "#endif\n"
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
index 10988d3de7ce..2bd8585db93c 100644
--- a/tools/perf/util/dump-insn.c
+++ b/tools/perf/util/dump-insn.c
@@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
*lenp = 0;
return "?";
}
+
+__weak
+int arch_is_branch(const unsigned char *buf __maybe_unused,
+ size_t len __maybe_unused,
+ int x86_64 __maybe_unused)
+{
+ return 0;
+}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 0e06280a8860..650125061530 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -20,4 +20,6 @@ struct perf_insn {
const char *dump_insn(struct perf_insn *x, u64 ip,
u8 *inbuf, int inlen, int *lenp);
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+
#endif
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7b27d77306c2..ee6ca65f81f4 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -451,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
continue;
intel_bts_get_branch_type(btsq, branch);
if (btsq->bts->synth_opts.thread_stack)
- thread_stack__event(thread, btsq->sample_flags,
+ thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
le64_to_cpu(branch->from),
le64_to_cpu(branch->to),
btsq->intel_pt_insn.length,
@@ -523,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
!btsq->bts->synth_opts.thread_stack && thread &&
(!old_buffer || btsq->bts->sampling_mode ||
(btsq->bts->snapshot_mode && !buffer->consecutive)))
- thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+ thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
err = intel_bts_process_buffer(btsq, buffer, thread);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 54818828023b..1c0e289f01e6 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
return 0;
}
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+{
+ struct intel_pt_insn in;
+ if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
+ return -1;
+ return in.branch != INTEL_PT_BR_NO_BRANCH;
+}
+
const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
u8 *inbuf, int inlen, int *lenp)
{
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 149ff361ca78..2e72373ec6df 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1174,7 +1174,7 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
intel_pt_prep_b_sample(pt, ptq, event, sample);
if (pt->synth_opts.callchain) {
- thread_stack__sample(ptq->thread, ptq->chain,
+ thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
pt->synth_opts.callchain_sz + 1,
sample->ip, pt->kernel_start);
sample->callchain = ptq->chain;
@@ -1526,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return 0;
if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
- thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+ thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
state->to_ip, ptq->insn_len,
state->trace_nr);
else
- thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+ thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
if (pt->sample_branches) {
err = intel_pt_synth_branch_sample(ptq);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 47628e85c5eb..dda0ac978b1e 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -939,7 +939,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
file = PyFile_FromFile(fp, "perf", "r", NULL);
#else
- file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1);
+ file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1,
+ NULL, NULL, NULL, 0);
#endif
if (file == NULL)
goto free_list;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 78a067777144..5456c84c7dd1 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1527,6 +1527,13 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
return machine__findnew_thread(&session->machines.host, -1, pid);
}
+/*
+ * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
+ * So here a single thread is created for that, but actually there is a separate
+ * idle task per cpu, so there should be one 'struct thread' per cpu, but there
+ * is only 1. That causes problems for some tools, requiring workarounds. For
+ * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
+ */
int perf_session__register_idle_thread(struct perf_session *session)
{
struct thread *thread;
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 61a4286a74dc..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
#include <linux/rbtree.h>
#include <linux/list.h>
+#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
@@ -60,6 +61,7 @@ struct thread_stack_entry {
* @last_time: last timestamp
* @crp: call/return processor
* @comm: current comm
+ * @arr_sz: size of array if this is the first element of an array
*/
struct thread_stack {
struct thread_stack_entry *stack;
@@ -71,8 +73,19 @@ struct thread_stack {
u64 last_time;
struct call_return_processor *crp;
struct comm *comm;
+ unsigned int arr_sz;
};
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+ return !(thread->tid || thread->pid_);
+}
+
static int thread_stack__grow(struct thread_stack *ts)
{
struct thread_stack_entry *new_stack;
@@ -91,19 +104,14 @@ static int thread_stack__grow(struct thread_stack *ts)
return 0;
}
-static struct thread_stack *thread_stack__new(struct thread *thread,
- struct call_return_processor *crp)
+static int thread_stack__in