summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-15 11:17:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-15 11:17:15 -0700
commit713eee84720e6525bc5b65954c5087604a15f5e8 (patch)
treea95c09841561bf97e005bded0300e7e3d39da5b0
parent50f6c7dbd973092d8e5f3c89f29eb4bea19fdebd (diff)
parent492e4edba6e2fc0620a69266d33f29c4a1f9ac1e (diff)
Merge tag 'perf-tools-2020-08-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull more perf tools updates from Arnaldo Carvalho de Melo: "Fixes: - Fixes for 'perf bench numa'. - Always memset source before memcpy in 'perf bench mem'. - Quote CC and CXX for their arguments to fix build in environments using those variables to pass more than just the compiler names. - Fix module symbol processing, addressing regression detected via "perf test". - Allow multiple probes in record+script_probe_vfs_getname.sh 'perf test' entry. Improvements: - Add script to autogenerate socket family name id->string table from copy of kernel header, used so far in 'perf trace'. - 'perf ftrace' improvements to provide similar options for this utility so that one can go from 'perf record', 'perf trace', etc to 'perf ftrace' just by changing the name of the subcommand. - Prefer new "sched:sched_waking" trace event when it exists in 'perf sched' post processing. - Update POWER9 metrics to utilize other metrics. - Fall back to querying debuginfod if debuginfo not found locally. Miscellaneous: - Sync various kvm headers with kernel sources" * tag 'perf-tools-2020-08-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (40 commits) perf ftrace: Make option description initials all capital letters perf build-ids: Fall back to debuginfod query if debuginfo not found perf bench numa: Remove dead code in parse_nodes_opt() perf stat: Update POWER9 metrics to utilize other metrics perf ftrace: Add change log perf: ftrace: Add set_tracing_options() to set all trace options perf ftrace: Add option --tid to filter by thread id perf ftrace: Add option -D/--delay to delay tracing perf: ftrace: Allow set graph depth by '--graph-opts' perf ftrace: Add support for trace option tracing_thresh perf ftrace: Add option 'verbose' to show more info for graph tracer perf ftrace: Add support for tracing option 'irq-info' perf ftrace: Add support for trace option funcgraph-irqs perf ftrace: Add support for trace option sleep-time perf ftrace: Add support for tracing option 'func_stack_trace' perf tools: Add general function to parse sublevel options perf ftrace: Add option '--inherit' to trace children processes perf ftrace: Show trace column header perf ftrace: Add option '-m/--buffer-size' to set per-cpu buffer size perf ftrace: Factor out function write_tracing_file_int() ...
-rw-r--r--MAINTAINERS1
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm.h7
-rw-r--r--tools/build/Makefile.feature5
-rw-r--r--tools/build/feature/Makefile4
-rw-r--r--tools/build/feature/test-libdebuginfod.c8
-rw-r--r--tools/include/uapi/linux/kvm.h4
-rw-r--r--tools/include/uapi/linux/vhost.h2
-rw-r--r--tools/lib/perf/Documentation/libperf-counting.txt14
-rw-r--r--tools/lib/perf/Documentation/libperf-sampling.txt13
-rw-r--r--tools/lib/perf/Documentation/libperf.txt4
-rw-r--r--tools/perf/Documentation/perf-config.txt5
-rw-r--r--tools/perf/Documentation/perf-ftrace.txt75
-rw-r--r--tools/perf/Makefile.config8
-rw-r--r--tools/perf/Makefile.perf11
-rw-r--r--tools/perf/bench/find-bit-bench.c4
-rw-r--r--tools/perf/bench/mem-functions.c21
-rw-r--r--tools/perf/bench/numa.c77
-rw-r--r--tools/perf/builtin-ftrace.c436
-rw-r--r--tools/perf/builtin-sched.c32
-rwxr-xr-xtools/perf/check-headers.sh3
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/metrics.json48
-rwxr-xr-xtools/perf/tests/shell/record+script_probe_vfs_getname.sh4
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h442
-rw-r--r--tools/perf/trace/beauty/sockaddr.c9
-rwxr-xr-xtools/perf/trace/beauty/socket.sh24
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/build-id.c19
-rw-r--r--tools/perf/util/debug.c61
-rw-r--r--tools/perf/util/dso.c2
-rw-r--r--tools/perf/util/dso.h10
-rw-r--r--tools/perf/util/header.c13
-rw-r--r--tools/perf/util/machine.c16
-rw-r--r--tools/perf/util/map.c4
-rw-r--r--tools/perf/util/parse-sublevel-options.c70
-rw-r--r--tools/perf/util/parse-sublevel-options.h11
-rw-r--r--tools/perf/util/symbol-elf.c8
-rw-r--r--tools/perf/util/symbol.c24
37 files changed, 1253 insertions, 247 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index b0a742ce8f2c..deaafb617361 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13566,6 +13566,7 @@ F: arch/*/kernel/perf_event*.c
F: include/linux/perf_event.h
F: include/uapi/linux/perf_event.h
F: kernel/events/*
+F: tools/lib/perf/
F: tools/perf/
PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 436ec7636927..7a6b14874d65 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -231,11 +231,13 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
+#define KVM_SYNC_DIAG318 (1UL << 12)
#define KVM_SYNC_S390_VALID_FIELDS \
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
- KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
+ KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
+ KVM_SYNC_DIAG318)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
@@ -264,7 +266,8 @@ struct kvm_sync_regs {
__u8 reserved2 : 7;
__u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
- __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
+ __u64 diag318; /* diagnose 0x318 info */
+ __u8 padding2[184]; /* sdnx needs to be 256byte aligned */
union {
__u8 sdnx[SDNXL]; /* state description annex */
struct {
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 774f0b0ca28a..c1daf4d57518 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -8,7 +8,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
- feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC=$(CC) CXX=$(CXX) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))
@@ -98,7 +98,8 @@ FEATURE_TESTS_EXTRA := \
llvm-version \
clang \
libbpf \
- libpfm4
+ libpfm4 \
+ libdebuginfod
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 846ee1341a5c..d220fe952747 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -26,6 +26,7 @@ FILES= \
test-libelf-gelf_getnote.bin \
test-libelf-getshdrstrndx.bin \
test-libelf-mmap.bin \
+ test-libdebuginfod.bin \
test-libnuma.bin \
test-numa_num_possible_cpus.bin \
test-libperl.bin \
@@ -157,6 +158,9 @@ $(OUTPUT)test-libelf-gelf_getnote.bin:
$(OUTPUT)test-libelf-getshdrstrndx.bin:
$(BUILD) -lelf
+$(OUTPUT)test-libdebuginfod.bin:
+ $(BUILD) -ldebuginfod
+
$(OUTPUT)test-libnuma.bin:
$(BUILD) -lnuma
diff --git a/tools/build/feature/test-libdebuginfod.c b/tools/build/feature/test-libdebuginfod.c
new file mode 100644
index 000000000000..da22548b8413
--- /dev/null
+++ b/tools/build/feature/test-libdebuginfod.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/debuginfod.h>
+
+int main(void)
+{
+ debuginfod_client* c = debuginfod_begin();
+ return (long)c;
+}
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 4fdf30316582..f6d86033c4fa 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -289,6 +289,7 @@ struct kvm_run {
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
+ __u32 cpu;
} fail_entry;
/* KVM_EXIT_EXCEPTION */
struct {
@@ -1031,6 +1032,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_SECURE_GUEST 181
#define KVM_CAP_HALT_POLL 182
#define KVM_CAP_ASYNC_PF_INT 183
+#define KVM_CAP_LAST_CPU 184
+#define KVM_CAP_SMALLER_MAXPHYADDR 185
+#define KVM_CAP_S390_DIAG318 186
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 0c2349612e77..75232185324a 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -91,6 +91,8 @@
/* Use message type V2 */
#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+/* IOTLB can accept batching hints */
+#define VHOST_BACKEND_F_IOTLB_BATCH 0x2
#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
diff --git a/tools/lib/perf/Documentation/libperf-counting.txt b/tools/lib/perf/Documentation/libperf-counting.txt
index cae9757f49c1..8b75efcd67ce 100644
--- a/tools/lib/perf/Documentation/libperf-counting.txt
+++ b/tools/lib/perf/Documentation/libperf-counting.txt
@@ -7,13 +7,13 @@ libperf-counting - counting interface
DESCRIPTION
-----------
-The counting interface provides API to meassure and get count for specific perf events.
+The counting interface provides API to measure and get count for specific perf events.
The following test tries to explain count on `counting.c` example.
It is by no means complete guide to counting, but shows libperf basic API for counting.
-The `counting.c` comes with libbperf package and can be compiled and run like:
+The `counting.c` comes with libperf package and can be compiled and run like:
[source,bash]
--
@@ -26,7 +26,8 @@ count 176242, enabled 176242, run 176242
It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event,
which is available only for root.
-The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it:
+The `counting.c` example monitors two events on the current process and displays
+their count, in a nutshell it:
* creates events
* adds them to the event list
@@ -152,7 +153,7 @@ Configure event list with the thread map and open events:
--
Both events are created as disabled (note the `disabled = 1` assignment above),
-so we need to enable the whole list explicitely (both events).
+so we need to enable the whole list explicitly (both events).
From this moment events are counting and we can do our workload.
@@ -167,7 +168,8 @@ When we are done we disable the events list.
79 perf_evlist__disable(evlist);
--
-Now we need to get the counts from events, following code iterates throught the events list and read counts:
+Now we need to get the counts from events, following code iterates through the
+events list and read counts:
[source,c]
--
@@ -178,7 +180,7 @@ Now we need to get the counts from events, following code iterates throught the
85 }
--
-And finaly cleanup.
+And finally cleanup.
We close the whole events list (both events) and remove it together with the threads map:
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
index d71a7b4fcf5f..d6ca24f6ef78 100644
--- a/tools/lib/perf/Documentation/libperf-sampling.txt
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -8,13 +8,13 @@ libperf-sampling - sampling interface
DESCRIPTION
-----------
-The sampling interface provides API to meassure and get count for specific perf events.
+The sampling interface provides API to measure and get count for specific perf events.
The following test tries to explain count on `sampling.c` example.
It is by no means complete guide to sampling, but shows libperf basic API for sampling.
-The `sampling.c` comes with libbperf package and can be compiled and run like:
+The `sampling.c` comes with libperf package and can be compiled and run like:
[source,bash]
--
@@ -33,7 +33,8 @@ cpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 176
It requires root access, because it uses hardware cycles event.
-The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it:
+The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a
+nutshell it:
- creates events
- adds them to the event list
@@ -90,7 +91,7 @@ Once the setup is complete we start by defining cycles event using the `struct p
36 };
--
-Next step is to prepare cpus map.
+Next step is to prepare CPUs map.
In this case we will monitor all the available CPUs:
@@ -152,7 +153,7 @@ Once the events list is open, we can create memory maps AKA perf ring buffers:
--
The event is created as disabled (note the `disabled = 1` assignment above),
-so we need to enable the events list explicitely.
+so we need to enable the events list explicitly.
From this moment the cycles event is sampling.
@@ -212,7 +213,7 @@ Each sample needs to get parsed:
106 cpu, pid, tid, ip, period);
--
-And finaly cleanup.
+And finally cleanup.
We close the whole events list (both events) and remove it together with the threads map:
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 5a6bb512789d..0c74c30ed23a 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -29,7 +29,7 @@ SYNOPSIS
void libperf_init(libperf_print_fn_t fn);
--
-*API to handle cpu maps:*
+*API to handle CPU maps:*
[source,c]
--
@@ -217,7 +217,7 @@ Following objects are key to the libperf interface:
[horizontal]
-struct perf_cpu_map:: Provides a cpu list abstraction.
+struct perf_cpu_map:: Provides a CPU list abstraction.
struct perf_thread_map:: Provides a thread list abstraction.
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c7d3df5798e2..76408d986aed 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -614,8 +614,9 @@ trace.*::
ftrace.*::
ftrace.tracer::
- Can be used to select the default tracer. Possible values are
- 'function' and 'function_graph'.
+ Can be used to select the default tracer when neither -G nor
+ -F option is not specified. Possible values are 'function' and
+ 'function_graph'.
llvm.*::
llvm.clang-path::
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index b80c84307dc9..78358af9a1c4 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -24,16 +24,28 @@ OPTIONS
-t::
--tracer=::
- Tracer to use: function_graph or function.
+ Tracer to use when neither -G nor -F option is not
+ specified: function_graph or function.
-v::
--verbose=::
Verbosity level.
+-F::
+--funcs::
+ List all available functions to trace.
+
-p::
--pid=::
Trace on existing process id (comma separated list).
+--tid=::
+ Trace on existing thread id (comma separated list).
+
+-D::
+--delay::
+ Time (ms) to wait before starting tracing after program start.
+
-a::
--all-cpus::
Force system-wide collection. Scripts run without a <command>
@@ -48,39 +60,58 @@ OPTIONS
Ranges of CPUs are specified with -: 0-2.
Default is to trace on all online CPUs.
+-m::
+--buffer-size::
+ Set the size of per-cpu tracing buffer, <size> is expected to
+ be a number with appended unit character - B/K/M/G.
+
+--inherit::
+ Trace children processes spawned by our target.
+
-T::
--trace-funcs=::
- Only trace functions given by the argument. Multiple functions
- can be given by using this option more than once. The function
- argument also can be a glob pattern. It will be passed to
- 'set_ftrace_filter' in tracefs.
+ Select function tracer and set function filter on the given
+ function (or a glob pattern). Multiple functions can be given
+ by using this option more than once. The function argument also
+ can be a glob pattern. It will be passed to 'set_ftrace_filter'
+ in tracefs.
-N::
--notrace-funcs=::
- Do not trace functions given by the argument. Like -T option,
- this can be used more than once to specify multiple functions
- (or glob patterns). It will be passed to 'set_ftrace_notrace'
- in tracefs.
+ Select function tracer and do not trace functions given by the
+ argument. Like -T option, this can be used more than once to
+ specify multiple functions (or glob patterns). It will be
+ passed to 'set_ftrace_notrace' in tracefs.
+
+--func-opts::
+ List of options allowed to set:
+ call-graph - Display kernel stack trace for function tracer.
+ irq-info - Display irq context info for function tracer.
-G::
--graph-funcs=::
- Set graph filter on the given function (or a glob pattern).
- This is useful for the function_graph tracer only and enables
- tracing for functions executed from the given function.
- This can be used more than once to specify multiple functions.
- It will be passed to 'set_graph_function' in tracefs.
+ Select function_graph tracer and set graph filter on the given
+ function (or a glob pattern). This is useful to trace for
+ functions executed from the given function. This can be used more
+ than once to specify multiple functions. It will be passed to
+ 'set_graph_function' in tracefs.
-g::
--nograph-funcs=::
- Set graph notrace filter on the given function (or a glob pattern).
- Like -G option, this is useful for the function_graph tracer only
- and disables tracing for function executed from the given function.
- This can be used more than once to specify multiple functions.
- It will be passed to 'set_graph_notrace' in tracefs.
+ Select function_graph tracer and set graph notrace filter on the
+ given function (or a glob pattern). Like -G option, this is useful
+ for the function_graph tracer only and disables tracing for function
+ executed from the given function. This can be used more than once to
+ specify multiple functions. It will be passed to 'set_graph_notrace'
+ in tracefs.
--D::
---graph-depth=::
- Set max depth for function graph tracer to follow
+--graph-opts::
+ List of options allowed to set:
+ nosleep-time - Measure on-CPU time only for function_graph tracer.
+ noirqs - Ignore functions that happen inside interrupt.
+ verbose - Show process names, PIDs, timestamps, etc.
+ thresh=<n> - Setup trace duration threshold in microseconds.
+ depth=<n> - Set max depth for function graph tracer to follow.
SEE ALSO
--------
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 513633809c81..190be4fa5c21 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -501,6 +501,14 @@ ifndef NO_LIBELF
CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT
endif
+ ifndef NO_LIBDEBUGINFOD
+ $(call feature_check,libdebuginfod)
+ ifeq ($(feature-libdebuginfod), 1)
+ CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT
+ EXTLIBS += -ldebuginfod
+ endif
+ endif
+
ifndef NO_DWARF
ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 86dbb51bb272..6031167939ae 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -124,6 +124,8 @@ include ../scripts/utilities.mak
#
# Define LIBPFM4 to enable libpfm4 events extension.
#
+# Define NO_LIBDEBUGINFOD if you do not want support debuginfod
+#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@@ -418,6 +420,7 @@ export INSTALL SHELL_PATH
SHELL = $(SHELL_PATH)
+beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/
linux_uapi_dir := $(srctree)/tools/include/uapi/linux
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
@@ -501,6 +504,12 @@ socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
$(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
+socket_arrays := $(beauty_outdir)/socket_arrays.c
+socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
+
+$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl)
+ $(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@
+
vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -697,6 +706,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(kcmp_type_array) \
$(kvm_ioctl_array) \
$(socket_ipproto_array) \
+ $(socket_arrays) \
$(vhost_virtio_ioctl_array) \
$(madvise_behavior_array) \
$(mmap_flags_array) \
@@ -1006,6 +1016,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(kvm_ioctl_array) \
$(OUTPUT)$(kcmp_type_array) \
$(OUTPUT)$(socket_ipproto_array) \
+ $(OUTPUT)$(socket_arrays) \
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
index fa90f3e9d368..73b5bcc5946a 100644
--- a/tools/perf/bench/find-bit-bench.c
+++ b/tools/perf/bench/find-bit-bench.c
@@ -17,9 +17,9 @@ static unsigned int inner_iterations = 100000;
static const struct option options[] = {
OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
- "Number of outerer iterations used"),
+ "Number of outer iterations used"),
OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
- "Number of outerer iterations used"),
+ "Number of inner iterations used"),
OPT_END()
};
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 9235b76501be..19d45c377ac1 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 0;
}
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
- memcpy_t fn = r->fn.memcpy;
- int i;
-
/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
memset(src, 0, size);
@@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
* to not measure page fault overhead:
*/
fn(dst, src, size);
+}
+
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+{
+ u64 cycle_start = 0ULL, cycle_end = 0ULL;
+ memcpy_t fn = r->fn.memcpy;
+ int i;
+
+ memcpy_prefault(fn, size, src, dst);
cycle_start = get_cycles();
for (i = 0; i < nr_loops; ++i)
@@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
memcpy_t fn = r->fn.memcpy;
int i;
- /*
- * We prefault the freshly allocated memory range here,
- * to not measure page fault overhead:
- */
- fn(dst, src, size);
+ memcpy_prefault(fn, size, src, dst);
BUG_ON(gettimeofday(&tv_start, NULL));
for (i = 0; i < nr_loops; ++i)
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5797253b9700..f85bceccc459 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -247,17 +247,22 @@ static int is_node_present(int node)
*/
static bool node_has_cpus(int node)
{
- struct bitmask *cpu = numa_allocate_cpumask();
- unsigned int i;
+ struct bitmask *cpumask = numa_allocate_cpumask();
+ bool ret = false; /* fall back to nocpus */
+ int cpu;
- if (cpu && !numa_node_to_cpus(node, cpu)) {
- for (i = 0; i < cpu->size; i++) {
- if (numa_bitmask_isbitset(cpu, i))
- return true;
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu)) {
+ ret = true;
+ break;
+ }
}
}
+ numa_free_cpumask(cpumask);
- return false; /* lets fall back to nocpus safely */
+ return ret;
}
static cpu_set_t bind_to_cpu(int target_cpu)
@@ -288,14 +293,10 @@ static cpu_set_t bind_to_cpu(int target_cpu)
static cpu_set_t bind_to_node(int target_node)
{
- int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
cpu_set_t orig_mask, mask;
int cpu;
int ret;
- BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
- BUG_ON(!cpus_per_node);
-
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
@@ -305,13 +306,16 @@ static cpu_set_t bind_to_node(int target_node)
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
} else {
- int cpu_start = (target_node + 0) * cpus_per_node;
- int cpu_stop = (target_node + 1) * cpus_per_node;
+ struct bitmask *cpumask = numa_allocate_cpumask();
- BUG_ON(cpu_stop > g->p.nr_cpus);
-
- for (cpu = cpu_start; cpu < cpu_stop; cpu++)
- CPU_SET(cpu, &mask);
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(target_node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu))
+ CPU_SET(cpu, &mask);
+ }
+ }
+ numa_free_cpumask(cpumask);
}
ret = sched_setaffinity(0, sizeof(mask), &mask);
@@ -729,8 +733,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
return -1;
return parse_node_list(arg);
-
- return 0;
}
#define BIT(x) (1ul << x)
@@ -813,12 +815,12 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
}
}
} else if (!g->p.data_backwards || (nr + loop) & 1) {
+ /* Process data forwards: */
d0 = data + off;
d = data + off + 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d >= d1))
d = data;
@@ -836,7 +838,6 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
d = data + off - 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d < data))
d = data + words-1;
@@ -1733,12 +1734,12 @@ err:
*/
static const char *tests[][MAX_ARGS] = {
/* Basic single-stream NUMA bandwidth measurements: */