summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 14:51:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 14:51:09 -0700
commitb34133fec882d2717f2d61a2a010edd3422368c8 (patch)
tree699dc2a510e1c3bbece7d8ea593ee536ea464465 /arch
parent9dee86896c5968a928e56828236af41c136bdfbd (diff)
parentd903b6d029d66e6478562d75ea18d89098f7b7e8 (diff)
Merge tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf event updates from Ingo Molnar: "HW support updates: - Add uncore support for Intel Comet Lake - Add RAPL support for Hygon Fam18h - Add Intel "IIO stack to PMON mapping" support on Skylake-SP CPUs, which enumerates per device performance counters via sysfs and enables the perf stat --iiostat functionality - Add support for Intel "Architectural LBRs", which generalized the model specific LBR hardware tracing feature into a model-independent, architected performance monitoring feature. Usage is mostly seamless to tooling, as the pre-existing LBR features are kept, but there's a couple of advantages under the hood, such as faster context-switching, faster LBR reads, cleaner exposure of LBR features to guest kernels, etc. ( Since architectural LBRs are supported via XSAVE, there's related changes to the x86 FPU code as well. ) ftrace/perf updates: - Add support to add a text poke event to record changes to kernel text (i.e. self-modifying code) in order to support tracers like Intel PT decoding through jump labels, kprobes and ftrace trampolines. Misc cleanups, smaller fixes..." * tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits) perf/x86/rapl: Add Hygon Fam18h RAPL support kprobes: Remove unnecessary module_mutex locking from kprobe_optimizer() x86/perf: Fix a typo perf: <linux/perf_event.h>: drop a duplicated word perf/x86/intel/lbr: Support XSAVES for arch LBR read perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch x86/fpu/xstate: Add helpers for LBR dynamic supervisor feature x86/fpu/xstate: Support dynamic supervisor feature for LBR x86/fpu: Use proper mask to replace full instruction mask perf/x86: Remove task_ctx_size perf/x86/intel/lbr: Create kmem_cache for the LBR context data perf/core: Use kmem_cache to allocate the PMU specific data perf/core: Factor out functions to allocate/free the task_ctx_data perf/x86/intel/lbr: Support Architectural LBR perf/x86/intel/lbr: Factor out intel_pmu_store_lbr perf/x86/intel/lbr: Factor out rdlbr_all() and wrlbr_all() perf/x86/intel/lbr: Mark the {rd,wr}lbr_{to,from} wrappers __always_inline perf/x86/intel/lbr: Unify the stored format of LBR information perf/x86/intel/lbr: Support LBR_CTL perf/x86: Expose CPUID enumeration bits for arch LBR ...
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/events/core.c28
-rw-r--r--arch/x86/events/intel/core.c127
-rw-r--r--arch/x86/events/intel/ds.c6
-rw-r--r--arch/x86/events/intel/lbr.c733
-rw-r--r--arch/x86/events/intel/uncore.c26
-rw-r--r--arch/x86/events/intel/uncore.h37
-rw-r--r--arch/x86/events/intel/uncore_snb.c80
-rw-r--r--arch/x86/events/intel/uncore_snbep.c208
-rw-r--r--arch/x86/events/perf_event.h125
-rw-r--r--arch/x86/events/rapl.c3
-rw-r--r--arch/x86/events/zhaoxin/core.c2
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/fpu/internal.h47
-rw-r--r--arch/x86/include/asm/fpu/types.h27
-rw-r--r--arch/x86/include/asm/fpu/xstate.h36
-rw-r--r--arch/x86/include/asm/kprobes.h2
-rw-r--r--arch/x86/include/asm/msr-index.h16
-rw-r--r--arch/x86/include/asm/perf_event.h82
-rw-r--r--arch/x86/kernel/alternative.c37
-rw-r--r--arch/x86/kernel/fpu/core.c39
-rw-r--r--arch/x86/kernel/fpu/xstate.c89
-rw-r--r--arch/x86/kernel/kprobes/core.c15
-rw-r--r--arch/x86/kernel/kprobes/opt.c38
23 files changed, 1548 insertions, 256 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 4103665c6e03..1cbf57dc2ac8 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -71,10 +71,9 @@ u64 x86_perf_event_update(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
int shift = 64 - x86_pmu.cntval_bits;
u64 prev_raw_count, new_raw_count;
- int idx = hwc->idx;
u64 delta;
- if (idx == INTEL_PMC_IDX_FIXED_BTS)
+ if (unlikely(!hwc->event_base))
return 0;
/*
@@ -359,6 +358,7 @@ void x86_release_hardware(void)
if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
release_pmc_hardware();
release_ds_buffers();
+ release_lbr_buffers();
mutex_unlock(&pmc_reserve_mutex);
}
}
@@ -1097,22 +1097,31 @@ static inline void x86_assign_hw_event(struct perf_event *event,
struct cpu_hw_events *cpuc, int i)
{
struct hw_perf_event *hwc = &event->hw;
+ int idx;
- hwc->idx = cpuc->assign[i];
+ idx = hwc->idx = cpuc->assign[i];
hwc->last_cpu = smp_processor_id();
hwc->last_tag = ++cpuc->tags[i];
- if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
+ switch (hwc->idx) {
+ case INTEL_PMC_IDX_FIXED_BTS:
+ case INTEL_PMC_IDX_FIXED_VLBR:
hwc->config_base = 0;
hwc->event_base = 0;
- } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+ break;
+
+ case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
- hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
- } else {
+ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
+ (idx - INTEL_PMC_IDX_FIXED);
+ hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
+ break;
+
+ default:
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
+ break;
}
}
@@ -1233,7 +1242,7 @@ int x86_perf_event_set_period(struct perf_event *event)
s64 period = hwc->sample_period;
int ret = 0, idx = hwc->idx;
- if (idx == INTEL_PMC_IDX_FIXED_BTS)
+ if (unlikely(!hwc->event_base))
return 0;
/*
@@ -2363,7 +2372,6 @@ static struct pmu pmu = {
.event_idx = x86_pmu_event_idx,
.sched_task = x86_pmu_sched_task,
- .task_ctx_size = sizeof(struct x86_perf_task_context),
.swap_task_ctx = x86_pmu_swap_task_ctx,
.check_period = x86_pmu_check_period,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ca35c8b5ee10..50963472ee85 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2136,8 +2136,35 @@ static inline void intel_pmu_ack_status(u64 ack)
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
-static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+static inline bool event_is_checkpointed(struct perf_event *event)
{
+ return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
+static inline void intel_set_masks(struct perf_event *event, int idx)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (event->attr.exclude_host)
+ __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+ if (event->attr.exclude_guest)
+ __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
+ if (event_is_checkpointed(event))
+ __set_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
+}
+
+static inline void intel_clear_masks(struct perf_event *event, int idx)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+ __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
+ __clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
+}
+
+static void intel_pmu_disable_fixed(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask;
@@ -2148,30 +2175,22 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
wrmsrl(hwc->config_base, ctrl_val);
}
-static inline bool event_is_checkpointed(struct perf_event *event)
-{
- return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
-}
-
static void intel_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx = hwc->idx;
- if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+ if (idx < INTEL_PMC_IDX_FIXED) {
+ intel_clear_masks(event, idx);
+ x86_pmu_disable_event(event);
+ } else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
+ intel_clear_masks(event, idx);
+ intel_pmu_disable_fixed(event);
+ } else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
intel_pmu_disable_bts();
intel_pmu_drain_bts_buffer();
- return;
- }
-
- cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
- cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
- cpuc->intel_cp_status &= ~(1ull << hwc->idx);
-
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
- intel_pmu_disable_fixed(hwc);
- else
- x86_pmu_disable_event(event);
+ } else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
+ intel_clear_masks(event, idx);
/*
* Needs to be called after x86_pmu_disable_event,
@@ -2238,33 +2257,23 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
static void intel_pmu_enable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
- if (!__this_cpu_read(cpu_hw_events.enabled))
- return;
-
- intel_pmu_enable_bts(hwc->config);
- return;
- }
-
- if (event->attr.exclude_host)
- cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
- if (event->attr.exclude_guest)
- cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
-
- if (unlikely(event_is_checkpointed(event)))
- cpuc->intel_cp_status |= (1ull << hwc->idx);
+ int idx = hwc->idx;
if (unlikely(event->attr.precise_ip))
intel_pmu_pebs_enable(event);
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ if (idx < INTEL_PMC_IDX_FIXED) {
+ intel_set_masks(event, idx);
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ } else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
+ intel_set_masks(event, idx);
intel_pmu_enable_fixed(event);
- return;
- }
-
- __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ } else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
+ if (!__this_cpu_read(cpu_hw_events.enabled))
+ return;
+ intel_pmu_enable_bts(hwc->config);
+ } else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
+ intel_set_masks(event, idx);
}
static void intel_pmu_add_event(struct perf_event *event)
@@ -2614,6 +2623,20 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}
+/*
+ * Note: matches a fake event, like Fixed2.
+ */
+static struct event_constraint *
+intel_vlbr_constraints(struct perf_event *event)
+{
+ struct event_constraint *c = &vlbr_constraint;
+
+ if (unlikely(constraint_match(c, event->hw.config)))
+ return c;
+
+ return NULL;
+}
+
static int intel_alt_er(int idx, u64 config)
{
int alt_idx = idx;
@@ -2804,6 +2827,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
struct event_constraint *c;
+ c = intel_vlbr_constraints(event);
+ if (c)
+ return c;
+
c = intel_bts_constraints(event);
if (c)
return c;
@@ -3951,6 +3978,11 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_dead = intel_pmu_cpu_dead,
.check_period = intel_pmu_check_period,
+
+ .lbr_reset = intel_pmu_lbr_reset_64,
+ .lbr_read = intel_pmu_lbr_read_64,
+ .lbr_save = intel_pmu_lbr_save,
+ .lbr_restore = intel_pmu_lbr_restore,
};
static __initconst const struct x86_pmu intel_pmu = {
@@ -3996,6 +4028,11 @@ static __initconst const struct x86_pmu intel_pmu = {
.check_period = intel_pmu_check_period,
.aux_output_match = intel_pmu_aux_output_match,
+
+ .lbr_reset = intel_pmu_lbr_reset_64,
+ .lbr_read = intel_pmu_lbr_read_64,
+ .lbr_save = intel_pmu_lbr_save,
+ .lbr_restore = intel_pmu_lbr_restore,
};
static __init void intel_clovertown_quirk(void)
@@ -4622,6 +4659,14 @@ __init int intel_pmu_init(void)
x86_pmu.intel_cap.capabilities = capabilities;
}
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) {
+ x86_pmu.lbr_reset = intel_pmu_lbr_reset_32;
+ x86_pmu.lbr_read = intel_pmu_lbr_read_32;
+ }
+
+ if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
+ intel_pmu_arch_lbr_init();
+
intel_ds_init();
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index dc43cc124e09..86848c57b55e 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -954,7 +954,7 @@ static void adaptive_pebs_record_size_update(void)
if (pebs_data_cfg & PEBS_DATACFG_XMMS)
sz += sizeof(struct pebs_xmm);
if (pebs_data_cfg & PEBS_DATACFG_LBRS)
- sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry);
+ sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
cpuc->pebs_record_size = sz;
}
@@ -1595,10 +1595,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
}
if (format_size & PEBS_DATACFG_LBRS) {
- struct pebs_lbr *lbr = next_record;
+ struct lbr_entry *lbr = next_record;
int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
& 0xff) + 1;
- next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry);
+ next_record = next_record + num_lbr * sizeof(struct lbr_entry);
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 65113b16804a..63f58bdf556c 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -8,17 +8,6 @@
#include "../perf_event.h"
-enum {
- LBR_FORMAT_32 = 0x00,
- LBR_FORMAT_LIP = 0x01,
- LBR_FORMAT_EIP = 0x02,
- LBR_FORMAT_EIP_FLAGS = 0x03,
- LBR_FORMAT_EIP_FLAGS2 = 0x04,
- LBR_FORMAT_INFO = 0x05,
- LBR_FORMAT_TIME = 0x06,
- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
-};
-
static const enum {
LBR_EIP_FLAGS = 1,
LBR_TSX = 2,
@@ -143,8 +132,54 @@ enum {
X86_BR_IRQ |\
X86_BR_INT)
+/*
+ * Intel LBR_CTL bits
+ *
+ * Hardware branch filter for Arch LBR
+ */
+#define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */
+#define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */
+#define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */
+#define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */
+#define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */
+#define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */
+#define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */
+#define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */
+#define ARCH_LBR_RETURN_BIT 21 /* capture near returns */
+#define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */
+
+#define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT)
+#define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT)
+#define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT)
+#define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT)
+#define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT)
+#define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT)
+#define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT)
+#define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT)
+#define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT)
+#define ARCH_LBR_OTHER_BRANCH (1ULL << ARCH_LBR_OTHER_BRANCH_BIT)
+
+#define ARCH_LBR_ANY \
+ (ARCH_LBR_JCC |\
+ ARCH_LBR_REL_JMP |\
+ ARCH_LBR_IND_JMP |\
+ ARCH_LBR_REL_CALL |\
+ ARCH_LBR_IND_CALL |\
+ ARCH_LBR_RETURN |\
+ ARCH_LBR_OTHER_BRANCH)
+
+#define ARCH_LBR_CTL_MASK 0x7f000e
+
static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
+{
+ if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+ return !!(config & ARCH_LBR_CALL_STACK);
+
+ return !!(config & LBR_CALL_STACK);
+}
+
/*
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI
* otherwise it becomes near impossible to get a reliable stack.
@@ -168,33 +203,46 @@ static void __intel_pmu_lbr_enable(bool pmi)
*/
if (cpuc->lbr_sel)
lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
- if (!pmi && cpuc->lbr_sel)
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
wrmsrl(MSR_LBR_SELECT, lbr_select);
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
orig_debugctl = debugctl;
- debugctl |= DEBUGCTLMSR_LBR;
+
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+ debugctl |= DEBUGCTLMSR_LBR;
/*
* LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
* If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
* may cause superfluous increase/decrease of LBR_TOS.
*/
- if (!(lbr_select & LBR_CALL_STACK))
+ if (is_lbr_call_stack_bit_set(lbr_select))
+ debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+ else
debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+
if (orig_debugctl != debugctl)
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+
+ if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+ wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
}
static void __intel_pmu_lbr_disable(void)
{
u64 debugctl;
+ if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+ wrmsrl(MSR_ARCH_LBR_CTL, 0);
+ return;
+ }
+
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
-static void intel_pmu_lbr_reset_32(void)
+void intel_pmu_lbr_reset_32(void)
{
int i;
@@ -202,7 +250,7 @@ static void intel_pmu_lbr_reset_32(void)
wrmsrl(x86_pmu.lbr_from + i, 0);
}
-static void intel_pmu_lbr_reset_64(void)
+void intel_pmu_lbr_reset_64(void)
{
int i;
@@ -210,10 +258,16 @@ static void intel_pmu_lbr_reset_64(void)
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
- wrmsrl(MSR_LBR_INFO_0 + i, 0);
+ wrmsrl(x86_pmu.lbr_info + i, 0);
}
}
+static void intel_pmu_arch_lbr_reset(void)
+{
+ /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */
+ wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
+}
+
void intel_pmu_lbr_reset(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -221,10 +275,7 @@ void intel_pmu_lbr_reset(void)
if (!x86_pmu.lbr_nr)
return;
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
- intel_pmu_lbr_reset_32();
- else
- intel_pmu_lbr_reset_64();
+ x86_pmu.lbr_reset();
cpuc->last_task_ctx = NULL;
cpuc->last_log_id = 0;
@@ -308,69 +359,97 @@ static u64 lbr_from_signext_quirk_rd(u64 val)
return val;
}
-static inline void wrlbr_from(unsigned int idx, u64 val)
+static __always_inline void wrlbr_from(unsigned int idx, u64 val)
{
val = lbr_from_signext_quirk_wr(val);
wrmsrl(x86_pmu.lbr_from + idx, val);
}
-static inline void wrlbr_to(unsigned int idx, u64 val)
+static __always_inline void wrlbr_to(unsigned int idx, u64 val)
{
wrmsrl(x86_pmu.lbr_to + idx, val);
}
-static inline u64 rdlbr_from(unsigned int idx)
+static __always_inline void wrlbr_info(unsigned int idx, u64 val)
+{
+ wrmsrl(x86_pmu.lbr_info + idx, val);
+}
+
+static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
{
u64 val;
+ if (lbr)
+ return lbr->from;
+
rdmsrl(x86_pmu.lbr_from + idx, val);
return lbr_from_signext_quirk_rd(val);
}
-static inline u64 rdlbr_to(unsigned int idx)
+static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
{
u64 val;
+ if (lbr)
+ return lbr->to;
+
rdmsrl(x86_pmu.lbr_to + idx, val);
return val;
}
-static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
+{
+ u64 val;
+
+ if (lbr)
+ return lbr->info;
+
+ rdmsrl(x86_pmu.lbr_info + idx, val);
+
+ return val;
+}
+
+static inline void
+wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
+{
+ wrlbr_from(idx, lbr->from);
+ wrlbr_to(idx, lbr->to);
+ if (need_info)
+ wrlbr_info(idx, lbr->info);
+}
+
+static inline bool
+rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
+{
+ u64 from = rdlbr_from(idx, NULL);
+
+ /* Don't read invalid entry */
+ if (!from)
+ return false;
+
+ lbr->from = from;
+ lbr->to = rdlbr_to(idx, NULL);
+ if (need_info)
+ lbr->info = rdlbr_info(idx, NULL);
+
+ return true;
+}
+
+void intel_pmu_lbr_restore(void *ctx)
{
+ bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_task_context *task_ctx = ctx;
int i;
unsigned lbr_idx, mask;
- u64 tos;
-
- if (task_ctx->lbr_callstack_users == 0 ||
- task_ctx->lbr_stack_state == LBR_NONE) {
- intel_pmu_lbr_reset();
- return;
- }
-
- tos = task_ctx->tos;
- /*
- * Does not restore the LBR registers, if
- * - No one else touched them, and
- * - Did not enter C6
- */
- if ((task_ctx == cpuc->last_task_ctx) &&
- (task_ctx->log_id == cpuc->last_log_id) &&
- rdlbr_from(tos)) {
- task_ctx->lbr_stack_state = LBR_NONE;
- return;
- }
+ u64 tos = task_ctx->tos;
mask = x86_pmu.lbr_nr - 1;
for (i = 0; i < task_ctx->valid_lbrs; i++) {
lbr_idx = (tos - i) & mask;
- wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
- wrlbr_to (lbr_idx, task_ctx->lbr_to[i]);
-
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
- wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+ wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
}
for (; i < x86_pmu.lbr_nr; i++) {
@@ -378,49 +457,149 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
wrlbr_from(lbr_idx, 0);
wrlbr_to(lbr_idx, 0);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
- wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0);
+ wrlbr_info(lbr_idx, 0);
}
wrmsrl(x86_pmu.lbr_tos, tos);
- task_ctx->lbr_stack_state = LBR_NONE;
+
+ if (cpuc->lbr_select)
+ wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}
-static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+static void intel_pmu_arch_lbr_restore(void *ctx)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- unsigned lbr_idx, mask;
- u64 tos, from;
+ struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
+ struct lbr_entry *entries = task_ctx->entries;
int i;
- if (task_ctx->lbr_callstack_users == 0) {
- task_ctx->lbr_stack_state = LBR_NONE;
+ /* Fast reset the LBRs before restore if the call stack is not full. */
+ if (!entries[x86_pmu.lbr_nr - 1].from)
+ intel_pmu_arch_lbr_reset();
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ if (!entries[i].from)
+ break;
+ wrlbr_all(&entries[i], i, true);
+ }
+}
+
+/*
+ * Restore the Architecture LBR state from the xsave area in the perf
+ * context data for the task via the XRSTORS instruction.
+ */
+static void intel_pmu_arch_lbr_xrstors(void *ctx)
+{
+ struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+ copy_kernel_to_dynamic_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
+static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
+{
+ if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+ return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);
+
+ return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
+}
+
+static void __intel_pmu_lbr_restore(void *ctx)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
+ task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
+ intel_pmu_lbr_reset();
+ return;
+ }
+
+ /*
+ * Does not restore the LBR registers, if
+ * - No one else touched them, and
+ * - Was not cleared in Cstate
+ */
+ if ((ctx == cpuc->last_task_ctx) &&
+ (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
+ !lbr_is_reset_in_cstate(ctx)) {
+ task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
return;
}
+ x86_pmu.lbr_restore(ctx);
+
+ task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
+}
+
+void intel_pmu_lbr_save(void *ctx)
+{
+ bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_task_context *task_ctx = ctx;
+ unsigned lbr_idx, mask;
+ u64 tos;
+ int i;
+
mask = x86_pmu.lbr_nr - 1;
tos = intel_pmu_lbr_tos();
for (i = 0; i < x86_pmu.lbr_nr; i++) {
lbr_idx = (tos - i) & mask;
- from = rdlbr_from(lbr_idx);
- if (!from)
+ if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
break;
- task_ctx->lbr_from[i] = from;
- task_ctx->lbr_to[i] = rdlbr_to(lbr_idx);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
- rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
task_ctx->valid_lbrs = i;
task_ctx->tos = tos;
- task_ctx->lbr_stack_state = LBR_VALID;
- cpuc->last_task_ctx = task_ctx;
- cpuc->last_log_id = ++task_ctx->log_id;
+ if (cpuc->lbr_select)
+ rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
+}
+
+static void intel_pmu_arch_lbr_save(void *ctx)
+{
+ struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
+ struct lbr_entry *entries = task_ctx->entries;
+ int i;
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ if (!rdlbr_all(&entries[i], i, true))
+ break;
+ }
+
+ /* LBR call stack is not full. Reset is required in restore. */
+ if (i < x86_pmu.lbr_nr)
+ entries[x86_pmu.lbr_nr - 1].from = 0;
+}
+
+/*
+ * Save the Architecture LBR state to the xsave area in the perf
+ * context data for the task via the XSAVES instruction.
+ */
+static void intel_pmu_arch_lbr_xsaves(void *ctx)
+{
+ struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+ copy_dynamic_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
+static void __intel_pmu_lbr_save(void *ctx)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (task_context_opt(ctx)->lbr_callstack_users == 0) {
+ task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
+ return;
+ }
+
+ x86_pmu.lbr_save(ctx);
+
+ task_context_opt(ctx)->lbr_stack_state = LBR_VALID;
+
+ cpuc->last_task_ctx = ctx;
+ cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
}
void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
{
- struct x86_perf_task_context *prev_ctx_data, *next_ctx_data;
+ void *prev_ctx_data, *next_ctx_data;
swap(prev->task_ctx_data, next->task_ctx_data);
@@ -436,14 +615,14 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
if (!prev_ctx_data || !next_ctx_data)
return;
- swap(prev_ctx_data->lbr_callstack_users,
- next_ctx_data->lbr_callstack_users);
+ swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
+ task_context_opt(next_ctx_data)->lbr_callstack_users);
}
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct x86_perf_task_context *task_ctx;
+ void *task_ctx;
if (!cpuc->lbr_users)
return;
@@ -479,18 +658,19 @@ static inline bool branch_user_callstack(unsigned br_sel)
void intel_pmu_lbr_add(struct perf_event *event)
{
+ struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct x86_perf_task_context *task_ctx;
if (!x86_pmu.lbr_nr)
return;
+ if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
+ cpuc->lbr_select = 1;
+
cpuc->br_sel = event->hw.branch_reg.reg;
- if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
- task_ctx = event->ctx->task_ctx_data;
- task_ctx->lbr_callstack_users++;
- }
+ if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
+ task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
/*
* Request pmu::sched_task() callback, which will fire inside the
@@ -516,21 +696,44 @@ void intel_pmu_lbr_add(struct perf_event *event)
perf_sched_cb_inc(event->ctx->pmu);
if (!cpuc->lbr_users++ && !event->total_time_running)
intel_pmu_lbr_reset();
+
+ if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
+ kmem_cache && !cpuc->lbr_xsave &&
+ (cpuc->lbr_users != cpuc->lbr_pebs_users))
+ cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL);
+}
+
+void release_lbr_buffers(void)
+{
+ struct kmem_cache *kmem_cache = x86_get_pmu()->task_ctx_cache;
+ struct cpu_hw_events *cpuc;
+ int cpu;
+
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+ if (kmem_cache && cpuc->lbr_xsave) {
+ kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
+ cpuc->lbr_xsave = NULL;
+ }
+ }
}
void intel_pmu_lbr_del(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct x86_perf_task_context *task_ctx;
if (!x86_pmu.lbr_nr)
return;
if (branch_user_callstack(cpuc->br_sel) &&
- event->ctx->task_ctx_data) {
- task_ctx = event->ctx->task_ctx_data;
- task_ctx->lbr_callstack_users--;
- }
+ event->ctx->task_ctx_data)
+ task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;
+
+ if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
+ cpuc->lbr_select = 0;
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
cpuc->lbr_pebs_users--;
@@ -540,11 +743,19 @@ void intel_pmu_lbr_del(struct perf_event *event)
perf_sched_cb_dec(event->ctx->pmu);
}
+static inline bool vlbr_exclude_host(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
+ (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+}
+
void intel_pmu_lbr_enable_all(bool pmi)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- if (cpuc->lbr_users)
+ if (cpuc->lbr_users && !vlbr_exclude_host())
__intel_pmu_lbr_enable(pmi);
}
@@ -552,11 +763,11 @@ void intel_pmu_lbr_disable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- if (cpuc->lbr_users)
+ if (cpuc->lbr_users && !vlbr_exclude_host())
__intel_pmu_lbr_disable();
}
-static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+void intel_pmu_lbr_read_3