summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-04-30 07:41:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-04-30 07:41:01 -0700
commite0972916e8fe943f342b0dd1c9d43dbf5bc261c2 (patch)
tree690c436f1f9b839c4ba34d17ab3efa63b97a2dce /arch
parent1f889ec62c3f0d8913f3c32f9aff2a1e15099346 (diff)
parent5ac2b5c2721501a8f5c5e1cd4116cbc31ace6886 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Features: - Add "uretprobes" - an optimization to uprobes, like kretprobes are an optimization to kprobes. "perf probe -x file sym%return" now works like kretprobes. By Oleg Nesterov. - Introduce per core aggregation in 'perf stat', from Stephane Eranian. - Add memory profiling via PEBS, from Stephane Eranian. - Event group view for 'annotate' in --stdio, --tui and --gtk, from Namhyung Kim. - Add support for AMD NB and L2I "uncore" counters, by Jacob Shin. - Add Ivy Bridge-EP uncore support, by Zheng Yan - IBM zEnterprise EC12 oprofile support patchlet from Robert Richter. - Add perf test entries for checking breakpoint overflow signal handler issues, from Jiri Olsa. - Add perf test entry for for checking number of EXIT events, from Namhyung Kim. - Add perf test entries for checking --cpu in record and stat, from Jiri Olsa. - Introduce perf stat --repeat forever, from Frederik Deweerdt. - Add --no-demangle to report/top, from Namhyung Kim. - PowerPC fixes plus a couple of cleanups/optimizations in uprobes and trace_uprobes, by Oleg Nesterov. Various fixes and refactorings: - Fix dependency of the python binding wrt libtraceevent, from Naohiro Aota. - Simplify some perf_evlist methods and to allow 'stat' to share code with 'record' and 'trace', by Arnaldo Carvalho de Melo. - Remove dead code in related to libtraceevent integration, from Namhyung Kim. - Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf sched lat' back working, by Arnaldo Carvalho de Melo - We don't use Newt anymore, just plain libslang, by Arnaldo Carvalho de Melo. - Kill a bunch of die() calls, from Namhyung Kim. - Fix build on non-glibc systems due to libio.h absence, from Cody P Schafer. - Remove some perf_session and tracing dead code, from David Ahern. - Honor parallel jobs, fix from Borislav Petkov - Introduce tools/lib/lk library, initially just removing duplication among tools/perf and tools/vm. from Borislav Petkov ... and many more I missed to list, see the shortlog and git log for more details." * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (136 commits) perf/x86/intel/P4: Robistify P4 PMU types perf/x86/amd: Fix AMD NB and L2I "uncore" support perf/x86/amd: Remove old-style NB counter support from perf_event_amd.c perf/x86: Check all MSRs before passing hw check perf/x86/amd: Add support for AMD NB and L2I "uncore" counters perf/x86/intel: Add Ivy Bridge-EP uncore support perf/x86/intel: Fix SNB-EP CBO and PCU uncore PMU filter management perf/x86: Avoid kfree() in CPU_{STARTING,DYING} uprobes/perf: Avoid perf_trace_buf_prepare/submit if ->perf_events is empty uprobes/tracing: Don't pass addr=ip to perf_trace_buf_submit() uprobes/tracing: Change create_trace_uprobe() to support uretprobes uprobes/tracing: Make seq_printf() code uretprobe-friendly uprobes/tracing: Make register_uprobe_event() paths uretprobe-friendly uprobes/tracing: Make uprobe_{trace,perf}_print() uretprobe-friendly uprobes/tracing: Introduce is_ret_probe() and uretprobe_dispatcher() uprobes/tracing: Introduce uprobe_{trace,perf}_print() helpers uprobes/tracing: Generalize struct uprobe_trace_entry_head uprobes/tracing: Kill the pointless local_save_flags/preempt_count calls uprobes/tracing: Kill the pointless seq_print_ip_sym() call uprobes/tracing: Kill the pointless task_pt_regs() calls ...
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/include/asm/uprobes.h1
-rw-r--r--arch/powerpc/kernel/uprobes.c29
-rw-r--r--arch/s390/oprofile/init.c1
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/perf_event_p4.h62
-rw-r--r--arch/x86/include/asm/uprobes.h1
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h5
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c89
-rw-r--r--arch/x86/kernel/cpu/perf_event.h56
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c138
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_uncore.c547
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c38
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c182
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c876
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h64
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c9
-rw-r--r--arch/x86/kernel/kprobes/core.c6
-rw-r--r--arch/x86/kernel/uprobes.c29
19 files changed, 1834 insertions, 303 deletions
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index b532060d0916..23016020915e 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -51,4 +51,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
#endif /* _ASM_UPROBES_H */
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index bc77834dbf43..59f419b935f2 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -31,6 +31,16 @@
#define UPROBE_TRAP_NR UINT_MAX
/**
+ * is_trap_insn - check if the instruction is a trap variant
+ * @insn: instruction to be checked.
+ * Returns true if @insn is a trap variant.
+ */
+bool is_trap_insn(uprobe_opcode_t *insn)
+{
+ return (is_trap(*insn));
+}
+
+/**
* arch_uprobe_analyze_insn
* @mm: the probed address space.
* @arch_uprobe: the probepoint information.
@@ -43,12 +53,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
if (addr & 0x03)
return -EINVAL;
- /*
- * We currently don't support a uprobe on an already
- * existing breakpoint instruction underneath
- */
- if (is_trap(auprobe->ainsn))
- return -ENOTSUPP;
return 0;
}
@@ -188,3 +192,16 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
return false;
}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+{
+ unsigned long orig_ret_vaddr;
+
+ orig_ret_vaddr = regs->link;
+
+ /* Replace the return addr with trampoline addr */
+ regs->link = trampoline_vaddr;
+
+ return orig_ret_vaddr;
+}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 584b93674ea4..ffeb17ce7f31 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -440,6 +440,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
switch (id.machine) {
case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+ case 0x2827: ops->cpu_type = "s390/zEC12"; break;
default: return -ENODEV;
}
}
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929d1cee..ac10df72925b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -311,6 +312,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
+#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 4f7e67e2345e..85e13ccf15c4 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -24,45 +24,45 @@
#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))
-#define P4_ESCR_EVENT_MASK 0x7e000000U
+#define P4_ESCR_EVENT_MASK 0x7e000000ULL
#define P4_ESCR_EVENT_SHIFT 25
-#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U
+#define P4_ESCR_EVENTMASK_MASK 0x01fffe00ULL
#define P4_ESCR_EVENTMASK_SHIFT 9
-#define P4_ESCR_TAG_MASK 0x000001e0U
+#define P4_ESCR_TAG_MASK 0x000001e0ULL
#define P4_ESCR_TAG_SHIFT 5
-#define P4_ESCR_TAG_ENABLE 0x00000010U
-#define P4_ESCR_T0_OS 0x00000008U
-#define P4_ESCR_T0_USR 0x00000004U
-#define P4_ESCR_T1_OS 0x00000002U
-#define P4_ESCR_T1_USR 0x00000001U
+#define P4_ESCR_TAG_ENABLE 0x00000010ULL
+#define P4_ESCR_T0_OS 0x00000008ULL
+#define P4_ESCR_T0_USR 0x00000004ULL
+#define P4_ESCR_T1_OS 0x00000002ULL
+#define P4_ESCR_T1_USR 0x00000001ULL
#define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT)
#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
-#define P4_CCCR_OVF 0x80000000U
-#define P4_CCCR_CASCADE 0x40000000U
-#define P4_CCCR_OVF_PMI_T0 0x04000000U
-#define P4_CCCR_OVF_PMI_T1 0x08000000U
-#define P4_CCCR_FORCE_OVF 0x02000000U
-#define P4_CCCR_EDGE 0x01000000U
-#define P4_CCCR_THRESHOLD_MASK 0x00f00000U
+#define P4_CCCR_OVF 0x80000000ULL
+#define P4_CCCR_CASCADE 0x40000000ULL
+#define P4_CCCR_OVF_PMI_T0 0x04000000ULL
+#define P4_CCCR_OVF_PMI_T1 0x08000000ULL
+#define P4_CCCR_FORCE_OVF 0x02000000ULL
+#define P4_CCCR_EDGE 0x01000000ULL
+#define P4_CCCR_THRESHOLD_MASK 0x00f00000ULL
#define P4_CCCR_THRESHOLD_SHIFT 20
-#define P4_CCCR_COMPLEMENT 0x00080000U
-#define P4_CCCR_COMPARE 0x00040000U
-#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U
+#define P4_CCCR_COMPLEMENT 0x00080000ULL
+#define P4_CCCR_COMPARE 0x00040000ULL
+#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000ULL
#define P4_CCCR_ESCR_SELECT_SHIFT 13
-#define P4_CCCR_ENABLE 0x00001000U
-#define P4_CCCR_THREAD_SINGLE 0x00010000U
-#define P4_CCCR_THREAD_BOTH 0x00020000U
-#define P4_CCCR_THREAD_ANY 0x00030000U
-#define P4_CCCR_RESERVED 0x00000fffU
+#define P4_CCCR_ENABLE 0x00001000ULL
+#define P4_CCCR_THREAD_SINGLE 0x00010000ULL
+#define P4_CCCR_THREAD_BOTH 0x00020000ULL
+#define P4_CCCR_THREAD_ANY 0x00030000ULL
+#define P4_CCCR_RESERVED 0x00000fffULL
#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
#define P4_GEN_ESCR_EMASK(class, name, bit) \
- class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
+ class##__##name = ((1ULL << bit) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_EMASK_BIT(class, name) class##__##name
/*
@@ -107,7 +107,7 @@
* P4_PEBS_CONFIG_MASK and related bits on
* modification.)
*/
-#define P4_CONFIG_ALIASABLE (1 << 9)
+#define P4_CONFIG_ALIASABLE (1ULL << 9)
/*
* The bits we allow to pass for RAW events
@@ -784,17 +784,17 @@ enum P4_ESCR_EMASKS {
* Note we have UOP and PEBS bits reserved for now
* just in case if we will need them once
*/
-#define P4_PEBS_CONFIG_ENABLE (1 << 7)
-#define P4_PEBS_CONFIG_UOP_TAG (1 << 8)
-#define P4_PEBS_CONFIG_METRIC_MASK 0x3f
-#define P4_PEBS_CONFIG_MASK 0xff
+#define P4_PEBS_CONFIG_ENABLE (1ULL << 7)
+#define P4_PEBS_CONFIG_UOP_TAG (1ULL << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK 0x3FLL
+#define P4_PEBS_CONFIG_MASK 0xFFLL
/*
* mem: Only counters MSR_IQ_COUNTER4 (16) and
* MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
*/
-#define P4_PEBS_ENABLE 0x02000000U
-#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U
+#define P4_PEBS_ENABLE 0x02000000ULL
+#define P4_PEBS_ENABLE_UOP_TAG 0x01000000ULL
#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK)
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 8ff8be7835ab..6e5197910fd8 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -55,4 +55,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
#endif /* _ASM_UPROBES_H */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 7a060f4b411f..b5757885d7a4 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -72,6 +72,7 @@
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
#define MSR_MTRRfix64K_00000 0x00000250
#define MSR_MTRRfix16K_80000 0x00000258
@@ -195,6 +196,10 @@
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL 0xc0010230
+#define MSR_F16H_L2I_PERF_CTR 0xc0010231
+
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c9496313843b..deef0399fc78 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bf0f01aea994..1025f3c99d20 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -180,8 +180,9 @@ static void release_pmc_hardware(void) {}
static bool check_hw_exists(void)
{
- u64 val, val_new = ~0;
- int i, reg, ret = 0;
+ u64 val, val_fail, val_new= ~0;
+ int i, reg, reg_fail, ret = 0;
+ int bios_fail = 0;
/*
* Check to see if the BIOS enabled any of the counters, if so
@@ -192,8 +193,11 @@ static bool check_hw_exists(void)
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
- if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
- goto bios_fail;
+ if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
+ bios_fail = 1;
+ val_fail = val;
+ reg_fail = reg;
+ }
}
if (x86_pmu.num_counters_fixed) {
@@ -202,8 +206,11 @@ static bool check_hw_exists(void)
if (ret)
goto msr_fail;
for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
- if (val & (0x03 << i*4))
- goto bios_fail;
+ if (val & (0x03 << i*4)) {
+ bios_fail = 1;
+ val_fail = val;
+ reg_fail = reg;
+ }
}
}
@@ -221,14 +228,13 @@ static bool check_hw_exists(void)
if (ret || val != val_new)
goto msr_fail;
- return true;
-
-bios_fail:
/*
* We still allow the PMU driver to operate:
*/
- printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
- printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
+ if (bios_fail) {
+ printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
+ printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
+ }
return true;
@@ -1316,9 +1322,16 @@ static struct attribute_group x86_pmu_format_group = {
*/
static void __init filter_events(struct attribute **attrs)
{
+ struct device_attribute *d;
+ struct perf_pmu_events_attr *pmu_attr;
int i, j;
for (i = 0; attrs[i]; i++) {
+ d = (struct device_attribute *)attrs[i];
+ pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
+ /* str trumps id */
+ if (pmu_attr->event_str)
+ continue;
if (x86_pmu.event_map(i))
continue;
@@ -1330,22 +1343,45 @@ static void __init filter_events(struct attribute **attrs)
}
}
-static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+/* Merge two pointer arrays */
+static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+{
+ struct attribute **new;
+ int j, i;
+
+ for (j = 0; a[j]; j++)
+ ;
+ for (i = 0; b[i]; i++)
+ j++;
+ j++;
+
+ new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ j = 0;
+ for (i = 0; a[i]; i++)
+ new[j++] = a[i];
+ for (i = 0; b[i]; i++)
+ new[j++] = b[i];
+ new[j] = NULL;
+
+ return new;
+}
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct perf_pmu_events_attr *pmu_attr = \
container_of(attr, struct perf_pmu_events_attr, attr);
-
u64 config = x86_pmu.event_map(pmu_attr->id);
- return x86_pmu.events_sysfs_show(page, config);
-}
-#define EVENT_VAR(_id) event_attr_##_id
-#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+ /* string trumps id */
+ if (pmu_attr->event_str)
+ return sprintf(page, "%s", pmu_attr->event_str);
-#define EVENT_ATTR(_name, _id) \
- PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
- events_sysfs_show)
+ return x86_pmu.events_sysfs_show(page, config);
+}
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
@@ -1459,16 +1495,27 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters, 0);
+ 0, x86_pmu.num_counters, 0, 0);
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
+ if (x86_pmu.event_attrs)
+ x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs;
else
filter_events(x86_pmu_events_group.attrs);
+ if (x86_pmu.cpu_events) {
+ struct attribute **tmp;
+
+ tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
+ if (!WARN_ON(!tmp))
+ x86_pmu_events_group.attrs = tmp;
+ }
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7f5c75c2afdd..ba9aadfa683b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
+ EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
EXTRA_REG_MAX /* number of entries needed */
};
@@ -59,7 +60,13 @@ struct event_constraint {
u64 cmask;
int weight;
int overlap;
+ int flags;
};
+/*
+ * struct event_constraint flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -170,16 +177,17 @@ struct cpu_hw_events {
void *kfree_on_online;
};
-#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
{ .idxmsk64 = (n) }, \
.code = (c), \
.cmask = (m), \
.weight = (w), \
.overlap = (o), \
+ .flags = f, \
}
#define EVENT_CONSTRAINT(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
/*
* The overlap flag marks event constraints with overlapping counter
@@ -203,7 +211,7 @@ struct cpu_hw_events {
* and its counter masks must be kept at a minimum.
*/
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
/*
* Constraint on the Event code.
@@ -231,6 +239,14 @@ struct cpu_hw_events {
#define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+#define INTEL_PLD_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
+#define INTEL_PST_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
#define EVENT_CONSTRAINT_END \
EVENT_CONSTRAINT(0, 0, 0)
@@ -260,12 +276,22 @@ struct extra_reg {
.msr = (ms), \
.config_mask = (m), \
.valid_mask = (vm), \
- .idx = EXTRA_REG_##i \
+ .idx = EXTRA_REG_##i, \
}
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+ INTEL_UEVENT_EXTRA_REG(c, \
+ MSR_PEBS_LD_LAT_THRESHOLD, \
+ 0xffff, \
+ LDLAT)
+
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
union perf_capabilities {
@@ -355,8 +381,10 @@ struct x86_pmu {
*/
int attr_rdpmc;
struct attribute **format_attrs;
+ struct attribute **event_attrs;
ssize_t (*events_sysfs_show)(char *page, u64 config);
+ struct attribute **cpu_events;
/*
* CPU Hotplug hooks
@@ -421,6 +449,23 @@ do { \
#define ERF_NO_HT_SHARING 1
#define ERF_HAS_RSP_1 2
+#define EVENT_VAR(_id) event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id) \
+static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
+ .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+ .id = PERF_COUNT_HW_##_id, \
+ .event_str = NULL, \
+};
+
+#define EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+ .id = 0, \
+ .event_str = str, \
+};
+
extern struct x86_pmu x86_pmu __read_mostly;
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -628,6 +673,9 @@ int p6_pmu_init(void);
int knc_pmu_init(void);
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page);
+
#else /* CONFIG_CPU_SUP_INTEL */
static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index dfdab42aed27..7e28d9467bb4 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,14 +132,11 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
-static struct event_constraint *amd_nb_event_constraint;
-
/*
* Previously calculated offsets
*/
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
-static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
/*
* Legacy CPUs:
@@ -147,14 +144,10 @@ static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
*
* CPUs with core performance counter extensions:
* 6 counters starting at 0xc0010200 each offset by 2
- *
- * CPUs with north bridge performance counter extensions:
- * 4 additional counters starting at 0xc0010240 each offset by 2
- * (indexed right above either one of the above core counters)
*/
static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
- int offset, first, base;
+ int offset;
if (!index)
return index;
@@ -167,23 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
if (offset)
return offset;
- if (amd_nb_event_constraint &&
- test_bit(index, amd_nb_event_constraint->idxmsk)) {
- /*
- * calculate the offset of NB counters with respect to
- * base eventsel or perfctr
- */
-
- first = find_first_bit(amd_nb_event_constraint->idxmsk,
- X86_PMC_IDX_MAX);
-
- if (eventsel)
- base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
- else
- base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
-
- offset = base + ((index - first) << 1);
- } else if (!cpu_has_perfctr_core)
+ if (!cpu_has_perfctr_core)
offset = index;
else
offset = index << 1;
@@ -196,36 +173,6 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
return offset;
}
-static inline int amd_pmu_rdpmc_index(int index)
-{
- int ret, first;
-
- if (!index)
- return index;
-
- ret = rdpmc_indexes[index];
-
- if (ret)
- return ret;
-
- if (amd_nb_event_constraint &&
- test_bit(index, amd_nb_event_constraint->idxmsk)) {
- /*
- * according to the mnual, ECX value of the NB counters is
- * the index of the NB counter (0, 1, 2 or 3) plus 6
- */
-
- first = find_first_bit(amd_nb_event_constraint->idxmsk,
- X86_PMC_IDX_MAX);
- ret = index - first + 6;
- } else
- ret = index;
-
- rdpmc_indexes[index] = ret;
-
- return ret;
-}
-
static int amd_core_hw_config(struct perf_event *event)
{
if (event->attr.exclude_host && event->attr.exclude_guest)
@@ -245,34 +192,6 @@ static int amd_core_hw_config(struct perf_event *event)
}
/*
- * NB counters do not support the following event select bits:
- * Host/Guest only
- * Counter mask
- * Invert counter mask
- * Edge detect
- * OS/User mode
- */
-static int amd_nb_hw_config(struct perf_event *event)
-{
- /* for NB, we only allow system wide counting mode */
- if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
- return -EINVAL;
-
- if (event->attr.exclude_user || event->attr.exclude_kernel ||
- event->attr.exclude_host || event->attr.exclude_guest)
- return -EINVAL;
-
- event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
- ARCH_PERFMON_EVENTSEL_OS);
-
- if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
- ARCH_PERFMON_EVENTSEL_INT))
- return -EINVAL;
-
- return 0;
-}
-
-/*
* AMD64 events are detected based on their event codes.
*/
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
@@ -285,11 +204,6 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
-static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
-{
- return amd_nb_event_constraint && amd_is_nb_event(hwc);
-}
-
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;
@@ -315,9 +229,6 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (event->attr.type == PERF_TYPE_RAW)
event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
- if (amd_is_perfctr_nb_event(&event->hw))
- return amd_nb_hw_config(event);
-
return amd_core_hw_config(event);
}
@@ -341,19 +252,6 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
}
}
-static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
-{
- int core_id = cpu_data(smp_processor_id()).cpu_core_id;
-
- /* deliver interrupts only to this core */
- if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
- hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
- hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
- hwc->config |= (u64)(core_id) <<
- AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
- }
-}
-
/*
* AMD64 NorthBridge events need special treatment because
* counter access needs to be synchronized across all cores
@@ -441,9 +339,6 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
if (new == -1)
return &emptyconstraint;
- if (amd_is_perfctr_nb_event(hwc))
- amd_nb_interrupt_hw_config(hwc);
-
return &nb->event_constraints[new];
}
@@ -543,8 +438,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
return &unconstrained;
- return __amd_get_nb_event_constraints(cpuc, event,
- amd_nb_event_constraint);
+ return __amd_get_nb_event_constraints(cpuc, event, NULL);
}
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -643,9 +537,6 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
-static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
-static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
-
static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
@@ -711,8 +602,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
return &amd_f15_PMC20;
}
case AMD_EVENT_NB:
- return __amd_get_nb_event_constraints(cpuc, event,
- amd_nb_event_constraint);
+ /* moved to perf_event_amd_uncore.c */
+ return &emptyconstraint;
default:
return &emptyconstraint;
}
@@ -738,7 +629,6 @@ static __initconst const struct x86_pmu amd_pmu = {
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
.addr_offset = amd_pmu_addr_offset,
- .rdpmc_index = amd_pmu_rdpmc_index,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
@@ -790,23 +680,6 @@ static int setup_perfctr_core(void)
return 0;
}
-static int setup_perfctr_nb(void)
-{
- if (!cpu_has_perfctr_nb)
- return -ENODEV;
-
- x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
-
- if (cpu_has_perfctr_core)
- amd_nb_event_constraint = &amd_NBPMC96;
- else
- amd_nb_event_constraint = &amd_NBPMC74;
-
- printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
-
- return 0;
-}
-
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
@@ -817,7 +690,6 @@ __init int amd_pmu_init(void)
setup_event_constraints();
setup_perfctr_core();
- setup_perfctr_nb();
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
new file mode 100644
index 000000000000..c0c661adf03e
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -0,0 +1,547 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <jacob.shin@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+
+#include <asm/cpufeature.h>
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+
+#define NUM_COUNTERS_NB 4
+#define NUM_COUNTERS_L2 4
+#define MAX_COUNTERS NUM_COUNTERS_NB