summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-07-09 11:15:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-07-09 11:15:52 -0700
commit608745f12462e2d8d94d5cc5dc94bf0960a881e3 (patch)
treee938bc957d235018d1cf4b018b09aaffae8a5f34 /arch
parentcdc5ffc4100549654e19e6f068cf1fc0871a85c2 (diff)
parentd1d59b817939821bee149e870ce7723f61ffb512 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "The main changes in this cycle on the kernel side were: - CPU PMU and uncore driver updates to Intel Snow Ridge, IceLake, KabyLake, AmberLake and WhiskeyLake CPUs. - Rework the MSR probing infrastructure to make it more robust, make it work better on virtualized systems and to better expose it on sysfs. - Rework PMU attributes group support based on the feedback from Greg. The core sysfs patch that adds sysfs_update_groups() was acked by Greg. There's a lot of perf tooling changes as well, all around the place: - vendor updates to Intel, cs-etm (ARM), ARM64, s390, - various enhancements to Intel PT tooling support: - Improve CBR (Core to Bus Ratio) packets support. - Export power and ptwrite events to sqlite and postgresql. - Add support for decoding PEBS via PT packets. - Add support for samples to contain IPC ratio, collecting cycles information from CYC packets, showing the IPC info periodically - Allow using time ranges - lots of updates to perf pmu, perf stat, perf trace, eBPF support, perf record, perf diff, etc. - please see the shortlog and Git log for details" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (252 commits) tools arch x86: Sync asm/cpufeatures.h with the with the kernel tools build: Check if gettid() is available before providing helper perf jvmti: Address gcc string overflow warning for strncpy() perf python: Remove -fstack-protector-strong if clang doesn't have it perf annotate TUI browser: Do not use member from variable within its own initialization perf tests: Fix record+probe_libc_inet_pton.sh for powerpc64 perf evsel: Do not rely on errno values for precise_ip fallback perf thread: Allow references to thread objects after machine__exit() perf header: Assign proper ff->ph in perf_event__synthesize_features() tools arch kvm: Sync kvm headers with the kernel sources perf script: Allow specifying the files to process guest samples perf tools metric: Don't include duration_time in group perf list: Avoid extra : for --raw metrics perf vendor events intel: Metric fixes for SKX/CLX perf tools: Fix typos / broken sentences perf jevents: Add support for Hisi hip08 L3C PMU aliasing perf jevents: Add support for Hisi hip08 HHA PMU aliasing perf jevents: Add support for Hisi hip08 DDRC PMU aliasing perf pmu: Support more complex PMU event aliasing perf diff: Documentation -c cycles option ...
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/events/Makefile2
-rw-r--r--arch/x86/events/core.c106
-rw-r--r--arch/x86/events/intel/core.c185
-rw-r--r--arch/x86/events/intel/cstate.c153
-rw-r--r--arch/x86/events/intel/rapl.c379
-rw-r--r--arch/x86/events/intel/uncore.c122
-rw-r--r--arch/x86/events/intel/uncore.h41
-rw-r--r--arch/x86/events/intel/uncore_snb.c101
-rw-r--r--arch/x86/events/intel/uncore_snbep.c601
-rw-r--r--arch/x86/events/msr.c110
-rw-r--r--arch/x86/events/perf_event.h7
-rw-r--r--arch/x86/events/probe.c45
-rw-r--r--arch/x86/events/probe.h29
13 files changed, 1336 insertions, 545 deletions
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 9cbfd34042d5..9e07f554333f 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += core.o
+obj-y += core.o probe.o
obj-y += amd/
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel/
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index ceb712b0a1c6..81b005e4c7d9 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1618,68 +1618,6 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = {
.attrs = NULL,
};
-/*
- * Remove all undefined events (x86_pmu.event_map(id) == 0)
- * out of events_attr attributes.
- */
-static void __init filter_events(struct attribute **attrs)
-{
- struct device_attribute *d;
- struct perf_pmu_events_attr *pmu_attr;
- int offset = 0;
- int i, j;
-
- for (i = 0; attrs[i]; i++) {
- d = (struct device_attribute *)attrs[i];
- pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
- /* str trumps id */
- if (pmu_attr->event_str)
- continue;
- if (x86_pmu.event_map(i + offset))
- continue;
-
- for (j = i; attrs[j]; j++)
- attrs[j] = attrs[j + 1];
-
- /* Check the shifted attr. */
- i--;
-
- /*
- * event_map() is index based, the attrs array is organized
- * by increasing event index. If we shift the events, then
- * we need to compensate for the event_map(), otherwise
- * we are looking up the wrong event in the map
- */
- offset++;
- }
-}
-
-/* Merge two pointer arrays */
-__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
-{
- struct attribute **new;
- int j, i;
-
- for (j = 0; a && a[j]; j++)
- ;
- for (i = 0; b && b[i]; i++)
- j++;
- j++;
-
- new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
- if (!new)
- return NULL;
-
- j = 0;
- for (i = 0; a && a[i]; i++)
- new[j++] = a[i];
- for (i = 0; b && b[i]; i++)
- new[j++] = b[i];
- new[j] = NULL;
-
- return new;
-}
-
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
{
struct perf_pmu_events_attr *pmu_attr = \
@@ -1744,9 +1682,24 @@ static struct attribute *events_attr[] = {
NULL,
};
+/*
+ * Remove all undefined events (x86_pmu.event_map(id) == 0)
+ * out of events_attr attributes.
+ */
+static umode_t
+is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+ /* str trumps id */
+ return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0;
+}
+
static struct attribute_group x86_pmu_events_group __ro_after_init = {
.name = "events",
.attrs = events_attr,
+ .is_visible = is_visible,
};
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
@@ -1842,37 +1795,10 @@ static int __init init_hw_perf_events(void)
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
- if (x86_pmu.caps_attrs) {
- struct attribute **tmp;
-
- tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs);
- if (!WARN_ON(!tmp))
- x86_pmu_caps_group.attrs = tmp;
- }
-
- if (x86_pmu.event_attrs)
- x86_pmu_events_group.attrs = x86_pmu.event_attrs;
-
if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs;
- else
- filter_events(x86_pmu_events_group.attrs);
- if (x86_pmu.cpu_events) {
- struct attribute **tmp;
-
- tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
- if (!WARN_ON(!tmp))
- x86_pmu_events_group.attrs = tmp;
- }
-
- if (x86_pmu.attrs) {
- struct attribute **tmp;
-
- tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs);
- if (!WARN_ON(!tmp))
- x86_pmu_attr_group.attrs = tmp;
- }
+ pmu.attr_update = x86_pmu.attr_update;
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a5436cee20b1..bda450ff51ee 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -20,6 +20,7 @@
#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
+#include <asm/hypervisor.h>
#include "../perf_event.h"
@@ -3897,8 +3898,6 @@ static __initconst const struct x86_pmu core_pmu = {
.check_period = intel_pmu_check_period,
};
-static struct attribute *intel_pmu_attrs[];
-
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -3930,8 +3929,6 @@ static __initconst const struct x86_pmu intel_pmu = {
.format_attrs = intel_arch3_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
- .attrs = intel_pmu_attrs,
-
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
@@ -4055,6 +4052,13 @@ static bool check_msr(unsigned long msr, u64 mask)
u64 val_old, val_new, val_tmp;
/*
+ * Disable the check for real HW, so we don't
+ * mess with potentionaly enabled registers:
+ */
+ if (hypervisor_is_type(X86_HYPER_NATIVE))
+ return true;
+
+ /*
* Read the current value, change it and read it back to see if it
* matches, this is needed to detect certain hardware emulators
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
@@ -4274,13 +4278,6 @@ static struct attribute *icl_tsx_events_attrs[] = {
NULL,
};
-static __init struct attribute **get_icl_events_attrs(void)
-{
- return boot_cpu_has(X86_FEATURE_RTM) ?
- merge_attr(icl_events_attrs, icl_tsx_events_attrs) :
- icl_events_attrs;
-}
-
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@@ -4402,43 +4399,111 @@ static DEVICE_ATTR(allow_tsx_force_abort, 0644,
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
- NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
+ &dev_attr_allow_tsx_force_abort.attr,
NULL,
};
-static __init struct attribute **
-get_events_attrs(struct attribute **base,
- struct attribute **mem,
- struct attribute **tsx)
+static umode_t
+tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
- struct attribute **attrs = base;
- struct attribute **old;
+ return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0;
+}
- if (mem && x86_pmu.pebs)
- attrs = merge_attr(attrs, mem);
+static umode_t
+pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.pebs ? attr->mode : 0;
+}
- if (tsx && boot_cpu_has(X86_FEATURE_RTM)) {
- old = attrs;
- attrs = merge_attr(attrs, tsx);
- if (old != base)
- kfree(old);
- }
+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+}
- return attrs;
+static umode_t
+exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.version >= 2 ? attr->mode : 0;
}
+static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ if (attr == &dev_attr_allow_tsx_force_abort.attr)
+ return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+ return attr->mode;
+}
+
+static struct attribute_group group_events_td = {
+ .name = "events",
+};
+
+static struct attribute_group group_events_mem = {
+ .name = "events",
+ .is_visible = pebs_is_visible,
+};
+
+static struct attribute_group group_events_tsx = {
+ .name = "events",
+ .is_visible = tsx_is_visible,
+};
+
+static struct attribute_group group_caps_gen = {
+ .name = "caps",
+ .attrs = intel_pmu_caps_attrs,
+};
+
+static struct attribute_group group_caps_lbr = {
+ .name = "caps",
+ .attrs = lbr_attrs,
+ .is_visible = lbr_is_visible,
+};
+
+static struct attribute_group group_format_extra = {
+ .name = "format",
+ .is_visible = exra_is_visible,
+};
+
+static struct attribute_group group_format_extra_skl = {
+ .name = "format",
+ .is_visible = exra_is_visible,
+};
+
+static struct attribute_group group_default = {
+ .attrs = intel_pmu_attrs,
+ .is_visible = default_is_visible,
+};
+
+static const struct attribute_group *attr_update[] = {
+ &group_events_td,
+ &group_events_mem,
+ &group_events_tsx,
+ &group_caps_gen,
+ &group_caps_lbr,
+ &group_format_extra,
+ &group_format_extra_skl,
+ &group_default,
+ NULL,
+};
+
+static struct attribute *empty_attrs;
+
__init int intel_pmu_init(void)
{
- struct attribute **extra_attr = NULL;
- struct attribute **mem_attr = NULL;
- struct attribute **tsx_attr = NULL;
- struct attribute **to_free = NULL;
+ struct attribute **extra_skl_attr = &empty_attrs;
+ struct attribute **extra_attr = &empty_attrs;
+ struct attribute **td_attr = &empty_attrs;
+ struct attribute **mem_attr = &empty_attrs;
+ struct attribute **tsx_attr = &empty_attrs;
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
struct event_constraint *c;
unsigned int unused;
struct extra_reg *er;
+ bool pmem = false;
int version, i;
char *name;
@@ -4596,7 +4661,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = slm_events_attrs;
+ td_attr = slm_events_attrs;
extra_attr = slm_format_attr;
pr_cont("Silvermont events, ");
name = "silvermont";
@@ -4624,7 +4689,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = glm_events_attrs;
+ td_attr = glm_events_attrs;
extra_attr = slm_format_attr;
pr_cont("Goldmont events, ");
name = "goldmont";
@@ -4651,7 +4716,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
- x86_pmu.cpu_events = glm_events_attrs;
+ td_attr = glm_events_attrs;
/* Goldmont Plus has 4-wide pipeline */
event_attr_td_total_slots_scale_glm.event_str = "4";
extra_attr = slm_format_attr;
@@ -4740,7 +4805,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.cpu_events = snb_events_attrs;
+ td_attr = snb_events_attrs;
mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
@@ -4781,7 +4846,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.cpu_events = snb_events_attrs;
+ td_attr = snb_events_attrs;
mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
@@ -4818,10 +4883,10 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ td_attr = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
pr_cont("Haswell events, ");
@@ -4860,10 +4925,10 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.limit_period = bdw_limit_period;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ td_attr = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
pr_cont("Broadwell events, ");
@@ -4890,9 +4955,10 @@ __init int intel_pmu_init(void)
name = "knights-landing";
break;
+ case INTEL_FAM6_SKYLAKE_X:
+ pmem = true;
case INTEL_FAM6_SKYLAKE_MOBILE:
case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_add_quirk(intel_pebs_isolation_quirk);
@@ -4920,27 +4986,28 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = hsw_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
- extra_attr = merge_attr(extra_attr, skl_format_attr);
- to_free = extra_attr;
- x86_pmu.cpu_events = hsw_events_attrs;
+ extra_skl_attr = skl_format_attr;
+ td_attr = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
- intel_pmu_pebs_data_source_skl(
- boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
+ intel_pmu_pebs_data_source_skl(pmem);
if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
x86_pmu.flags |= PMU_FL_TFA;
x86_pmu.get_event_constraints = tfa_get_event_constraints;
x86_pmu.enable_all = intel_tfa_pmu_enable_all;
x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
- intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
}
pr_cont("Skylake events, ");
name = "skylake";
break;
+ case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_XEON_D:
+ pmem = true;
case INTEL_FAM6_ICELAKE_MOBILE:
+ case INTEL_FAM6_ICELAKE_DESKTOP:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4959,11 +5026,12 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = icl_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
- extra_attr = merge_attr(extra_attr, skl_format_attr);
- x86_pmu.cpu_events = get_icl_events_attrs();
+ extra_skl_attr = skl_format_attr;
+ mem_attr = icl_events_attrs;
+ tsx_attr = icl_tsx_events_attrs;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
x86_pmu.lbr_pt_coexist = true;
- intel_pmu_pebs_data_source_skl(false);
+ intel_pmu_pebs_data_source_skl(pmem);
pr_cont("Icelake events, ");
name = "icelake";
break;
@@ -4988,14 +5056,14 @@ __init int intel_pmu_init(void)
snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
- if (version >= 2 && extra_attr) {
- x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
- extra_attr);
- WARN_ON(!x86_pmu.format_attrs);
- }
- x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events,
- mem_attr, tsx_attr);
+ group_events_td.attrs = td_attr;
+ group_events_mem.attrs = mem_attr;
+ group_events_tsx.attrs = tsx_attr;
+ group_format_extra.attrs = extra_attr;
+ group_format_extra_skl.attrs = extra_skl_attr;
+
+ x86_pmu.attr_update = attr_update;
if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -5043,12 +5111,8 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_nr = 0;
}
- x86_pmu.caps_attrs = intel_pmu_caps_attrs;
-
- if (x86_pmu.lbr_nr) {
- x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs);
+ if (x86_pmu.lbr_nr)
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
- }
/*
* Access extra MSR may cause #GP under certain circumstances.
@@ -5078,7 +5142,6 @@ __init int intel_pmu_init(void)
if (x86_pmu.counter_freezing)
x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
- kfree(to_free);
return 0;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 267d7f8e12ab..688592b34564 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -96,6 +96,7 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
+#include "../probe.h"
MODULE_LICENSE("GPL");
@@ -144,25 +145,42 @@ enum perf_cstate_core_events {
PERF_CSTATE_CORE_EVENT_MAX,
};
-PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03");
-static struct perf_cstate_msr core_msr[] = {
- [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 },
- [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 },
- [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 },
- [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 },
+static unsigned long core_msr_mask;
+
+PMU_EVENT_GROUP(events, cstate_core_c1);
+PMU_EVENT_GROUP(events, cstate_core_c3);
+PMU_EVENT_GROUP(events, cstate_core_c6);
+PMU_EVENT_GROUP(events, cstate_core_c7);
+
+static bool test_msr(int idx, void *data)
+{
+ return test_bit(idx, (unsigned long *) data);
+}
+
+static struct perf_msr core_msr[] = {
+ [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &group_cstate_core_c1, test_msr },
+ [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &group_cstate_core_c3, test_msr },
+ [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &group_cstate_core_c6, test_msr },
+ [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &group_cstate_core_c7, test_msr },
};
-static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+static struct attribute *attrs_empty[] = {
NULL,
};
+/*
+ * There are no default events, but we need to create
+ * "events" group (with empty attrs) before updating
+ * it with detected events.
+ */
static struct attribute_group core_events_attr_group = {
.name = "events",
- .attrs = core_events_attrs,
+ .attrs = attrs_empty,
};
DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
@@ -211,31 +229,37 @@ enum perf_cstate_pkg_events {
PERF_CSTATE_PKG_EVENT_MAX,
};
-PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
-PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
-PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
-PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
-
-static struct perf_cstate_msr pkg_msr[] = {
- [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 },
- [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 },
- [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 },
- [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 },
- [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 },
- [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 },
- [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 },
-};
-
-static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
- NULL,
+PMU_EVENT_ATTR_STRING(c2-residency, attr_cstate_pkg_c2, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_pkg_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_pkg_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_pkg_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency, attr_cstate_pkg_c8, "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency, attr_cstate_pkg_c9, "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06");
+
+static unsigned long pkg_msr_mask;
+
+PMU_EVENT_GROUP(events, cstate_pkg_c2);
+PMU_EVENT_GROUP(events, cstate_pkg_c3);
+PMU_EVENT_GROUP(events, cstate_pkg_c6);
+PMU_EVENT_GROUP(events, cstate_pkg_c7);
+PMU_EVENT_GROUP(events, cstate_pkg_c8);
+PMU_EVENT_GROUP(events, cstate_pkg_c9);
+PMU_EVENT_GROUP(events, cstate_pkg_c10);
+
+static struct perf_msr pkg_msr[] = {
+ [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &group_cstate_pkg_c2, test_msr },
+ [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &group_cstate_pkg_c3, test_msr },
+ [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &group_cstate_pkg_c6, test_msr },
+ [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &group_cstate_pkg_c7, test_msr },
+ [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &group_cstate_pkg_c8, test_msr },
+ [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &group_cstate_pkg_c9, test_msr },
+ [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr },
};
static struct attribute_group pkg_events_attr_group = {
.name = "events",
- .attrs = pkg_events_attrs,
+ .attrs = attrs_empty,
};
DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
@@ -289,7 +313,8 @@ static int cstate_pmu_event_init(struct perf_event *event)
if (event->pmu == &cstate_core_pmu) {
if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
return -EINVAL;
- if (!core_msr[cfg].attr)
+ cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX);
+ if (!(core_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = core_msr[cfg].msr;
cpu = cpumask_any_and(&cstate_core_cpu_mask,
@@ -298,7 +323,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
- if (!pkg_msr[cfg].attr)
+ if (!(pkg_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
@@ -421,8 +446,28 @@ static int cstate_cpu_init(unsigned int cpu)
return 0;
}
+const struct attribute_group *core_attr_update[] = {
+ &group_cstate_core_c1,
+ &group_cstate_core_c3,
+ &group_cstate_core_c6,
+ &group_cstate_core_c7,
+ NULL,
+};
+
+const struct attribute_group *pkg_attr_update[] = {
+ &group_cstate_pkg_c2,
+ &group_cstate_pkg_c3,
+ &group_cstate_pkg_c6,
+ &group_cstate_pkg_c7,
+ &group_cstate_pkg_c8,
+ &group_cstate_pkg_c9,
+ &group_cstate_pkg_c10,
+ NULL,
+};
+
static struct pmu cstate_core_pmu = {
.attr_groups = core_attr_groups,
+ .attr_update = core_attr_update,
.name = "cstate_core",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
@@ -437,6 +482,7 @@ static struct pmu cstate_core_pmu = {
static struct pmu cstate_pkg_pmu = {
.attr_groups = pkg_attr_groups,
+ .attr_update = pkg_attr_update,
.name = "cstate_pkg",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
@@ -580,35 +626,11 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there are no available events.
- */
-static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
- struct perf_cstate_msr *msr,
- struct attribute **attrs)
-{
- bool found = false;
- unsigned int bit;
- u64 val;
-
- for (bit = 0; bit < max; bit++) {
- if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
- *attrs++ = &msr[bit].attr->attr.attr;
- found = true;
- } else {
- msr[bit].attr = NULL;
- }
- }
- *attrs = NULL;
-
- return found;
-}
-
static int __init cstate_probe(const struct cstate_model *cm)
{
/* SLM has different MSR for PKG C6 */
@@ -620,13 +642,14 @@ static int __init cstate_probe(const struct cstate_model *cm)
pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY;
- has_cstate_core = cstate_probe_msr(cm->core_events,
- PERF_CSTATE_CORE_EVENT_MAX,
- core_msr, core_events_attrs);
+ core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX,
+ true, (void *) &cm->core_events);
+
+ pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX,
+ true, (void *) &cm->pkg_events);
- has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
- PERF_CSTATE_PKG_EVENT_MAX,
- pkg_msr, pkg_events_attrs);
+ has_cstate_core = !!core_msr_mask;
+ has_cstate_pkg = !!pkg_msr_mask;
return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
}
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 8c7ecde3ba70..64ab51ffdf06 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -55,27 +55,28 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
+#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
+#include "../probe.h"
MODULE_LICENSE("GPL");
/*
* RAPL energy status counters
*/
-#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */
-#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */
-#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */
-#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
-#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
-#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
-#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
-#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
-#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */
-#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */
-
-#define NR_RAPL_DOMAINS 0x5
+enum perf_rapl_events {
+ PERF_RAPL_PP0 = 0, /* all cores */
+ PERF_RAPL_PKG, /* entire package */
+ PERF_RAPL_RAM, /* DRAM */
+ PERF_RAPL_PP1, /* gpu */
+ PERF_RAPL_PSYS, /* psys */
+
+ PERF_RAPL_MAX,
+ NR_RAPL_DOMAINS = PERF_RAPL_MAX,
+};
+
static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
@@ -84,33 +85,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"psys",
};
-/* Clients have PP0, PKG */
-#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM */
-#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM, PP1 */
-#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
-#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
- 1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT|\
- 1<<RAPL_IDX_PP1_NRG_STAT|\
- 1<<RAPL_IDX_PSYS_NRG_STAT)
-
-/* Knights Landing has PKG, RAM */
-#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
- 1<<RAPL_IDX_RAM_NRG_STAT)
-
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
@@ -153,12 +127,18 @@ struct rapl_pmus {
struct rapl_pmu *pmus[];
};
+struct rapl_model {
+ unsigned long events;
+ bool apply_quirk;
+};
+
/* 1/2^hw_unit Joule */
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
+static struct perf_msr rapl_msrs[];
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
@@ -350,7 +330,7 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags)
static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- int bit, msr, ret = 0;
+ int bit, ret = 0;
struct rapl_pmu *pmu;
/* only look at RAPL events */
@@ -366,33 +346,12 @@ static int rapl_pmu_event_init(struct perf_event *event)
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
- /*
- * check event is known (determines counter)
- */
- switch (cfg) {
- case INTEL_RAPL_PP0:
- bit = RAPL_IDX_PP0_NRG_STAT;
- msr = MSR_PP0_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PKG:
- bit = RAPL_IDX_PKG_NRG_STAT;
- msr = MSR_PKG_ENERGY_STATUS;
- break;
- case INTEL_RAPL_RAM:
- bit = RAPL_IDX_RAM_NRG_STAT;
- msr = MSR_DRAM_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PP1:
- bit = RAPL_IDX_PP1_NRG_STAT;
- msr = MSR_PP1_ENERGY_STATUS;
- break;
- case INTEL_RAPL_PSYS:
- bit = RAPL_IDX_PSYS_NRG_STAT;
- msr = MSR_PLATFORM_ENERGY_STATUS;
- break;
- default:
+ if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
return -EINVAL;
- }
+
+ cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
+ bit = cfg - 1;
+
/* check event supported */
if (!(rapl_cntr_mask & (1 << bit)))
return -EINVAL;
@@ -407,7 +366,7 @@ static int rapl_pmu_event_init(struct perf_event *event)
return -EINVAL;
event->cpu = pmu->cpu;
event->pmu_private = pmu;
- event->hw.event_base = msr;
+ event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -457,110 +416,111 @@ RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");
-static struct attribute *rapl_events_srv_attr[] = {
- EVENT_PTR(rapl_cores),
- EVENT_PTR(rapl_pkg),
- EVENT_PTR(rapl_ram),
+/*
+ * There are no default events, but we need to create
+ * "events" group (with empty attrs) before updating
+ * it with detected events.
+ */
+static struct attribute *attrs_empty[] = {
+ NULL,
+};
- EVENT_PTR(rapl_cores_unit),
- EVENT_PTR(rapl_pkg_unit),
- EVENT_PTR(rapl_ram_unit),