summaryrefslogtreecommitdiffstats
path: root/arch/x86/events
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-12 14:14:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-12 14:14:35 -0700
commit3bff6112c80cecb76af5fe485506f96e8adb6122 (patch)
tree3a2507f01d3d2d3296742ae1dc59f4ea40d12705 /arch/x86/events
parentdd502a81077a5f3b3e19fa9a1accffdcab5ad5bc (diff)
parentf91072ed1b7283b13ca57fcfbece5a3b92726143 (diff)
Merge tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar: "x86 Intel updates: - Add Jasper Lake support - Add support for TopDown metrics on Ice Lake - Fix Ice Lake & Tiger Lake uncore support, add Snow Ridge support - Add a PCI sub driver to support uncore PMUs where the PCI resources have been claimed already - extending the range of supported systems. x86 AMD updates: - Restore 'perf stat -a' behaviour to program the uncore PMU to count all CPU threads. - Fix setting the proper count when sampling Large Increment per Cycle events / 'paired' events. - Fix IBS Fetch sampling on F17h and some other IBS fine tuning, greatly reducing the number of interrupts when large sample periods are specified. - Extends Family 17h RAPL support to also work on compatible F19h machines. Core code updates: - Fix race in perf_mmap_close() - Add PERF_EV_CAP_SIBLING, to denote that sibling events should be closed if the leader is removed. - Smaller fixes and updates" * tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits) perf/core: Fix race in the perf_mmap_close() function perf/x86: Fix n_metric for cancelled txn perf/x86: Fix n_pair for cancelled txn x86/events/amd/iommu: Fix sizeof mismatch perf/x86/intel: Check perf metrics feature for each CPU perf/x86/intel: Fix Ice Lake event constraint table perf/x86/intel/uncore: Fix the scale of the IMC free-running events perf/x86/intel/uncore: Fix for iio mapping on Skylake Server perf/x86/msr: Add Jasper Lake support perf/x86/intel: Add Jasper Lake support perf/x86/intel/uncore: Reduce the number of CBOX counters perf/x86/intel/uncore: Update Ice Lake uncore units perf/x86/intel/uncore: Split the Ice Lake and Tiger Lake MSR uncore support perf/x86/intel/uncore: Support PCIe3 unit on Snow Ridge perf/x86/intel/uncore: Generic support for the PCI sub driver perf/x86/intel/uncore: Factor out uncore_pci_pmu_unregister() perf/x86/intel/uncore: Factor out uncore_pci_pmu_register() perf/x86/intel/uncore: Factor out uncore_pci_find_dev_pmu() perf/x86/intel/uncore: Factor out uncore_pci_get_dev_die_info() perf/amd/uncore: Inform the user how many counters each uncore PMU has ...
Diffstat (limited to 'arch/x86/events')
-rw-r--r--arch/x86/events/amd/ibs.c93
-rw-r--r--arch/x86/events/amd/iommu.c2
-rw-r--r--arch/x86/events/amd/uncore.c186
-rw-r--r--arch/x86/events/core.c91
-rw-r--r--arch/x86/events/intel/core.c364
-rw-r--r--arch/x86/events/intel/ds.c32
-rw-r--r--arch/x86/events/intel/uncore.c275
-rw-r--r--arch/x86/events/intel/uncore.h2
-rw-r--r--arch/x86/events/intel/uncore_snb.c45
-rw-r--r--arch/x86/events/intel/uncore_snbep.c72
-rw-r--r--arch/x86/events/msr.c1
-rw-r--r--arch/x86/events/perf_event.h54
-rw-r--r--arch/x86/events/rapl.c1
13 files changed, 992 insertions, 226 deletions
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 26c36357c4c9..40669eac9d6d 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -89,6 +89,7 @@ struct perf_ibs {
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
+ unsigned int fetch_count_reset_broken : 1;
struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs;
@@ -334,11 +335,18 @@ static u64 get_ibs_op_count(u64 config)
{
u64 count = 0;
- if (config & IBS_OP_VAL)
- count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
-
- if (ibs_caps & IBS_CAPS_RDWROPCNT)
- count += (config & IBS_OP_CUR_CNT) >> 32;
+ /*
+ * If the internal 27-bit counter rolled over, the count is MaxCnt
+ * and the lower 7 bits of CurCnt are randomized.
+ * Otherwise CurCnt has the full 27-bit current counter value.
+ */
+ if (config & IBS_OP_VAL) {
+ count = (config & IBS_OP_MAX_CNT) << 4;
+ if (ibs_caps & IBS_CAPS_OPCNTEXT)
+ count += config & IBS_OP_MAX_CNT_EXT_MASK;
+ } else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
+ count = (config & IBS_OP_CUR_CNT) >> 32;
+ }
return count;
}
@@ -363,7 +371,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
- wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+ u64 tmp = hwc->config | config;
+
+ if (perf_ibs->fetch_count_reset_broken)
+ wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
+
+ wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
}
/*
@@ -394,7 +407,7 @@ static void perf_ibs_start(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
- u64 period;
+ u64 period, config = 0;
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
@@ -403,13 +416,19 @@ static void perf_ibs_start(struct perf_event *event, int flags)
hwc->state = 0;
perf_ibs_set_period(perf_ibs, hwc, &period);
+ if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
+ config |= period & IBS_OP_MAX_CNT_EXT_MASK;
+ period &= ~IBS_OP_MAX_CNT_EXT_MASK;
+ }
+ config |= period >> 4;
+
/*
* Set STARTED before enabling the hardware, such that a subsequent NMI
* must observe it.
*/
set_bit(IBS_STARTED, pcpu->state);
clear_bit(IBS_STOPPING, pcpu->state);
- perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+ perf_ibs_enable_event(perf_ibs, hwc, config);
perf_event_update_userpage(event);
}
@@ -577,7 +596,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
struct perf_ibs_data ibs_data;
int offset, size, check_rip, offset_max, throttle = 0;
unsigned int msr;
- u64 *buf, *config, period;
+ u64 *buf, *config, period, new_config = 0;
if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
@@ -626,18 +645,24 @@ fail:
perf_ibs->offset_max,
offset + 1);
} while (offset < offset_max);
+ /*
+ * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
+ * depending on their availability.
+ * Can't add to offset_max as they are staggered
+ */
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- /*
- * Read IbsBrTarget and IbsOpData4 separately
- * depending on their availability.
- * Can't add to offset_max as they are staggered
- */
- if (ibs_caps & IBS_CAPS_BRNTRGT) {
- rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
- size++;
+ if (perf_ibs == &perf_ibs_op) {
+ if (ibs_caps & IBS_CAPS_BRNTRGT) {
+ rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
+ size++;
+ }
+ if (ibs_caps & IBS_CAPS_OPDATA4) {
+ rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
+ size++;
+ }
}
- if (ibs_caps & IBS_CAPS_OPDATA4) {
- rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
+ if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
+ rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
size++;
}
}
@@ -666,13 +691,17 @@ out:
if (throttle) {
perf_ibs_stop(event, 0);
} else {
- period >>= 4;
-
- if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
- (*config & IBS_OP_CNT_CTL))
- period |= *config & IBS_OP_CUR_CNT_RAND;
+ if (perf_ibs == &perf_ibs_op) {
+ if (ibs_caps & IBS_CAPS_OPCNTEXT) {
+ new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
+ period &= ~IBS_OP_MAX_CNT_EXT_MASK;
+ }
+ if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
+ new_config |= *config & IBS_OP_CUR_CNT_RAND;
+ }
+ new_config |= period >> 4;
- perf_ibs_enable_event(perf_ibs, hwc, period);
+ perf_ibs_enable_event(perf_ibs, hwc, new_config);
}
perf_event_update_userpage(event);
@@ -733,12 +762,26 @@ static __init void perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;
+ /*
+ * Some chips fail to reset the fetch count when it is written; instead
+ * they need a 0-1 transition of IbsFetchEn.
+ */
+ if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
+ perf_ibs_fetch.fetch_count_reset_broken = 1;
+
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
*attr++ = &format_attr_cnt_ctl.attr;
}
+
+ if (ibs_caps & IBS_CAPS_OPCNTEXT) {
+ perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ }
+
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index fb616203ce42..be50ef8572cc 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -379,7 +379,7 @@ static __init int _init_events_attrs(void)
while (amd_iommu_v2_event_descs[i].attr.attr.name)
i++;
- attrs = kcalloc(i + 1, sizeof(struct attribute **), GFP_KERNEL);
+ attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
if (!attrs)
return -ENOMEM;
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 76400c052b0e..7f014d450bc2 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -181,28 +181,28 @@ static void amd_uncore_del(struct perf_event *event, int flags)
}
/*
- * Convert logical CPU number to L3 PMC Config ThreadMask format
+ * Return a full thread and slice mask unless user
+ * has provided them
*/
-static u64 l3_thread_slice_mask(int cpu)
+static u64 l3_thread_slice_mask(u64 config)
{
- u64 thread_mask, core = topology_core_id(cpu);
- unsigned int shift, thread = 0;
+ if (boot_cpu_data.x86 <= 0x18)
+ return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
+ ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
- if (topology_smt_supported() && !topology_is_primary_thread(cpu))
- thread = 1;
-
- if (boot_cpu_data.x86 <= 0x18) {
- shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread;
- thread_mask = BIT_ULL(shift);
-
- return AMD64_L3_SLICE_MASK | thread_mask;
- }
-
- core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK;
- shift = AMD64_L3_THREAD_SHIFT + thread;
- thread_mask = BIT_ULL(shift);
+ /*
+ * If the user doesn't specify a threadmask, they're not trying to
+ * count core 0, so we enable all cores & threads.
+ * We'll also assume that they want to count slice 0 if they specify
+ * a threadmask and leave sliceid and enallslices unpopulated.
+ */
+ if (!(config & AMD64_L3_F19H_THREAD_MASK))
+ return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
+ AMD64_L3_EN_ALL_CORES;
- return AMD64_L3_EN_ALL_SLICES | core | thread_mask;
+ return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
+ AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
+ AMD64_L3_COREID_MASK);
}
static int amd_uncore_event_init(struct perf_event *event)
@@ -232,7 +232,7 @@ static int amd_uncore_event_init(struct perf_event *event)
* For other events, the two fields do not affect the count.
*/
if (l3_mask && is_llc_event(event))
- hwc->config |= l3_thread_slice_mask(event->cpu);
+ hwc->config |= l3_thread_slice_mask(event->attr.config);
uncore = event_to_amd_uncore(event);
if (!uncore)
@@ -274,47 +274,72 @@ static struct attribute_group amd_uncore_attr_group = {
.attrs = amd_uncore_attrs,
};
-/*
- * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based
- * on family
- */
-#define AMD_FORMAT_ATTR(_dev, _name, _format) \
-static ssize_t \
-_dev##_show##_name(struct device *dev, \
- struct device_attribute *attr, \
- char *page) \
-{ \
- BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
- return sprintf(page, _format "\n"); \
-} \
-static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);
-
-/* Used for each uncore counter type */
-#define AMD_ATTRIBUTE(_name) \
-static struct attribute *amd_uncore_format_attr_##_name[] = { \
- &format_attr_event_##_name.attr, \
- &format_attr_umask.attr, \
- NULL, \
-}; \
-static struct attribute_group amd_uncore_format_group_##_name = { \
- .name = "format", \
- .attrs = amd_uncore_format_attr_##_name, \
-}; \
-static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \
- &amd_uncore_attr_group, \
- &amd_uncore_format_group_##_name, \
- NULL, \
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+
+DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
+DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
+DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
+
+static struct attribute *amd_uncore_df_format_attr[] = {
+ &format_attr_event12.attr, /* event14 if F17h+ */
+ &format_attr_umask.attr,
+ NULL,
+};
+
+static struct attribute *amd_uncore_l3_format_attr[] = {
+ &format_attr_event12.attr, /* event8 if F17h+ */
+ &format_attr_umask.attr,
+ NULL, /* slicemask if F17h, coreid if F19h */
+ NULL, /* threadmask8 if F17h, enallslices if F19h */
+ NULL, /* enallcores if F19h */
+ NULL, /* sliceid if F19h */
+ NULL, /* threadmask2 if F19h */
+ NULL,
+};
+
+static struct attribute_group amd_uncore_df_format_group = {
+ .name = "format",
+ .attrs = amd_uncore_df_format_attr,
};
-AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
-AMD_FORMAT_ATTR(umask, , "config:8-15");
-AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
-AMD_FORMAT_ATTR(event, _l3, "config:0-7");
-AMD_ATTRIBUTE(df);
-AMD_ATTRIBUTE(l3);
+static struct attribute_group amd_uncore_l3_format_group = {
+ .name = "format",
+ .attrs = amd_uncore_l3_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_df_attr_groups[] = {
+ &amd_uncore_attr_group,
+ &amd_uncore_df_format_group,
+ NULL,
+};
+
+static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
+ &amd_uncore_attr_group,
+ &amd_uncore_l3_format_group,
+ NULL,
+};
static struct pmu amd_nb_pmu = {
.task_ctx_nr = perf_invalid_context,
+ .attr_groups = amd_uncore_df_attr_groups,
+ .name = "amd_nb",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
.del = amd_uncore_del,
@@ -326,6 +351,8 @@ static struct pmu amd_nb_pmu = {
static struct pmu amd_llc_pmu = {
.task_ctx_nr = perf_invalid_context,
+ .attr_groups = amd_uncore_l3_attr_groups,
+ .name = "amd_l2",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
.del = amd_uncore_del,
@@ -529,6 +556,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu)
static int __init amd_uncore_init(void)
{
+ struct attribute **df_attr = amd_uncore_df_format_attr;
+ struct attribute **l3_attr = amd_uncore_l3_format_attr;
int ret = -ENODEV;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
@@ -538,6 +567,8 @@ static int __init amd_uncore_init(void)
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
return -ENODEV;
+ num_counters_nb = NUM_COUNTERS_NB;
+ num_counters_llc = NUM_COUNTERS_L2;
if (boot_cpu_data.x86 >= 0x17) {
/*
* For F17h and above, the Northbridge counters are
@@ -545,27 +576,16 @@ static int __init amd_uncore_init(void)
* counters are supported too. The PMUs are exported
* based on family as either L2 or L3 and NB or DF.
*/
- num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L3;
amd_nb_pmu.name = "amd_df";
amd_llc_pmu.name = "amd_l3";
- format_attr_event_df.show = &event_show_df;
- format_attr_event_l3.show = &event_show_l3;
l3_mask = true;
- } else {
- num_counters_nb = NUM_COUNTERS_NB;
- num_counters_llc = NUM_COUNTERS_L2;
- amd_nb_pmu.name = "amd_nb";
- amd_llc_pmu.name = "amd_l2";
- format_attr_event_df = format_attr_event;
- format_attr_event_l3 = format_attr_event;
- l3_mask = false;
}
- amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
- amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
-
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
+ if (boot_cpu_data.x86 >= 0x17)
+ *df_attr = &format_attr_event14.attr;
+
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_nb) {
ret = -ENOMEM;
@@ -575,13 +595,29 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_nb;
- pr_info("%s NB counters detected\n",
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
- "HYGON" : "AMD");
+ pr_info("%d %s %s counters detected\n", num_counters_nb,
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
+ amd_nb_pmu.name);
+
ret = 0;
}
if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
+ if (boot_cpu_data.x86 >= 0x19) {
+ *l3_attr++ = &format_attr_event8.attr;
+ *l3_attr++ = &format_attr_umask.attr;
+ *l3_attr++ = &format_attr_coreid.attr;
+ *l3_attr++ = &format_attr_enallslices.attr;
+ *l3_attr++ = &format_attr_enallcores.attr;
+ *l3_attr++ = &format_attr_sliceid.attr;
+ *l3_attr++ = &format_attr_threadmask2.attr;
+ } else if (boot_cpu_data.x86 >= 0x17) {
+ *l3_attr++ = &format_attr_event8.attr;
+ *l3_attr++ = &format_attr_umask.attr;
+ *l3_attr++ = &format_attr_slicemask.attr;
+ *l3_attr++ = &format_attr_threadmask8.attr;
+ }
+
amd_uncore_llc = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_llc) {
ret = -ENOMEM;
@@ -591,9 +627,9 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_llc;
- pr_info("%s LLC counters detected\n",
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
- "HYGON" : "AMD");
+ pr_info("%d %s %s counters detected\n", num_counters_llc,
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
+ amd_llc_pmu.name);
ret = 0;
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 360c395d51d0..a88c94d65693 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -105,6 +105,9 @@ u64 x86_perf_event_update(struct perf_event *event)
if (unlikely(!hwc->event_base))
return 0;
+ if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
+ return x86_pmu.update_topdown_event(event);
+
/*
* Careful: an NMI might modify the previous event value.
*
@@ -1056,6 +1059,45 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
return unsched ? -EINVAL : 0;
}
+static int add_nr_metric_event(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (is_metric_event(event)) {
+ if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
+ return -EINVAL;
+ cpuc->n_metric++;
+ cpuc->n_txn_metric++;
+ }
+
+ return 0;
+}
+
+static void del_nr_metric_event(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (is_metric_event(event))
+ cpuc->n_metric--;
+}
+
+static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
+ int max_count, int n)
+{
+
+ if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
+ return -EINVAL;
+
+ if (n >= max_count + cpuc->n_metric)
+ return -EINVAL;
+
+ cpuc->event_list[n] = event;
+ if (is_counter_pair(&event->hw)) {
+ cpuc->n_pair++;
+ cpuc->n_txn_pair++;
+ }
+
+ return 0;
+}
+
/*
* dogrp: true if must collect siblings events (group)
* returns total number of events and error code
@@ -1092,28 +1134,22 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
}
if (is_x86_event(leader)) {
- if (n >= max_count)
+ if (collect_event(cpuc, leader, max_count, n))
return -EINVAL;
- cpuc->event_list[n] = leader;
n++;
- if (is_counter_pair(&leader->hw))
- cpuc->n_pair++;
}
+
if (!dogrp)
return n;
for_each_sibling_event(event, leader) {
- if (!is_x86_event(event) ||
- event->state <= PERF_EVENT_STATE_OFF)
+ if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF)
continue;
- if (n >= max_count)
+ if (collect_event(cpuc, event, max_count, n))
return -EINVAL;
- cpuc->event_list[n] = event;
n++;
- if (is_counter_pair(&event->hw))
- cpuc->n_pair++;
}
return n;
}
@@ -1135,11 +1171,16 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->event_base = 0;
break;
+ case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
+ /* All the metric events are mapped onto the fixed counter 3. */
+ idx = INTEL_PMC_IDX_FIXED_SLOTS;
+ /* fall through */
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
(idx - INTEL_PMC_IDX_FIXED);
- hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
+ hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
+ INTEL_PMC_FIXED_RDPMC_BASE;
break;
default:
@@ -1270,6 +1311,10 @@ int x86_perf_event_set_period(struct perf_event *event)
if (unlikely(!hwc->event_base))
return 0;
+ if (unlikely(is_topdown_count(event)) &&
+ x86_pmu.set_topdown_event_period)
+ return x86_pmu.set_topdown_event_period(event);
+
/*
* If we are way outside a reasonable range then just skip forward:
*/
@@ -1309,11 +1354,11 @@ int x86_perf_event_set_period(struct perf_event *event)
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
- * Clear the Merge event counter's upper 16 bits since
+ * Sign extend the Merge event counter's upper 16 bits since
* we currently declare a 48-bit counter width
*/
if (is_counter_pair(hwc))
- wrmsrl(x86_pmu_event_addr(idx + 1), 0);
+ wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
/*
* Due to erratum on certan cpu we need
@@ -1551,6 +1596,8 @@ static void x86_pmu_del(struct perf_event *event, int flags)
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
+ if (x86_pmu.intel_cap.perf_metrics)
+ del_nr_metric_event(cpuc, event);
perf_event_update_userpage(event);
@@ -2018,6 +2065,8 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
perf_pmu_disable(pmu);
__this_cpu_write(cpu_hw_events.n_txn, 0);
+ __this_cpu_write(cpu_hw_events.n_txn_pair, 0);
+ __this_cpu_write(cpu_hw_events.n_txn_metric, 0);
}
/*
@@ -2043,6 +2092,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
*/
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
+ __this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
+ __this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
perf_pmu_enable(pmu);
}
@@ -2264,17 +2315,15 @@ static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *m
static int x86_pmu_event_idx(struct perf_event *event)
{
- int idx = event->hw.idx;
+ struct hw_perf_event *hwc = &event->hw;
- if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ if (!(hwc->flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0;
- if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
- idx -= INTEL_PMC_IDX_FIXED;
- idx |= 1 << 30;
- }
-
- return idx + 1;
+ if (is_metric_idx(hwc->idx))
+ return INTEL_PMC_FIXED_RDPMC_METRICS + 1;
+ else
+ return hwc->event_base_rdpmc + 1;
}
static ssize_t get_attr_rdpmc(struct device *cdev,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 31e6887d24f1..f1926e9f2143 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -243,10 +243,14 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
static struct event_constraint intel_icl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
@@ -309,6 +313,12 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
"4", "2");
+EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
+EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
+EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
+EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
+EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
+
static struct attribute *snb_events_attrs[] = {
EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired),
@@ -2165,11 +2175,24 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
static void intel_pmu_disable_fixed(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask;
+ int idx = hwc->idx;
- mask = 0xfULL << (idx * 4);
+ if (is_topdown_idx(idx)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * When there are other active TopDown events,
+ * don't disable the fixed counter 3.
+ */
+ if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
+ return;
+ idx = INTEL_PMC_IDX_FIXED_SLOTS;
+ }
+ intel_clear_masks(event, idx);
+
+ mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
wrmsrl(hwc->config_base, ctrl_val);
@@ -2180,17 +2203,28 @@ static void intel_pmu_disable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- if (idx < INTEL_PMC_IDX_FIXED) {
+ switch (idx) {
+ case 0 ... INTEL_PMC_IDX_FIXED - 1:
intel_clear_masks(event, idx);
x86_pmu_disable_event(event);
- } else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
- intel_clear_masks(event, idx);
+ break;
+ case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+ case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
intel_pmu_disable_fixed(event);
- } else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
+ break;
+ case INTEL_PMC_IDX_FIXED_BTS:
intel_pmu_disable_bts();
intel_pmu_drain_bts_buffer();
- } else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
+ return;
+ case INTEL_PMC_IDX_FIXED_VLBR:
intel_clear_masks(event, idx);
+ break;
+ default:
+ intel_clear_masks(event, idx);
+ pr_warn("Failed to disable the event with invalid index %d\n",
+ idx);
+ return;
+ }
/*
* Needs to be called after x86_pmu_disable_event,
@@ -2208,10 +2242,189 @@ static void intel_pmu_del_event(struct perf_event *event)
intel_pmu_pebs_del(event);
}
+static int icl_set_topdown_event_period(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ s64 left = local64_read(&hwc->period_left);
+
+ /*
+ * The values in PERF_METRICS MSR are derived from fixed counter 3.
+ * Software should start both registers, PERF_METRICS and fixed
+ * counter 3, from zero.
+ * Clear PERF_METRICS and Fixed counter 3 in initialization.
+ * After that, both MSRs will be cleared for each read.
+ * Don't need to clear them again.
+ */
+ if (left == x86_pmu.max_period) {
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrl(MSR_PERF_METRICS, 0);
+ hwc->saved_slots = 0;
+ hwc->saved_metric = 0;
+ }
+
+ if ((hwc->saved_slots) && is_slots_event(event)) {
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
+ wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
+ }
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
+{
+ u32 val;
+
+ /*
+ * The metric is reported as an 8bit integer fraction
+ * suming up to 0xff.
+ * slots-in-metric = (Metric / 0xff) * slots
+ */
+ val = (metric >> ((idx - INTEL_PMC_IDX_METRIC_BASE) * 8)) & 0xff;
+ return mul_u64_u32_div(slots, val, 0xff);
+}
+
+static u64 icl_get_topdown_value(struct perf_event *event,
+ u64 slots, u64 metrics)
+{
+ int idx = event->hw.idx;
+ u64 delta;
+
+ if (is_metric_idx(idx))
+ delta = icl_get_metrics_event_value(metrics, slots, idx);
+ else
+ delta = slots;
+
+ return delta;
+}
+
+static void __icl_update_topdown_event(struct perf_event *event,
+ u64 slots, u64 metrics,
+ u64 last_slots, u64 last_metrics)
+{
+ u64 delta, last = 0;
+
+ delta = icl_get_topdown_value(event, slots, metrics);
+ if (last_slots)
+ last = icl_get_topdown_value(event, last_slots, last_metrics);
+
+ /*
+ * The 8bit integer fraction of metric may be not accurate,
+ * especially when the changes is very small.
+ * For example, if only a few bad_spec happens, the fraction
+ * may be reduced from 1 to 0. If so, the bad_spec event value
+ * will be 0 which is definitely less than the last value.
+ * Avoid update event->count for this case.
+ */
+ if (delta > last) {
+ delta -= last;
+ local64_add(delta, &event->count);
+ }
+}
+
+static void update_saved_topdown_regs(struct perf_event *event,
+ u64 slots, u64 metrics)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *other;
+ int idx;
+
+ event->hw.saved_slots = slots;
+ event->hw.saved_metric = metrics;
+
+ for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
+ if (!is_topdown_idx(idx))
+ continue;
+ other = cpuc->events[idx];
+ other->hw.saved_slots = slots;
+ other->hw.saved_metric = metrics;
+ }
+}
+
+/*
+ * Update all active Topdown events.
+ *
+ * The PERF_METRICS and Fixed counter 3 are read separately. The values may be
+ * modify by a NMI. PMU has to be disabled before calling this function.
+ */
+static u64 icl_update_topdown_event(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *other;
+ u64 slots, metrics;
+ bool reset = true;
+ int idx;
+
+ /* read Fixed counter 3 */
+ rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
+ if (!slots)
+ return 0;
+
+ /* read PERF_METRICS */
+ rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+
+ for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
+ if (!is_topdown_idx(idx))
+ continue;
+ other = cpuc->events[idx];
+ __icl_update_topdown_event(other, slots, metrics,
+ event ? event->hw.saved_slots : 0,
+ event ? event->hw.saved_metric : 0);
+ }
+
+ /*
+ * Check and update this event, which may have been cleared
+ * in active_mask e.g. x86_pmu_stop()
+ */
+ if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
+ __icl_update_topdown_event(event, slots, metrics,
+ event->hw.saved_slots,
+ event->hw.saved_metric);
+
+ /*
+ * In x86_pmu_stop(), the event is cleared in active_mask first,
+ * then drain the delta, which indicates context switch for
+ * counting.
+ * Save metric and slots for context switch.
+ * Don't need to reset the PERF_METRICS and Fixed counter 3.
+ * Because the values will be restored in next schedule in.
+ */
+ update_saved_topdown_regs(event, slots, metrics);
+ reset = false;
+ }
+
+ if (reset) {
+ /* The fixed counter 3 has to be written before the PERF_METRICS. */
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrl(MSR_PERF_METRICS, 0);
+ if (event)
+ update_saved_topdown_regs(event, 0, 0);
+ }
+
+ return slots;
+}
+
+static void intel_pmu_read_topdown_event(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_even