summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/perf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-17 13:34:25 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-17 13:34:25 -0800
commit8a5be36b9303ae167468d4f5e1b3c090b9981396 (patch)
treeddf1721677782484bab6369a87f13611eafb879a /arch/powerpc/perf
parent09c0796adf0c793462fda1d7c8c43324551405c7 (diff)
parentc1bea0a840ac75dca19bc6aa05575a33eb9fd058 (diff)
Merge tag 'powerpc-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: - Switch to the generic C VDSO, as well as some cleanups of our VDSO setup/handling code. - Support for KUAP (Kernel User Access Prevention) on systems using the hashed page table MMU, using memory protection keys. - Better handling of PowerVM SMT8 systems where all threads of a core do not share an L2, allowing the scheduler to make better scheduling decisions. - Further improvements to our machine check handling. - Show registers when unwinding interrupt frames during stack traces. - Improvements to our pseries (PowerVM) partition migration code. - Several series from Christophe refactoring and cleaning up various parts of the 32-bit code. - Other smaller features, fixes & cleanups. Thanks to: Alan Modra, Alexey Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Ard Biesheuvel, Athira Rajeev, Balamuruhan S, Bill Wendling, Cédric Le Goater, Christophe Leroy, Christophe Lombard, Colin Ian King, Daniel Axtens, David Hildenbrand, Frederic Barrat, Ganesh Goudar, Gautham R. Shenoy, Geert Uytterhoeven, Giuseppe Sacco, Greg Kurz, Harish, Jan Kratochvil, Jordan Niethe, Kaixu Xia, Laurent Dufour, Leonardo Bras, Madhavan Srinivasan, Mahesh Salgaonkar, Mathieu Desnoyers, Nathan Lynch, Nicholas Piggin, Oleg Nesterov, Oliver O'Halloran, Oscar Salvador, Po-Hsu Lin, Qian Cai, Qinglang Miao, Randy Dunlap, Ravi Bangoria, Sachin Sant, Sandipan Das, Sebastian Andrzej Siewior , Segher Boessenkool, Srikar Dronamraju, Tyrel Datwyler, Uwe Kleine-König, Vincent Stehlé, Youling Tang, and Zhang Xiaoxu. * tag 'powerpc-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (304 commits) powerpc/32s: Fix cleanup_cpu_mmu_context() compile bug powerpc: Add config fragment for disabling -Werror powerpc/configs: Add ppc64le_allnoconfig target powerpc/powernv: Rate limit opal-elog read failure message powerpc/pseries/memhotplug: Quieten some DLPAR operations powerpc/ps3: use dma_mapping_error() powerpc: force inlining of csum_partial() to avoid multiple csum_partial() with GCC10 powerpc/perf: Fix Threshold Event Counter Multiplier width for P10 powerpc/mm: Fix hugetlb_free_pmd_range() and hugetlb_free_pud_range() KVM: PPC: Book3S HV: Fix mask size for emulated msgsndp KVM: PPC: fix comparison to bool warning KVM: PPC: Book3S: Assign boolean values to a bool variable powerpc: Inline setup_kup() powerpc/64s: Mark the kuap/kuep functions non __init KVM: PPC: Book3S HV: XIVE: Add a comment regarding VP numbering powerpc/xive: Improve error reporting of OPAL calls powerpc/xive: Simplify xive_do_source_eoi() powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_EOI_FW powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_MASK_FW powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_SHIFT_BUG ...
Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r--arch/powerpc/perf/8xx-pmu.c16
-rw-r--r--arch/powerpc/perf/callchain.h2
-rw-r--r--arch/powerpc/perf/callchain_32.c8
-rw-r--r--arch/powerpc/perf/callchain_64.c7
-rw-r--r--arch/powerpc/perf/core-book3s.c65
-rw-r--r--arch/powerpc/perf/isa207-common.c38
-rw-r--r--arch/powerpc/perf/isa207-common.h20
-rw-r--r--arch/powerpc/perf/power10-events-list.h9
-rw-r--r--arch/powerpc/perf/power10-pmu.c184
9 files changed, 309 insertions, 40 deletions
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index e53c3c161257..f970d1510d3d 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -153,6 +153,8 @@ static void mpc8xx_pmu_read(struct perf_event *event)
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{
+ struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));
+
mpc8xx_pmu_read(event);
/* If it was the last user, stop counting to avoid useles overhead */
@@ -164,22 +166,12 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
mtspr(SPRN_ICTRL, 7);
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
- if (atomic_dec_return(&itlb_miss_ref) == 0) {
- /* mfspr r10, SPRN_SPRG_SCRATCH0 */
- struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) |
- __PPC_SPR(SPRN_SPRG_SCRATCH0));
-
+ if (atomic_dec_return(&itlb_miss_ref) == 0)
patch_instruction_site(&patch__itlbmiss_exit_1, insn);
- }
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
- if (atomic_dec_return(&dtlb_miss_ref) == 0) {
- /* mfspr r10, SPRN_DAR */
- struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) |
- __PPC_SPR(SPRN_DAR));
-
+ if (atomic_dec_return(&dtlb_miss_ref) == 0)
patch_instruction_site(&patch__dtlbmiss_exit_1, insn);
- }
break;
}
}
diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
index ae24d4a00da6..d6fa6e25234f 100644
--- a/arch/powerpc/perf/callchain.h
+++ b/arch/powerpc/perf/callchain.h
@@ -33,7 +33,7 @@ static inline int __read_user_stack(const void __user *ptr, void *ret,
rc = copy_from_user_nofault(ret, ptr, size);
- if (IS_ENABLED(CONFIG_PPC64) && rc)
+ if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc)
return read_user_stack_slow(ptr, ret, size);
return rc;
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c
index 64e4013d8060..b83c47b7947f 100644
--- a/arch/powerpc/perf/callchain_32.c
+++ b/arch/powerpc/perf/callchain_32.c
@@ -59,8 +59,8 @@ static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
{
if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
return 1;
- if (vdso32_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso32_sigtramp)
+ if (current->mm->context.vdso &&
+ nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp32))
return 1;
return 0;
}
@@ -70,8 +70,8 @@ static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
if (nip == fp + offsetof(struct rt_signal_frame_32,
uc.uc_mcontext.mc_pad))
return 1;
- if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
+ if (current->mm->context.vdso &&
+ nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp_rt32))
return 1;
return 0;
}
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
index fed90e827f3a..8d0df4226328 100644
--- a/arch/powerpc/perf/callchain_64.c
+++ b/arch/powerpc/perf/callchain_64.c
@@ -21,7 +21,8 @@
/*
* On 64-bit we don't want to invoke hash_page on user addresses from
* interrupt context, so if the access faults, we read the page tables
- * to find which page (if any) is mapped and access it directly.
+ * to find which page (if any) is mapped and access it directly. Radix
+ * has no need for this so it doesn't use read_user_stack_slow.
*/
int read_user_stack_slow(const void __user *ptr, void *buf, int nb)
{
@@ -67,8 +68,8 @@ static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
{
if (nip == fp + offsetof(struct signal_frame_64, tramp))
return 1;
- if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
+ if (current->mm->context.vdso &&
+ nip == VDSO64_SYMBOL(current->mm->context.vdso, sigtramp_rt64))
return 1;
return 0;
}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6586f7e71cfb..28206b1fe172 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -95,6 +95,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
#define SPRN_SIER3 0
#define MMCRA_SAMPLE_ENABLE 0
#define MMCRA_BHRB_DISABLE 0
+#define MMCR0_PMCCEXT 0
static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
@@ -137,6 +138,9 @@ static void pmao_restore_workaround(bool ebb) { }
bool is_sier_available(void)
{
+ if (!ppmu)
+ return false;
+
if (ppmu->flags & PPMU_HAS_SIER)
return true;
@@ -250,11 +254,32 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs)
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
bool use_siar = regs_use_siar(regs);
+ unsigned long mmcra = regs->dsisr;
+ int marked = mmcra & MMCRA_SAMPLE_ENABLE;
if (!use_siar)
return perf_flags_from_msr(regs);
/*
+ * Check the address in SIAR to identify the
+ * privilege levels since the SIER[MSR_HV, MSR_PR]
+ * bits are not set for marked events in power10
+ * DD1.
+ */
+ if (marked && (ppmu->flags & PPMU_P10_DD1)) {
+ unsigned long siar = mfspr(SPRN_SIAR);
+ if (siar) {
+ if (is_kernel_addr(siar))
+ return PERF_RECORD_MISC_KERNEL;
+ return PERF_RECORD_MISC_USER;
+ } else {
+ if (is_kernel_addr(regs->nip))
+ return PERF_RECORD_MISC_KERNEL;
+ return PERF_RECORD_MISC_USER;
+ }
+ }
+
+ /*
* If we don't have flags in MMCRA, rather than using
* the MSR, we intuit the flags from the address in
* SIAR which should give slightly more reliable
@@ -350,7 +375,14 @@ static inline int siar_valid(struct pt_regs *regs)
int marked = mmcra & MMCRA_SAMPLE_ENABLE;
if (marked) {
- if (ppmu->flags & PPMU_HAS_SIER)
+ /*
+ * SIER[SIAR_VALID] is not set for some
+ * marked events on power10 DD1, so drop
+ * the check for SIER[SIAR_VALID] and return true.
+ */
+ if (ppmu->flags & PPMU_P10_DD1)
+ return 0x1;
+ else if (ppmu->flags & PPMU_HAS_SIER)
return regs->dar & SIER_SIAR_VALID;
if (ppmu->flags & PPMU_SIAR_VALID)
@@ -1242,6 +1274,9 @@ static void power_pmu_disable(struct pmu *pmu)
val |= MMCR0_FC;
val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
MMCR0_FC56);
+ /* Set mmcr0 PMCCEXT for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+ val |= MMCR0_PMCCEXT;
/*
* The barrier is to make sure the mtspr has been
@@ -1881,7 +1916,7 @@ static bool is_event_blacklisted(u64 ev)
static int power_pmu_event_init(struct perf_event *event)
{
u64 ev;
- unsigned long flags;
+ unsigned long flags, irq_flags;
struct perf_event *ctrs[MAX_HWEVENTS];
u64 events[MAX_HWEVENTS];
unsigned int cflags[MAX_HWEVENTS];
@@ -1989,7 +2024,9 @@ static int power_pmu_event_init(struct perf_event *event)
if (check_excludes(ctrs, cflags, n, 1))
return -EINVAL;
- cpuhw = &get_cpu_var(cpu_hw_events);
+ local_irq_save(irq_flags);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+
err = power_check_constraints(cpuhw, events, cflags, n + 1);
if (has_branch_stack(event)) {
@@ -2000,13 +2037,13 @@ static int power_pmu_event_init(struct perf_event *event)
event->attr.branch_sample_type);
if (bhrb_filter == -1) {
- put_cpu_var(cpu_hw_events);
+ local_irq_restore(irq_flags);
return -EOPNOTSUPP;
}
cpuhw->bhrb_filter = bhrb_filter;
}
- put_cpu_var(cpu_hw_events);
+ local_irq_restore(irq_flags);
if (err)
return -EINVAL;
@@ -2125,6 +2162,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
perf_event_update_userpage(event);
/*
+ * Due to hardware limitation, sometimes SIAR could sample a kernel
+ * address even when freeze on supervisor state (kernel) is set in
+ * MMCR2. Check attr.exclude_kernel and address to drop the sample in
+ * these cases.
+ */
+ if (event->attr.exclude_kernel && record)
+ if (is_kernel_addr(mfspr(SPRN_SIAR)))
+ record = 0;
+
+ /*
* Finally record data if requested.
*/
if (record) {
@@ -2180,8 +2227,14 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
bool use_siar = regs_use_siar(regs);
+ unsigned long siar = mfspr(SPRN_SIAR);
- if (use_siar && siar_valid(regs))
+ if (ppmu->flags & PPMU_P10_DD1) {
+ if (siar)
+ return siar;
+ else
+ return regs->nip;
+ } else if (use_siar && siar_valid(regs))
return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
else if (use_siar)
return 0; // no valid instruction pointer
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 2848904df638..6ab5b272090a 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -247,6 +247,9 @@ void isa207_get_mem_weight(u64 *weight)
u64 sier = mfspr(SPRN_SIER);
u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
+
if (val == 0 || val == 7)
*weight = 0;
else
@@ -311,9 +314,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
if (unit >= 6 && unit <= 9) {
- if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
- mask |= CNST_L2L3_GROUP_MASK;
- value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (unit == 6) {
+ mask |= CNST_L2L3_GROUP_MASK;
+ value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
+ }
} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
mask |= CNST_CACHE_GROUP_MASK;
value |= CNST_CACHE_GROUP_VAL(event & 0xff);
@@ -339,12 +344,22 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
value |= CNST_L1_QUAL_VAL(cache);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mask |= CNST_RADIX_SCOPE_GROUP_MASK;
+ value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT);
+ }
+
if (is_event_marked(event)) {
mask |= CNST_SAMPLE_MASK;
value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
}
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (event_is_threshold(event)) {
+ mask |= CNST_THRESH_CTL_SEL_MASK;
+ value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+ }
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
mask |= CNST_THRESH_MASK;
value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
@@ -456,6 +471,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
}
}
+ /* Set RADIX_SCOPE_QUAL bit */
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) &
+ p10_EVENT_RADIX_SCOPE_QUAL_MASK;
+ mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT;
+ }
+
if (is_event_marked(event[i])) {
mmcra |= MMCRA_SAMPLE_ENABLE;
@@ -539,6 +561,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
if (!(pmc_inuse & 0x60))
mmcr->mmcr0 |= MMCR0_FC56;
+ /*
+ * Set mmcr0 (PMCCEXT) for p10 which
+ * will restrict access to group B registers
+ * when MMCR0 PMCC=0b00.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ mmcr->mmcr0 |= MMCR0_PMCCEXT;
+
mmcr->mmcr1 = mmcr1;
mmcr->mmcra = mmcra;
mmcr->mmcr2 = mmcr2;
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 7025de5e60e7..454b32c31440 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -101,6 +101,9 @@
#define p10_EVENT_CACHE_SEL_MASK 0x3ull
#define p10_EVENT_MMCR3_MASK 0x7fffull
#define p10_EVENT_MMCR3_SHIFT 45
+#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT 9
+#define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1
+#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45
#define p10_EVENT_VALID_MASK \
((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \
@@ -112,6 +115,7 @@
(p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \
(p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \
(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
+ (p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) | \
EVENT_LINUX_MASK | \
EVENT_PSEL_MASK))
/*
@@ -125,9 +129,9 @@
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1]
- * | | | |
- * BHRB IFM -* | | | Count of events for each PMC.
+ * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1]
+ * | | | | |
+ * BHRB IFM -* | | |*radix_scope | Count of events for each PMC.
* EBB -* | | p1, p2, p3, p4, p5, p6.
* L1 I/D qualifier -* |
* nc - number of counters -*
@@ -145,6 +149,9 @@
#define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32)
#define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK)
+#define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32)
+#define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff)
+
#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24)
#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK)
@@ -165,6 +172,9 @@
#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55)
#define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f)
+#define CNST_RADIX_SCOPE_GROUP_VAL(v) (((v) & 0x1ull) << 21)
+#define CNST_RADIX_SCOPE_GROUP_MASK CNST_RADIX_SCOPE_GROUP_VAL(1)
+
/*
* For NC we are counting up to 4 events. This requires three bits, and we need
* the fifth event to overflow and set the 4th bit. To achieve that we bias the
@@ -221,6 +231,10 @@
#define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\
MMCRA_THR_CTR_EXP_MASK)
+#define P10_MMCRA_THR_CTR_MANT_MASK 0xFFul
+#define P10_MMCRA_THR_CTR_MANT(v) (((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\
+ P10_MMCRA_THR_CTR_MANT_MASK)
+
/* MMCRA Threshold Compare bit constant for power9 */
#define p9_MMCRA_THR_CMP_SHIFT 45
diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
index 60c1b8111082..e45dafe818ed 100644
--- a/arch/powerpc/perf/power10-events-list.h
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -15,6 +15,9 @@ EVENT(PM_EXEC_STALL, 0x30008);
EVENT(PM_RUN_INST_CMPL, 0x500fa);
EVENT(PM_BR_CMPL, 0x4d05e);
EVENT(PM_BR_MPRED_CMPL, 0x400f6);
+EVENT(PM_BR_FIN, 0x2f04a);
+EVENT(PM_MPRED_BR_FIN, 0x3e098);
+EVENT(PM_LD_DEMAND_MISS_L1_FIN, 0x400f0);
/* All L1 D cache load references counted at finish, gated by reject */
EVENT(PM_LD_REF_L1, 0x100fc);
@@ -36,6 +39,12 @@ EVENT(PM_IC_PREF_REQ, 0x040a0);
EVENT(PM_DATA_FROM_L3, 0x01340000001c040);
/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
EVENT(PM_DATA_FROM_L3MISS, 0x300fe);
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST, 0x010000046080);
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS, 0x26880);
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PF_MISS_L3, 0x100000016080);
/* Data PTEG reload */
EVENT(PM_DTLB_MISS, 0x300fc);
/* ITLB Reloaded */
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index 9dbe8f9b89b4..79e0206ca454 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -23,10 +23,10 @@
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ]
- * | | | | | |
- * | | | | | *- mark
- * | | | *- L1/L2/L3 cache_sel |
+ * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] | m [ pmcxsel ]
+ * | | | | | | |
+ * | | | | | | *- mark
+ * | | | *- L1/L2/L3 cache_sel | |*-radix_scope_qual
* | | sdar_mode |
* | *- sampling mode for marked events *- combine
* |
@@ -59,6 +59,7 @@
*
* MMCR1[16] = cache_sel[0]
* MMCR1[17] = cache_sel[1]
+ * MMCR1[18] = radix_scope_qual
*
* if mark:
* MMCRA[63] = 1 (SAMPLE_ENABLE)
@@ -113,6 +114,9 @@ GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS);
GENERIC_EVENT_ATTR(mem-stores, MEM_STORES);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BR_FIN);
+GENERIC_EVENT_ATTR(branch-misses, PM_MPRED_BR_FIN);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_DEMAND_MISS_L1_FIN);
CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1);
CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
@@ -123,12 +127,15 @@ CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1);
CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ);
CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PF_MISS_L3);
+CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
-static struct attribute *power10_events_attr[] = {
+static struct attribute *power10_events_attr_dd1[] = {
GENERIC_EVENT_PTR(PM_RUN_CYC),
GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
GENERIC_EVENT_PTR(PM_BR_CMPL),
@@ -153,6 +160,39 @@ static struct attribute *power10_events_attr[] = {
NULL
};
+static struct attribute *power10_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_RUN_CYC),
+ GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_FIN),
+ GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+ GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN),
+ GENERIC_EVENT_PTR(MEM_LOADS),
+ GENERIC_EVENT_PTR(MEM_STORES),
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_REF_L1),
+ CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_INST_FROM_L1),
+ CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+ CACHE_EVENT_PTR(PM_L3_PF_MISS_L3),
+ CACHE_EVENT_PTR(PM_L2_ST_MISS),
+ CACHE_EVENT_PTR(PM_L2_ST),
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_BR_CMPL),
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
+
+static struct attribute_group power10_pmu_events_group_dd1 = {
+ .name = "events",
+ .attrs = power10_events_attr_dd1,
+};
+
static struct attribute_group power10_pmu_events_group = {
.name = "events",
.attrs = power10_events_attr,
@@ -175,6 +215,7 @@ PMU_FORMAT_ATTR(src_sel, "config:45-46");
PMU_FORMAT_ATTR(invert_bit, "config:47");
PMU_FORMAT_ATTR(src_mask, "config:48-53");
PMU_FORMAT_ATTR(src_match, "config:54-59");
+PMU_FORMAT_ATTR(radix_scope, "config:9");
static struct attribute *power10_pmu_format_attr[] = {
&format_attr_event.attr,
@@ -194,6 +235,7 @@ static struct attribute *power10_pmu_format_attr[] = {
&format_attr_invert_bit.attr,
&format_attr_src_mask.attr,
&format_attr_src_match.attr,
+ &format_attr_radix_scope.attr,
NULL,
};
@@ -202,13 +244,19 @@ static struct attribute_group power10_pmu_format_group = {
.attrs = power10_pmu_format_attr,
};
+static const struct attribute_group *power10_pmu_attr_groups_dd1[] = {
+ &power10_pmu_format_group,
+ &power10_pmu_events_group_dd1,
+ NULL,
+};
+
static const struct attribute_group *power10_pmu_attr_groups[] = {
&power10_pmu_format_group,
&power10_pmu_events_group,
NULL,
};
-static int power10_generic_events[] = {
+static int power10_generic_events_dd1[] = {
[PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC,
[PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL,
@@ -217,6 +265,15 @@ static int power10_generic_events[] = {
[PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
};
+static int power10_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_FIN,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_DEMAND_MISS_L1_FIN,
+};
+
static u64 power10_bhrb_filter_map(u64 branch_sample_type)
{
u64 pmu_bhrb_filter = 0;
@@ -273,7 +330,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = PM_LD_REF_L1,
@@ -374,6 +431,107 @@ static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
},
};
+static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_LD_REF_L1,
+ [C(RESULT_MISS)] = PM_LD_MISS_L1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ST_MISS_L1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1,
+ [C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
+ [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PM_L2_ST,
+ [C(RESULT_MISS)] = PM_L2_ST_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_DTLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ITLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_BR_CMPL,
+ [C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+};
+
#undef C
static struct power_pmu power10_pmu = {
@@ -403,6 +561,7 @@ static struct power_pmu power10_pmu = {
int init_power10_pmu(void)
{
+ unsigned int pvr;
int rc;
/* Comes from cpu_specs[] */
@@ -410,9 +569,20 @@ int init_power10_pmu(void)
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10"))
return -ENODEV;
+ pvr = mfspr(SPRN_PVR);
+ /* Add the ppmu flag for power10 DD1 */
+ if ((PVR_CFG(pvr) == 1))
+ power10_pmu.flags |= PPMU_P10_DD1;
+
/* Set the PERF_REG_EXTENDED_MASK here */
PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
+ if ((PVR_CFG(pvr) == 1)) {
+ power10_pmu.generic_events = power10_generic_events_dd1;
+ power10_pmu.attr_groups = power10_pmu_attr_groups_dd1;
+ power10_pmu.cache_events = &power10_cache_events_dd1;
+ }
+
rc = register_power_pmu(&power10_pmu);
if (rc)
return rc;