From a41f3c8cd4e28dcbebd8ec27a9602c86cfa5f009 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 23 Apr 2015 08:56:42 +0200 Subject: perf/x86/intel/uncore: Add Broadwell-U uncore IMC PMU support This patch enables the uncore Memory Controller (IMC) PMU support for Intel Broadwell-U (Model 61) mobile processors. The IMC PMU enables measuring memory bandwidth. To use with perf: $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/ sleep 10 Tested-by: Sonny Rao Signed-off-by: Stephane Eranian Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: kan.liang@intel.com Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/20150423065642.GA4890@thinkpad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 3 +++ arch/x86/kernel/cpu/perf_event_intel_uncore.h | 1 + arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index c635b8b49e93..a03f96402dbe 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -922,6 +922,9 @@ static int __init uncore_pci_init(void) case 69: /* Haswell Celeron */ ret = hsw_uncore_pci_init(); break; + case 61: /* Broadwell */ + ret = bdw_uncore_pci_init(); + break; default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 6c8c1e7e69d8..06b07930e48b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -326,6 +326,7 @@ extern struct event_constraint uncore_constraint_empty; int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); +int bdw_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 4562e9e22c60..b005a78c7012 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -7,6 +7,7 @@ #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 +#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -486,6 +487,14 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { { /* end: all zeroes */ }, }; +static const struct pci_device_id bdw_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + static struct pci_driver snb_uncore_pci_driver = { .name = "snb_uncore", .id_table = snb_uncore_pci_ids, @@ -501,6 +510,11 @@ static struct pci_driver hsw_uncore_pci_driver = { .id_table = hsw_uncore_pci_ids, }; +static struct pci_driver bdw_uncore_pci_driver = { + .name = "bdw_uncore", + .id_table = bdw_uncore_pci_ids, +}; + struct imc_uncore_pci_dev { __u32 pci_id; struct pci_driver *driver; @@ -514,6 +528,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ + IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ { /* end marker */ } }; @@ -561,6 +576,11 @@ int hsw_uncore_pci_init(void) return imc_uncore_pci_init(); } +int bdw_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + /* end of Sandy Bridge uncore support */ /* Nehalem uncore support */ -- cgit v1.2.3 From f4d9757ca6f5a2db6919a5b1ab86b8afa16773d0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 May 2015 00:00:50 +0000 Subject: perf/x86/intel/cqm: Document PQR MSR abuse The CQM code acts like it owns the PQR MSR completely. That's not true because only the lower 10 bits are used for CQM. The upper 32 bits are used for the 'CLass Of Service ID' (CLOSID). Document the abuse. Will be fixed in a later patch. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Matt Fleming Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Will Auld Link: http://lkml.kernel.org/r/20150518235149.823214798@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index e4d1b8b738fa..572582e2143e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -978,7 +978,12 @@ static void intel_cqm_event_start(struct perf_event *event, int mode) WARN_ON_ONCE(state->rmid); state->rmid = rmid; - wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid); + /* + * This is actually wrong, as the upper 32 bit MSR contain the + * closid which is used for configuring the Cache Allocation + * Technology component. + */ + wrmsr(MSR_IA32_PQR_ASSOC, rmid, 0); raw_spin_unlock_irqrestore(&state->lock, flags); } @@ -998,7 +1003,13 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode) if (!--state->cnt) { state->rmid = 0; - wrmsrl(MSR_IA32_PQR_ASSOC, 0); + /* + * This is actually wrong, as the upper 32 bit of the + * MSR contain the closid which is used for + * configuring the Cache Allocation Technology + * component. + */ + wrmsr(MSR_IA32_PQR_ASSOC, 0, 0); } else { WARN_ON_ONCE(!state->rmid); } -- cgit v1.2.3 From b3df4ec4424f27e55d754cfe586195fecca1c4e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 May 2015 00:00:51 +0000 Subject: perf/x86/intel/cqm: Use proper data types 'int' is really not a proper data type for an MSR. Use u32 to make it clear that we are dealing with a 32-bit unsigned hardware value. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Matt Fleming Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Will Auld Link: http://lkml.kernel.org/r/20150518235149.919350144@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 572582e2143e..3e9a7fbfce58 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -18,7 +18,7 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */ struct intel_cqm_state { raw_spinlock_t lock; - int rmid; + u32 rmid; int cnt; }; @@ -962,7 +962,7 @@ out: static void intel_cqm_event_start(struct perf_event *event, int mode) { struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); - unsigned int rmid = event->hw.cqm_rmid; + u32 rmid = event->hw.cqm_rmid; unsigned long flags; if (!(event->hw.cqm_state & PERF_HES_STOPPED)) -- cgit v1.2.3 From 9e7eaac95af6c1aecaf558b8c7a1757d5f2d2ad7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 May 2015 00:00:53 +0000 Subject: perf/x86/intel/cqm: Remove pointless spinlock from state cache 'struct intel_cqm_state' is a strict per CPU cache of the rmid and the usage counter. It can never be modified from a remote CPU. The three functions which modify the content: intel_cqm_event[start|stop|del] (del maps to stop) are called from the perf core with interrupts disabled which is enough protection for the per CPU state values. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Matt Fleming Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Will Auld Link: http://lkml.kernel.org/r/20150518235150.001006529@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 3e9a7fbfce58..63391f860175 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -17,11 +17,16 @@ static unsigned int cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ struct intel_cqm_state { - raw_spinlock_t lock; u32 rmid; int cnt; }; +/* + * The cached intel_cqm_state is strictly per CPU and can never be + * updated from a remote CPU. Both functions which modify the state + * (intel_cqm_event_start and intel_cqm_event_stop) are called with + * interrupts disabled, which is sufficient for the protection. + */ static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state); /* @@ -963,15 +968,12 @@ static void intel_cqm_event_start(struct perf_event *event, int mode) { struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); u32 rmid = event->hw.cqm_rmid; - unsigned long flags; if (!(event->hw.cqm_state & PERF_HES_STOPPED)) return; event->hw.cqm_state &= ~PERF_HES_STOPPED; - raw_spin_lock_irqsave(&state->lock, flags); - if (state->cnt++) WARN_ON_ONCE(state->rmid != rmid); else @@ -984,21 +986,17 @@ static void intel_cqm_event_start(struct perf_event *event, int mode) * Technology component. */ wrmsr(MSR_IA32_PQR_ASSOC, rmid, 0); - - raw_spin_unlock_irqrestore(&state->lock, flags); } static void intel_cqm_event_stop(struct perf_event *event, int mode) { struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); - unsigned long flags; if (event->hw.cqm_state & PERF_HES_STOPPED) return; event->hw.cqm_state |= PERF_HES_STOPPED; - raw_spin_lock_irqsave(&state->lock, flags); intel_cqm_event_read(event); if (!--state->cnt) { @@ -1013,8 +1011,6 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode) } else { WARN_ON_ONCE(!state->rmid); } - - raw_spin_unlock_irqrestore(&state->lock, flags); } static int intel_cqm_event_add(struct perf_event *event, int mode) @@ -1257,7 +1253,6 @@ static void intel_cqm_cpu_prepare(unsigned int cpu) struct intel_cqm_state *state = &per_cpu(cqm_state, cpu); struct cpuinfo_x86 *c = &cpu_data(cpu); - raw_spin_lock_init(&state->lock); state->rmid = 0; state->cnt = 0; -- cgit v1.2.3 From 0bac237845e203dd1439cfc571b1baf1b2274b3b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 May 2015 00:00:55 +0000 Subject: perf/x86/intel/cqm: Avoid pointless MSR write If the usage counter is non-zero there is no point to update the rmid in the PQR MSR. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Matt Fleming Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Will Auld Link: http://lkml.kernel.org/r/20150518235150.080844281@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 63391f860175..2ce69c0953ab 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -974,10 +974,12 @@ static void intel_cqm_event_start(struct perf_event *event, int mode) event->hw.cqm_state &= ~PERF_HES_STOPPED; - if (state->cnt++) - WARN_ON_ONCE(state->rmid != rmid); - else + if (state->cnt++) { + if (!WARN_ON_ONCE(state->rmid != rmid)) + return; + } else { WARN_ON_ONCE(state->rmid); + } state->rmid = rmid; /* -- cgit v1.2.3 From 43d0c2f6dcd07ffc0de658a7fbeeb63c806e9caa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 May 2015 00:00:56 +0000 Subject: perf/x86/intel/cqm: Remove useless wrapper function intel_cqm_event_del() is a 1:1 wrapper for intel_cqm_event_stop(). Remove the useless indirection. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Matt Fleming Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Will Auld Link: http://lkml.kernel.org/r/20150518235150.159779847@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 2ce69c0953ab..8241b64d34c4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -1033,11 +1033,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode) return 0; } -static void intel_cqm_event_del(struct perf_event *event, int mode) -{ - intel_cqm_event_stop(event, mode); -} - static void intel_cqm_event_destroy(struct perf_event *event) { struct perf_event *group_other = NULL; @@ -1230,7 +1225,7 @@ static struct pmu intel_cqm_pmu = { .task_ctx_nr = perf_sw_context, .event_init = intel_cqm_event_init, .add = intel_cqm_event_add, - .del = intel_cqm_event_del, + .del = intel_cqm_event_stop, .start = intel_cqm_event_start, .stop = intel_cqm_event_stop, .read = intel_cqm_event_read, -- cgit v1.2.3 From bf926731e1585ccad029ca2fad1444fee082b78d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 May 2015 00:00:58 +0000 Subject: perf/x86/intel/cqm: Add storage for 'closid' and clean up 'struct intel_pqr_state' 'closid' (CLass Of Service ID) is used for the Class based Cache Allocation Technology (CAT). Add explicit storage to the per cpu cache for it, so it can be used later with the CAT support (requires to move the per cpu data). While at it: - Rename the structure to intel_pqr_state which reflects the actual purpose of the struct: cache values which go into the PQR MSR - Rename 'cnt' to rmid_usecnt which reflects the actual purpose of the counter. - Document the structure and the struct members. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Matt Fleming Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Will Auld Link: http://lkml.kernel.org/r/20150518235150.240899319@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 50 ++++++++++++++++-------------- 1 file changed, 27 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 8241b64d34c4..8233b29bdd35 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -16,18 +16,32 @@ static unsigned int cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ -struct intel_cqm_state { +/** + * struct intel_pqr_state - State cache for the PQR MSR + * @rmid: The cached Resource Monitoring ID + * @closid: The cached Class Of Service ID + * @rmid_usecnt: The usage counter for rmid + * + * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the + * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always + * contains both parts, so we need to cache them. + * + * The cache also helps to avoid pointless updates if the value does + * not change. + */ +struct intel_pqr_state { u32 rmid; - int cnt; + u32 closid; + int rmid_usecnt; }; /* - * The cached intel_cqm_state is strictly per CPU and can never be + * The cached intel_pqr_state is strictly per CPU and can never be * updated from a remote CPU. Both functions which modify the state * (intel_cqm_event_start and intel_cqm_event_stop) are called with * interrupts disabled, which is sufficient for the protection. */ -static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state); +static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); /* * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. @@ -966,7 +980,7 @@ out: static void intel_cqm_event_start(struct perf_event *event, int mode) { - struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); + struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); u32 rmid = event->hw.cqm_rmid; if (!(event->hw.cqm_state & PERF_HES_STOPPED)) @@ -974,7 +988,7 @@ static void intel_cqm_event_start(struct perf_event *event, int mode) event->hw.cqm_state &= ~PERF_HES_STOPPED; - if (state->cnt++) { + if (state->rmid_usecnt++) { if (!WARN_ON_ONCE(state->rmid != rmid)) return; } else { @@ -982,17 +996,12 @@ static void intel_cqm_event_start(struct perf_event *event, int mode) } state->rmid = rmid; - /* - * This is actually wrong, as the upper 32 bit MSR contain the - * closid which is used for configuring the Cache Allocation - * Technology component. - */ - wrmsr(MSR_IA32_PQR_ASSOC, rmid, 0); + wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid); } static void intel_cqm_event_stop(struct perf_event *event, int mode) { - struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); + struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); if (event->hw.cqm_state & PERF_HES_STOPPED) return; @@ -1001,15 +1010,9 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode) intel_cqm_event_read(event); - if (!--state->cnt) { + if (!--state->rmid_usecnt) { state->rmid = 0; - /* - * This is actually wrong, as the upper 32 bit of the - * MSR contain the closid which is used for - * configuring the Cache Allocation Technology - * component. - */ - wrmsr(MSR_IA32_PQR_ASSOC, 0, 0); + wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid); } else { WARN_ON_ONCE(!state->rmid); } @@ -1247,11 +1250,12 @@ static inline void cqm_pick_event_reader(int cpu) static void intel_cqm_cpu_prepare(unsigned int cpu) { - struct intel_cqm_state *state = &per_cpu(cqm_state, cpu); + struct intel_pqr_state *state = &per_cpu(pqr_state, cpu); struct cpuinfo_x86 *c = &cpu_data(cpu); state->rmid = 0; - state->cnt = 0; + state->closid = 0; + state->rmid_usecnt = 0; WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid); WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale); -- cgit v1.2.3 From adafa99960ef18b019f001ddee4d9d81c4e25944 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 22 May 2015 09:59:42 +0100 Subject: perf/x86/intel/cqm: Use 'u32' data type for RMIDs Since we write RMID values to MSRs the correct type to use is 'u32' because that clearly articulates we're writing a hardware register value. Fix up all uses of RMID in this code to consistently use the correct data type. Reported-by: Thomas Gleixner Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Will Auld Link: http://lkml.kernel.org/r/1432285182-17180-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 37 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 8233b29bdd35..188076161c1b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -13,7 +13,7 @@ #define MSR_IA32_QM_CTR 0x0c8e #define MSR_IA32_QM_EVTSEL 0x0c8d -static unsigned int cqm_max_rmid = -1; +static u32 cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ /** @@ -76,7 +76,7 @@ static cpumask_t cqm_cpumask; * near-zero occupancy value, i.e. no cachelines are tagged with this * RMID, once __intel_cqm_rmid_rotate() returns. */ -static unsigned int intel_cqm_rotation_rmid; +static u32 intel_cqm_rotation_rmid; #define INVALID_RMID (-1) @@ -88,7 +88,7 @@ static unsigned int intel_cqm_rotation_rmid; * Likewise, an rmid value of -1 is used to indicate "no rmid currently * assigned" and is used as part of the rotation code. */ -static inline bool __rmid_valid(unsigned int rmid) +static inline bool __rmid_valid(u32 rmid) { if (!rmid || rmid == INVALID_RMID) return false; @@ -96,7 +96,7 @@ static inline bool __rmid_valid(unsigned int rmid) return true; } -static u64 __rmid_read(unsigned int rmid) +static u64 __rmid_read(u32 rmid) { u64 val; @@ -121,7 +121,7 @@ enum rmid_recycle_state { }; struct cqm_rmid_entry { - unsigned int rmid; + u32 rmid; enum rmid_recycle_state state; struct list_head list; unsigned long queue_time; @@ -166,7 +166,7 @@ static LIST_HEAD(cqm_rmid_limbo_lru); */ static struct cqm_rmid_entry **cqm_rmid_ptrs; -static inline struct cqm_rmid_entry *__rmid_entry(int rmid) +static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid) { struct cqm_rmid_entry *entry; @@ -181,7 +181,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(int rmid) * * We expect to be called with cache_mutex held. */ -static int __get_rmid(void) +static u32 __get_rmid(void) { struct cqm_rmid_entry *entry; @@ -196,7 +196,7 @@ static int __get_rmid(void) return entry->rmid; } -static void __put_rmid(unsigned int rmid) +static void __put_rmid(u32 rmid) { struct cqm_rmid_entry *entry; @@ -391,7 +391,7 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b) } struct rmid_read { - unsigned int rmid; + u32 rmid; atomic64_t value; }; @@ -400,12 +400,11 @@ static void __intel_cqm_event_count(void *info); /* * Exchange the RMID of a group of events. */ -static unsigned int -intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid) +static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) { struct perf_event *event; - unsigned int old_rmid = group->hw.cqm_rmid; struct list_head *head = &group->hw.cqm_group_entry; + u32 old_rmid = group->hw.cqm_rmid; lockdep_assert_held(&cache_mutex); @@ -470,7 +469,7 @@ static void intel_cqm_stable(void *arg) * If we have group events waiting for an RMID that don't conflict with * events already running, assign @rmid. */ -static bool intel_cqm_sched_in_event(unsigned int rmid) +static bool intel_cqm_sched_in_event(u32 rmid) { struct perf_event *leader, *event; @@ -617,7 +616,7 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available) static void __intel_cqm_pick_and_rotate(struct perf_event *next) { struct perf_event *rotor; - unsigned int rmid; + u32 rmid; lockdep_assert_held(&cache_mutex); @@ -645,7 +644,7 @@ static void __intel_cqm_pick_and_rotate(struct perf_event *next) static void intel_cqm_sched_out_conflicting_events(struct perf_event *event) { struct perf_event *group, *g; - unsigned int rmid; + u32 rmid; lockdep_assert_held(&cache_mutex); @@ -847,8 +846,8 @@ static void intel_cqm_setup_event(struct perf_event *event, struct perf_event **group) { struct perf_event *iter; - unsigned int rmid; bool conflict = false; + u32 rmid; list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { rmid = iter->hw.cqm_rmid; @@ -879,7 +878,7 @@ static void intel_cqm_setup_event(struct perf_event *event, static void intel_cqm_event_read(struct perf_event *event) { unsigned long flags; - unsigned int rmid; + u32 rmid; u64 val; /* @@ -1021,7 +1020,7 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode) static int intel_cqm_event_add(struct perf_event *event, int mode) { unsigned long flags; - unsigned int rmid; + u32 rmid; raw_spin_lock_irqsave(&cache_lock, flags); @@ -1064,7 +1063,7 @@ static void intel_cqm_event_destroy(struct perf_event *event) list_replace(&event->hw.cqm_groups_entry, &group_other->hw.cqm_groups_entry); } else { - unsigned int rmid = event->hw.cqm_rmid; + u32 rmid = event->hw.cqm_rmid; if (__rmid_valid(rmid)) __put_rmid(rmid); -- cgit v1.2.3 From 1c565833ac7950a5ddb3322e9848e0628ceba3b5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 10:57:21 +0200 Subject: perf/x86/intel: Correct local vs remote sibling state For some obscure reason the current code accounts the current SMT thread's state on the remote thread and reads the remote's state on the local SMT thread. While internally consistent, and 'correct' its pointless confusion we can do without. Flip them the right way around. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 79 ++++++++++++++-------------------- 1 file changed, 33 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a1e35c9f06b9..0ea040562bb8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1903,9 +1903,8 @@ static void intel_start_scheduling(struct cpu_hw_events *cpuc) { struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct intel_excl_states *xl, *xlo; + struct intel_excl_states *xl; int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; /* sibling thread */ /* * nothing needed if in group validation mode @@ -1919,7 +1918,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc) if (!excl_cntrs) return; - xlo = &excl_cntrs->states[o_tid]; xl = &excl_cntrs->states[tid]; xl->sched_started = true; @@ -1932,18 +1930,17 @@ intel_start_scheduling(struct cpu_hw_events *cpuc) raw_spin_lock(&excl_cntrs->lock); /* - * save initial state of sibling thread + * Save a copy of our state to work on. */ - memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state)); + memcpy(xl->init_state, xl->state, sizeof(xl->init_state)); } static void intel_stop_scheduling(struct cpu_hw_events *cpuc) { struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct intel_excl_states *xl, *xlo; + struct intel_excl_states *xl; int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; /* sibling thread */ /* * nothing needed if in group validation mode @@ -1956,13 +1953,12 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc) if (!excl_cntrs) return; - xlo = &excl_cntrs->states[o_tid]; xl = &excl_cntrs->states[tid]; /* - * make new sibling thread state visible + * Commit the working state. */ - memcpy(xlo->state, xlo->init_state, sizeof(xlo->state)); + memcpy(xl->state, xl->init_state, sizeof(xl->state)); xl->sched_started = false; /* @@ -1977,10 +1973,9 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, { struct event_constraint *cx; struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct intel_excl_states *xl, *xlo; - int is_excl, i; + struct intel_excl_states *xlo; int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; /* alternate */ + int is_excl, i; /* * validating a group does not require @@ -1994,23 +1989,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, */ if (!excl_cntrs) return c; - /* - * event requires exclusive counter access - * across HT threads - */ - is_excl = c->flags & PERF_X86_EVENT_EXCL; - if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { - event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; - if (!cpuc->n_excl++) - WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); - } - - /* - * xl = state of current HT - * xlo = state of sibling HT - */ - xl = &excl_cntrs->states[tid]; - xlo = &excl_cntrs->states[o_tid]; cx = c; @@ -2053,6 +2031,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * of this function */ + /* + * state of sibling HT + */ + xlo = &excl_cntrs->states[tid ^ 1]; + + /* + * event requires exclusive counter access + * across HT threads + */ + is_excl = c->flags & PERF_X86_EVENT_EXCL; + if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { + event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; + if (!cpuc->n_excl++) + WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); + } + /* * Modify static constraint with current dynamic * state of thread @@ -2067,14 +2061,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * our corresponding counter cannot be used * regardless of our event */ - if (xl->state[i] == INTEL_EXCL_EXCLUSIVE) + if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) __clear_bit(i, cx->idxmsk); /* * if measuring an exclusive event, sibling * measuring non-exclusive, then counter cannot * be used */ - if (is_excl && xl->state[i] == INTEL_EXCL_SHARED) + if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) __clear_bit(i, cx->idxmsk); } @@ -2124,10 +2118,9 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, { struct hw_perf_event *hwc = &event->hw; struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct intel_excl_states *xlo, *xl; - unsigned long flags = 0; /* keep compiler happy */ int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; + struct intel_excl_states *xl; + unsigned long flags = 0; /* keep compiler happy */ /* * nothing needed if in group validation mode @@ -2141,7 +2134,6 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, return; xl = &excl_cntrs->states[tid]; - xlo = &excl_cntrs->states[o_tid]; if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; if (!--cpuc->n_excl) @@ -2161,7 +2153,7 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, * counter state as unused now */ if (hwc->idx >= 0) - xlo->state[hwc->idx] = INTEL_EXCL_UNUSED; + xl->state[hwc->idx] = INTEL_EXCL_UNUSED; if (!xl->sched_started) raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags); @@ -2200,16 +2192,12 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt { struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; struct event_constraint *c = cpuc->event_constraint[idx]; - struct intel_excl_states *xlo, *xl; + struct intel_excl_states *xl; int tid = cpuc->excl_thread_id; - int o_tid = 1 - tid; - int is_excl; if (cpuc->is_fake || !c) return; - is_excl = c->flags & PERF_X86_EVENT_EXCL; - if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) return; @@ -2219,15 +2207,14 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt return; xl = &excl_cntrs->states[tid]; - xlo = &excl_cntrs->states[o_tid]; WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock)); if (cntr >= 0) { - if (is_excl) - xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; + if (c->flags & PERF_X86_EVENT_EXCL) + xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; else - xlo->init_state[cntr] = INTEL_EXCL_SHARED; + xl->init_state[cntr] = INTEL_EXCL_SHARED; } } -- cgit v1.2.3 From b32ed7f5de262b10633bb6c6dbb7f8ba46598cf4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 12:38:21 +0200 Subject: perf/x86/intel: Add lockdep assert Lockdep is very good at finding incorrect IRQ state while locking and is far better at telling us if we hold a lock than the _is_locked() API. It also generates less code for !DEBUG kernels. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 0ea040562bb8..5182cee16367 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1926,7 +1926,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc) * in stop_event_scheduling() * makes scheduling appear as a transaction */ - WARN_ON_ONCE(!irqs_disabled()); raw_spin_lock(&excl_cntrs->lock); /* @@ -2208,7 +2207,7 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt xl = &excl_cntrs->states[tid]; - WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock)); + lockdep_assert_held(&excl_cntrs->lock); if (cntr >= 0) { if (c->flags & PERF_X86_EVENT_EXCL) -- cgit v1.2.3 From aaf932e8161e45291cc85085b6d850f1bbdf53c8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 10:57:24 +0200 Subject: perf/x86/intel: Simplify the dynamic constraint code somewhat We have two 'struct event_constraint' local variables in intel_get_excl_constraints(): 'cx' and 'c'. Instead of using 'cx' after the dynamic allocation, put all 'cx' inside the dynamic allocation block and use 'c' outside of it. Also use direct assignment to copy the structure; let the compiler figure it out. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 5182cee16367..9588b8d1264d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1970,7 +1970,6 @@ static struct event_constraint * intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, int idx, struct event_constraint *c) { - struct event_constraint *cx; struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; struct intel_excl_states *xlo; int tid = cpuc->excl_thread_id; @@ -1989,8 +1988,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, if (!excl_cntrs) return c; - cx = c; - /* * because we modify the constraint, we need * to make a copy. Static constraints come @@ -2000,6 +1997,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * been cloned (marked dynamic) */ if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { + struct event_constraint *cx; /* sanity check */ if (idx < 0) @@ -2014,13 +2012,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * initialize dynamic constraint * with static constraint */ - memcpy(cx, c, sizeof(*cx)); + *cx = *c; /* * mark constraint as dynamic, so we * can free it later on */ cx->flags |= PERF_X86_EVENT_DYNAMIC; + c = cx; } /* @@ -2054,37 +2053,37 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * SHARED : sibling counter measuring non-exclusive event * UNUSED : sibling counter unused */ - for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) { + for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { /* * exclusive event in sibling counter * our corresponding counter cannot be used * regardless of our event */ if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) - __clear_bit(i, cx->idxmsk); + __clear_bit(i, c->idxmsk); /* * if measuring an exclusive event, sibling * measuring non-exclusive, then counter cannot * be used */ if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) - __clear_bit(i, cx->idxmsk); + __clear_bit(i, c->idxmsk); } /* * recompute actual bit weight for scheduling algorithm */ - cx->weight = hweight64(cx->idxmsk64); + c->weight = hweight64(c->idxmsk64); /* * if we return an empty mask, then switch * back to static empty constraint to avoid * the cost of freeing later on */ - if (cx->weight == 0) - cx = &emptyconstraint; + if (c->weight == 0) + c = &emptyconstraint; - return cx; + return c; } static struct event_constraint * -- cgit v1.2.3 From 17186ccda374ae02ef231cbbc8f1825e7c19ddbd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 10:57:28 +0200 Subject: perf/x86/intel: Make WARN()ings consistent The intel_commit_scheduling() callback is pointlessly different from the start and stop scheduling callback. Furthermore, the constraint should never be NULL, so remove that test. Even though we'll never get called (because we NULL the callbacks) when !is_ht_workaround_enabled() put that test in. Collapse the (pointless) WARN_ON_ONCE() and bail on !cpuc->excl_cntrs -- this is doubly pointless, because its the same condition as is_ht_workaround_enabled() which was already pointless because the whole method won't ever be called. Furthremore, make all the !excl_cntrs test WARN_ON_ONCE(); they're all pointless, because the above, either the function ({get,put}_excl_constraint) are already predicated on it existing or the is_ht_workaround_enabled() thing is the same test. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9588b8d1264d..d7d30b41f6a3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1915,7 +1915,7 @@ intel_start_scheduling(struct cpu_hw_events *cpuc) /* * no exclusion needed */ - if (!excl_cntrs) + if (WARN_ON_ONCE(!excl_cntrs)) return; xl = &excl_cntrs->states[tid]; @@ -1949,7 +1949,7 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc) /* * no exclusion needed */ - if (!excl_cntrs) + if (WARN_ON_ONCE(!excl_cntrs)) return; xl = &excl_cntrs->states[tid]; @@ -1985,7 +1985,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, /* * no exclusion needed */ - if (!excl_cntrs) + if (WARN_ON_ONCE(!excl_cntrs)) return c; /* @@ -2126,9 +2126,7 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, if (cpuc->is_fake) return; - WARN_ON_ONCE(!excl_cntrs); - - if (!excl_cntrs) + if (WARN_ON_ONCE(!excl_cntrs)) return; xl = &excl_cntrs->states[tid]; @@ -2193,15 +2191,13 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt struct intel_excl_states *xl; int tid = cpuc->excl_thread_id; - if (cpuc->is_fake || !c) + if (cpuc->is_fake || !is_ht_workaround_enabled()) return; - if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) + if (WARN_ON_ONCE(!excl_cntrs)) return; - WARN_ON_ONCE(!excl_cntrs); - - if (!excl_cntrs) + if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) return; xl = &excl_cntrs->states[tid]; -- cgit v1.2.3 From 0c41e756b9c5a9899b5cd238226600f8f34c9b82 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 10:57:32 +0200 Subject: perf/x86/intel: Clean up intel_commit_scheduling() placement Move the code of intel_commit_scheduling() to the right place, which is in between start() and stop(). No change in functionality. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 4 +-- arch/x86/kernel/cpu/perf_event_intel.c | 60 +++++++++++++++++----------------- 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index ef78516850fb..e5609522255c 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -527,10 +527,10 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); - void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr); - void (*start_scheduling)(struct cpu_hw_events *cpuc); + void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr); + void (*stop_scheduling)(struct cpu_hw_events *cpuc); struct event_constraint *event_constraints; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index d7d30b41f6a3..ff56fc3f016e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1934,6 +1934,34 @@ intel_start_scheduling(struct cpu_hw_events *cpuc) memcpy(xl->init_state, xl->state, sizeof(xl->init_state)); } +static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) +{ + struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; + struct event_constraint *c = cpuc->event_constraint[idx]; + struct intel_excl_states *xl; + int tid = cpuc->excl_thread_id; + + if (cpuc->is_fake || !is_ht_workaround_enabled()) + return; + + if (WARN_ON_ONCE(!excl_cntrs)) + return; + + if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) + return; + + xl = &excl_cntrs->states[tid]; + + lockdep_assert_held(&excl_cntrs->lock); + + if (cntr >= 0) { + if (c->flags & PERF_X86_EVENT_EXCL) + xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; + else + xl->init_state[cntr] = INTEL_EXCL_SHARED; + } +} + static void intel_stop_scheduling(struct cpu_hw_events *cpuc) { @@ -2184,34 +2212,6 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, intel_put_excl_constraints(cpuc, event); } -static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) -{ - struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct event_constraint *c = cpuc->event_constraint[idx]; - struct intel_excl_states *xl; - int tid = cpuc->excl_thread_id; - - if (cpuc->is_fake || !is_ht_workaround_enabled()) - return; - - if (WARN_ON_ONCE(!excl_cntrs)) - return; - - if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) - return; - - xl = &excl_cntrs->states[tid]; - - lockdep_assert_held(&excl_cntrs->lock); - - if (cntr >= 0) { - if (c->flags & PERF_X86_EVENT_EXCL) - xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; - else - xl->init_state[cntr] = INTEL_EXCL_SHARED; - } -} - static void intel_pebs_aliases_core2(struct perf_event *event) { if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { @@ -2920,8 +2920,8 @@ static __init void intel_ht_bug(void) { x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED; - x86_pmu.commit_scheduling = intel_commit_scheduling; x86_pmu.start_scheduling = intel_start_scheduling; + x86_pmu.commit_scheduling = intel_commit_scheduling; x86_pmu.stop_scheduling = intel_stop_scheduling; } @@ -3377,8 +3377,8 @@ static __init int fixup_ht_bug(void) x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); - x86_pmu.commit_scheduling = NULL; x86_pmu.start_scheduling = NULL; + x86_pmu.commit_scheduling = NULL; x86_pmu.stop_scheduling = NULL; watchdog_nmi_enable_all(); -- cgit v1.2.3 From 1fe684e349e904adeed2883cfdeef259a21c94f4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 10:57:36 +0200 Subject: perf/x86/intel: Remove pointless tests Both intel_commit_scheduling() and intel_get_excl_contraints() test for cntr < 0. The only way that can happen (aside from a bug) is through validate_event(), however that is already captured by the cpuc->is_fake test. So remove these test and simplify the code. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ff56fc3f016e..6a3e794cdc06 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1954,12 +1954,10 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt lockdep_assert_held(&excl_cntrs->lock); - if (cntr >= 0) { - if (c->flags & PERF_X86_EVENT_EXCL) - xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; - else - xl->init_state[cntr] = INTEL_EXCL_SHARED; - } + if (c->flags & PERF_X86_EVENT_EXCL) + xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; + else + xl->init_state[cntr] = INTEL_EXCL_SHARED; } static void @@ -2027,10 +2025,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { struct event_constraint *cx; - /* sanity check */ - if (idx < 0) - return &emptyconstraint; - /* * grab pre-allocated constraint entry */ -- cgit v1.2.3 From 43ef205bded025432f5eeeb3503c11fe5cd1913e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 10:57:39 +0200 Subject: perf/x86/intel: Remove intel_excl_states::init_state For some obscure reason intel_{start,stop}_scheduling() copy the HT state to an intermediate array. This would make sense if we ever were to make changes to it which we'd have to discard. Except we don't. By the time we call intel_commit_scheduling() we're; as the name implies; committed to them. We'll never back out. A further hint its pointless is that stop_scheduling() unconditionally publishes the state. So the intermediate array is pointless, modify the state in place and kill the extra array. And remove the pointless array initialization: INTEL_EXCL_UNUSED == 0. Note; all is serialized by intel_excl_cntr::lock. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1 - arch/x86/kernel/cpu/perf_event.h | 1 - arch/x86/kernel/cpu/perf_event_intel.c | 22 ++-------------------- 3 files changed, 2 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4f7001f28936..d275da3d81dd 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -884,7 +884,6 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) } if (!assign || unsched) { - for (i = 0; i < n; i++) { e = cpuc->event_list[i]; /* diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index e5609522255c..89e6cd61e6ae 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -133,7 +133,6 @@ enum intel_excl_state_type { }; struct intel_excl_states { - enum intel_excl_state_type init_state[X86_PMC_IDX_MAX]; enum intel_excl_state_type state[X86_PMC_IDX_MAX]; bool sched_started; /* true if scheduling has started */ }; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 6a3e794cdc06..f3201439031d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1927,11 +1927,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc) * makes scheduling appear as a transaction */ raw_spin_lock(&excl_cntrs->lock); - - /* - * Save a copy of our state to work on. - */ - memcpy(xl->init_state, xl->state, sizeof(xl->init_state)); } static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) @@ -1955,9 +1950,9 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt lockdep_assert_held(&excl_cntrs->lock); if (c->flags & PERF_X86_EVENT_EXCL) - xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; + xl->state[cntr] = INTEL_EXCL_EXCLUSIVE; else - xl->init_state[cntr] = INTEL_EXCL_SHARED; + xl->state[cntr] = INTEL_EXCL_SHARED; } static void @@ -1980,11 +1975,6 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc) xl = &excl_cntrs->states[tid]; - /* - * Commit the working state. - */ - memcpy(xl->state, xl->init_state, sizeof(xl->state)); - xl->sched_started = false; /* * release shared state lock (acquired in intel_start_scheduling()) @@ -2519,19 +2509,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu) static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) { struct intel_excl_cntrs *c; - int i; c = kzalloc_node(sizeof(struct intel_excl_cntrs), GFP_KERNEL, cpu_to_node(cpu)); if (c) { raw_spin_lock_init(&c->lock); - for (i = 0; i < X86_PMC_IDX_MAX; i++) { - c->states[0].state[i] = INTEL_EXCL_UNUSED; - c->states[0].init_state[i] = INTEL_EXCL_UNUSED; - - c->states[1].state[i] = INTEL_EXCL_UNUSED; - c->states[1].init_state[i] = INTEL_EXCL_UNUSED; - } c->core_id = -1; } return c; -- cgit v1.2.3 From 8736e548db0f48244132bc36331496494625bbaf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 10:57:43 +0200 Subject: perf/x86: Simplify the x86_schedule_events() logic !x && y == ! (x || !y) Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d275da3d81dd..dbe3328f8ad7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -881,9 +881,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (x86_pmu.commit_scheduling) x86_pmu.commit_scheduling(cpuc, i, assign[i]); } - } - - if (!assign || unsched) { + } else { for (i = 0; i < n; i++) { e = cpuc->event_list[i]; /* -- cgit v1.2.3 From ba040653b48d32afa8b6c3331eae97c6bbb66f03 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 May 2015 11:36:13 +0200 Subject: perf/x86/intel: Simplify put_exclusive_constraints() Don't bother with taking locks if we're not actually going to do anything. Also, drop the _irqsave(), this is very much only called from IRQ-disabled context. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f3201439031d..74f19d9268bb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2130,7 +2130,6 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; int tid = cpuc->excl_thread_id; struct intel_excl_states *xl; - unsigned long flags = 0; /* keep compiler happy */ /* * nothing needed if in group validation mode @@ -2141,7 +2140,6 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, if (WARN_ON_ONCE(!excl_cntrs)) return; - xl = &excl_cntrs->states[tid]; if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; if (!--cpuc->n_excl) @@ -2149,22 +2147,25 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, } /* - * put_constraint may be called from x86_schedule_events() - * which already has the lock held so here make locking - * conditional + * If event was actually assigned, then mark the counter state as + * unused now. */ - if (!xl->sched_started) - raw_spin_lock_irqsave(&excl_cntrs->lock, flags); + if (hwc->idx >= 0) { + xl = &excl_cntrs->states[tid]; + + /* + * put_constraint may be called from x86_schedule_events() + * which already has the lock held so here make locking + * conditional. + */ + if (!xl->sched_started) + raw_spin_lock(&excl_cntrs->lock); - /* - * if event was actually assigned, then mark the - * counter state as unused now - */ - if (hwc->idx >= 0) xl->state[hwc->idx] = INTEL_EXCL_UNUSED; - if (!xl->sched_started) - raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags); + if (!xl->sched_started) + raw_spin_unlock(&excl_cntrs->lock); + } } static void -- cgit v1.2.3 From 74387bcb711b7b2ed65c0ed08953e13d4e31969e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 21 Apr 2015 16:16:13 +0300 Subject: perf/x86/intel/pt: Kill an unused variable Currently, there's a set-but-not-used variable in setup_topa_index(); this patch gets rid of it. And while at it, fixes a style issue with brackets around a one-line block. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: hpa@zytor.com Link: http://lkml.kernel.org/r/1429622177-22843-2-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_pt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index 5b804f96ad66..8a4595650d6e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -674,7 +674,7 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf) struct topa *cur = buf->first, *prev = buf->last; struct topa_entry *te_cur = TOPA_ENTRY(cur, 0), *te_prev = TOPA_ENTRY(prev, prev->last - 1); - int pg = 0, idx = 0, ntopa = 0; + int pg = 0, idx = 0; while (pg < buf->nr_pages) { int tidx; @@ -689,9 +689,9 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf) /* advance to next topa table */ idx = 0; cur = list_entry(cur->list.next, struct topa, list); - ntopa++; - } else + } else { idx++; + } te_cur = TOPA_ENTRY(cur, idx); } -- cgit v1.2.3 From cf302bfdf3039853fce812ae1ffd0ac24f5b468f Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 21 Apr 2015 16:16:15 +0300 Subject: perf/x86/intel/pt: Document pt_buffer_reset_markers() The comments in the driver don't make it absolutely clear as to what exactly is the calling order and other possible constraints of buffer management functions. Document constraints and calling order for the buffer configuration functions. While at it, replace a redundant check in pt_buffer_reset_markers() with an explanation why it is not needed. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: hpa@zytor.com Link: http://lkml.kernel.org/r/1429622177-22843-4-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_pt.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index 8a4595650d6e..b2746eafa0cd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -609,7 +609,12 @@ static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg) * @handle: Current output handle. * * Place INT and STOP marks to prevent overwriting old data that the consumer - * hasn't yet collected. + * hasn't yet collected and waking up the consumer after a certain fraction of + * the buffer has filled up. Only needed and sensible for non-snapshot counters. + * + * This obviously relies on buf::head to figure out buffer markers, so it has + * to be called after pt_buffer_reset_offsets() and before the hardware tracing + * is enabled. */ static int pt_buffer_reset_markers(struct pt_buffer *buf, struct perf_output_handle *handle) @@ -618,9 +623,6 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, unsigned long head = local64_read(&buf->head); unsigned long idx, npages, wakeup; - if (buf->snapshot) - return 0; - /* can't stop in the middle of an output region */ if (buf->output_off + handle->size + 1 < sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) @@ -901,6 +903,7 @@ void intel_pt_interrupt(void) } pt_buffer_reset_offsets(buf, pt->handle.head); + /* snapshot counters don't use PMI, so it's safe */ ret = pt_buffer_reset_markers(buf, &pt->handle); if (ret) { perf_aux_output_end(&pt->handle, 0, true); -- cgit v1.2.3 From 5b1dbd17c0dee679b154ce47f534677b7e0f7ad6 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 21 Apr 2015 16:16:16 +0300 Subject: perf/x86/intel/pt: Document pt_buffer_reset_offsets() Currently, the description of pt_buffer_reset_offsets() lacks information about its calling constraints and ordering with regards to other buffer management functions. Add a clarification about when this function has to be called. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: hpa@zytor.com Link: http://lkml.kernel.org/r/1429622177-22843-5-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_pt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index b2746eafa0cd..40ba5e4312d4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -705,7 +705,14 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf) * @head: Write pointer (aux_head) from AUX buffer. * * Find the ToPA table and entry corresponding to given @head and set buffer's - * "current" pointers accordingly. + * "current" pointers accordingly. This is done after we have obtained the + * current aux_head position from a successful call to perf_aux_output_begin() + * to make sure the hardware is writing to the right place. + * + * This function modifies buf::{cur,cur_idx,output_off} that will be programmed + * into PT msrs when the tracing is enabled and buf::head and buf::data_size, + * which are used to determine INT and STOP markers' locations by a subsequent + * call to pt_buffer_reset_markers(). */ static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head) { -- cgit v1.2.3 From 0a487aad2dfd088bcbbe1766944280b40ff969a5 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 21 Apr 2015 16:16:17 +0300 Subject: perf/x86/intel/pt: Kill pt_is_running() Initially, we were trying to guard against scenarios where somebody attaches to the system with a hardware debugger while PT is enabled from software and pt_is_running() tries to make sure we handle this better, but the truth is, there is still a race window no matter what and people with hardware debuggers should really know what they are doing anyway. In other words, there is no point in keeping this one around, and it's one RDMSR instructions fewer in the fast path. The case when PT is enabled by the BIOS at boot time is handled in the driver initialization path and doesn't use pt_is_running(). This patch gets rid of it. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: hpa@zytor.com Link: http://lkml.kernel.org/r/1429622177-22843-6-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_pt.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index 40ba5e4312d4..a2d407172d61 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -187,15 +187,6 @@ static bool pt_event_valid(struct perf_event *event) * These all are cpu affine and operate on a local PT */ -static bool pt_is_running(void) -{ - u64 ctl; - - rdmsrl(MSR_IA32_RTIT_CTL, ctl); - - return !!(ctl & RTIT_CTL_TRACEEN); -} - static void pt_config(struct perf_event *event) { u64 reg;