diff options
-rw-r--r-- | tools/power/x86/turbostat/Makefile | 1 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 26 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 1017 |
3 files changed, 673 insertions, 371 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 8561e7ddca59..8792ad8dbf83 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -10,6 +10,7 @@ endif turbostat : turbostat.c CFLAGS += -Wall CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' +CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' %: %.c @mkdir -p $(BUILD_OUTPUT) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 492e84fbebfa..03cb639b292e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -25,9 +25,27 @@ Some information is not available on older processors. .SS Options Options can be specified with a single or double '-', and only as much of the option name as necessary to disambiguate it from others is necessary. Note that options are case-sensitive. -\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter. .PP -\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. +\fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. +.nf + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP} + msrDDD is a decimal offset, eg. msr16 + msr0xXXX is a hex offset, eg. msr0x10 + + scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} + sample and print the counter for every cpu, core, or package. + default: cpu + + size: {\fBu32\fP | \fBu64\fP } + MSRs are read as 64-bits, u32 truncates the displayed value to 32-bits. + default: u64 + + format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP} + 'raw' shows the MSR contents in hex. + 'delta' shows the difference in values during the measurement interval. + 'percent' shows the delta as a percentage of the cycles elapsed. + default: delta +.fi .PP \fB--Dump\fP displays the raw counter values. .PP @@ -43,10 +61,6 @@ The file is truncated if it already exists, and it is created if it does not exi .PP \fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. .PP -\fB--MSR MSR#\fP shows the specified 64-bit MSR value. -.PP -\fB--msr MSR#\fP shows the specified 32-bit MSR value. -.PP \fB--Package\fP limits output to the system summary plus the 1st thread in each Package. .PP \fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 3e199b508a96..f13f61b065c6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -21,6 +21,7 @@ #define _GNU_SOURCE #include MSRHEADER +#include INTEL_FAMILY_HEADER #include <stdarg.h> #include <stdio.h> #include <err.h> @@ -51,8 +52,6 @@ unsigned int debug; unsigned int rapl_joules; unsigned int summary_only; unsigned int dump_only; -unsigned int skip_c0; -unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; unsigned int do_knl_cstates; @@ -72,10 +71,6 @@ unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; -unsigned int extra_msr_offset32; -unsigned int extra_msr_offset64; -unsigned int extra_delta_offset32; -unsigned int extra_delta_offset64; unsigned int aperf_mperf_multiplier = 1; int do_irq = 1; int do_smi; @@ -131,9 +126,8 @@ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ #define RAPL_DRAM_POWER_INFO (1 << 5) /* 0x61c MSR_DRAM_POWER_INFO */ -#define RAPL_CORES (1 << 6) +#define RAPL_CORES_POWER_LIMIT (1 << 6) /* 0x638 MSR_PP0_POWER_LIMIT */ - /* 0x639 MSR_PP0_ENERGY_STATUS */ #define RAPL_CORE_POLICY (1 << 7) /* 0x63a MSR_PP0_POLICY */ @@ -141,11 +135,20 @@ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ /* 0x640 MSR_PP1_POWER_LIMIT */ /* 0x641 MSR_PP1_ENERGY_STATUS */ /* 0x642 MSR_PP1_POLICY */ + +#define RAPL_CORES_ENERGY_STATUS (1 << 9) + /* 0x639 MSR_PP0_ENERGY_STATUS */ +#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT) #define TJMAX_DEFAULT 100 #define MAX(a, b) ((a) > (b) ? (a) : (b)) -int aperf_mperf_unstable; +/* + * buffer size used by sscanf() for added column names + * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters + */ +#define NAME_BYTES 20 + int backwards_count; char *progname; @@ -157,16 +160,13 @@ struct thread_data { unsigned long long aperf; unsigned long long mperf; unsigned long long c1; - unsigned long long extra_msr64; - unsigned long long extra_delta64; - unsigned long long extra_msr32; - unsigned long long extra_delta32; unsigned int irq_count; unsigned int smi_count; unsigned int cpu_id; unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 + unsigned long long counter[1]; } *thread_even, *thread_odd; struct core_data { @@ -175,6 +175,7 @@ struct core_data { unsigned long long c7; unsigned int core_temp_c; unsigned int core_id; + unsigned long long counter[1]; } *core_even, *core_odd; struct pkg_data { @@ -199,7 +200,7 @@ struct pkg_data { unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; - + unsigned long long counter[1]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -213,11 +214,33 @@ struct pkg_data { (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) +enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; +enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS}; +enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT}; + +struct msr_counter { + unsigned int msr_num; + char name[NAME_BYTES]; + unsigned int width; + enum counter_type type; + enum counter_format format; + struct msr_counter *next; +}; + +struct sys_counters { + unsigned int thread_counter_bytes; + unsigned int core_counter_bytes; + unsigned int package_counter_bytes; + struct msr_counter *tp; + struct msr_counter *cp; + struct msr_counter *pp; +} sys; + struct system_summary { struct thread_data threads; struct core_data cores; struct pkg_data packages; -} sum, average; +} average; struct topo_params { @@ -319,120 +342,148 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) /* * Example Format w/ field column widths: * - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt + * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 ThreadC CoreTmp CoreCnt PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt PkgCnt * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 */ void print_header(void) { + struct msr_counter *mp; + if (show_pkg) - outp += sprintf(outp, " Package"); + outp += sprintf(outp, "\tPackage"); if (show_core) - outp += sprintf(outp, " Core"); + outp += sprintf(outp, "\tCore"); if (show_cpu) - outp += sprintf(outp, " CPU"); + outp += sprintf(outp, "\tCPU"); if (has_aperf) - outp += sprintf(outp, " Avg_MHz"); + outp += sprintf(outp, "\tAvg_MHz"); if (has_aperf) - outp += sprintf(outp, " Busy%%"); + outp += sprintf(outp, "\tBusy%%"); if (has_aperf) - outp += sprintf(outp, " Bzy_MHz"); - outp += sprintf(outp, " TSC_MHz"); - - if (extra_delta_offset32) - outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); - if (extra_delta_offset64) - outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64); - if (extra_msr_offset32) - outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); - if (extra_msr_offset64) - outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); + outp += sprintf(outp, "\tBzy_MHz"); + outp += sprintf(outp, "\tTSC_MHz"); if (!debug) goto done; if (do_irq) - outp += sprintf(outp, " IRQ"); + outp += sprintf(outp, "\tIRQ"); if (do_smi) - outp += sprintf(outp, " SMI"); + outp += sprintf(outp, "\tSMI"); if (do_nhm_cstates) - outp += sprintf(outp, " CPU%%c1"); + outp += sprintf(outp, "\tCPU%%c1"); if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, " CPU%%c3"); + outp += sprintf(outp, "\tCPU%%c3"); if (do_nhm_cstates) - outp += sprintf(outp, " CPU%%c6"); + outp += sprintf(outp, "\tCPU%%c6"); if (do_snb_cstates) - outp += sprintf(outp, " CPU%%c7"); + outp += sprintf(outp, "\tCPU%%c7"); + + for (mp = sys.tp; mp; mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 64) + outp += sprintf(outp, "\t%18.18s", mp->name); + else + outp += sprintf(outp, "\t%10.10s", mp->name); + } else { + outp += sprintf(outp, "\t%-7.7s", mp->name); + } + } if (do_dts) - outp += sprintf(outp, " CoreTmp"); + outp += sprintf(outp, "\tCoreTmp"); + + for (mp = sys.cp; mp; mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 64) + outp += sprintf(outp, "\t%18.18s", mp->name); + else + outp += sprintf(outp, "\t%10.10s", mp->name); + } else { + outp += sprintf(outp, "\t%-7.7s", mp->name); + } + } + if (do_ptm) - outp += sprintf(outp, " PkgTmp"); + outp += sprintf(outp, "\tPkgTmp"); if (do_gfx_rc6_ms) - outp += sprintf(outp, " GFX%%rc6"); + outp += sprintf(outp, "\tGFX%%rc6"); if (do_gfx_mhz) - outp += sprintf(outp, " GFXMHz"); + outp += sprintf(outp, "\tGFXMHz"); if (do_skl_residency) { - outp += sprintf(outp, " Totl%%C0"); - outp += sprintf(outp, " Any%%C0"); - outp += sprintf(outp, " GFX%%C0"); - outp += sprintf(outp, " CPUGFX%%"); + outp += sprintf(outp, "\tTotl%%C0"); + outp += sprintf(outp, "\tAny%%C0"); + outp += sprintf(outp, "\tGFX%%C0"); + outp += sprintf(outp, "\tCPUGFX%%"); } if (do_pc2) - outp += sprintf(outp, " Pkg%%pc2"); + outp += sprintf(outp, "\tPkg%%pc2"); if (do_pc3) - outp += sprintf(outp, " Pkg%%pc3"); + outp += sprintf(outp, "\tPkg%%pc3"); if (do_pc6) - outp += sprintf(outp, " Pkg%%pc6"); + outp += sprintf(outp, "\tPkg%%pc6"); if (do_pc7) - outp += sprintf(outp, " Pkg%%pc7"); + outp += sprintf(outp, "\tPkg%%pc7"); if (do_c8_c9_c10) { - outp += sprintf(outp, " Pkg%%pc8"); - outp += sprintf(outp, " Pkg%%pc9"); - outp += sprintf(outp, " Pk%%pc10"); + outp += sprintf(outp, "\tPkg%%pc8"); + outp += sprintf(outp, "\tPkg%%pc9"); + outp += sprintf(outp, "\tPk%%pc10"); } if (do_rapl && !rapl_joules) { if (do_rapl & RAPL_PKG) - outp += sprintf(outp, " PkgWatt"); - if (do_rapl & RAPL_CORES) - outp += sprintf(outp, " CorWatt"); + outp += sprintf(outp, "\tPkgWatt"); + if (do_rapl & RAPL_CORES_ENERGY_STATUS) + outp += sprintf(outp, "\tCorWatt"); if (do_rapl & RAPL_GFX) - outp += sprintf(outp, " GFXWatt"); + outp += sprintf(outp, "\tGFXWatt"); if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, " RAMWatt"); + outp += sprintf(outp, "\tRAMWatt"); if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, " PKG_%%"); + outp += sprintf(outp, "\tPKG_%%"); if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, " RAM_%%"); + outp += sprintf(outp, "\tRAM_%%"); } else if (do_rapl && rapl_joules) { if (do_rapl & RAPL_PKG) - outp += sprintf(outp, " Pkg_J"); - if (do_rapl & RAPL_CORES) - outp += sprintf(outp, " Cor_J"); + outp += sprintf(outp, "\tPkg_J"); + if (do_rapl & RAPL_CORES_ENERGY_STATUS) + outp += sprintf(outp, "\tCor_J"); if (do_rapl & RAPL_GFX) - outp += sprintf(outp, " GFX_J"); + outp += sprintf(outp, "\tGFX_J"); if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, " RAM_J"); + outp += sprintf(outp, "\tRAM_J"); if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, " PKG_%%"); + outp += sprintf(outp, "\tPKG_%%"); if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, " RAM_%%"); - outp += sprintf(outp, " time"); - + outp += sprintf(outp, "\tRAM_%%"); } - done: + for (mp = sys.pp; mp; mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 64) + outp += sprintf(outp, "\t%18.18s", mp->name); + else + outp += sprintf(outp, "\t%10.10s", mp->name); + } else { + outp += sprintf(outp, "\t%-7.7s", mp->name); + } + } + +done: outp += sprintf(outp, "\n"); } int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + int i; + struct msr_counter *mp; + outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); if (t) { @@ -442,18 +493,16 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "aperf: %016llX\n", t->aperf); outp += sprintf(outp, "mperf: %016llX\n", t->mperf); outp += sprintf(outp, "c1: %016llX\n", t->c1); - outp += sprintf(outp, "msr0x%x: %08llX\n", - extra_delta_offset32, t->extra_delta32); - outp += sprintf(outp, "msr0x%x: %016llX\n", - extra_delta_offset64, t->extra_delta64); - outp += sprintf(outp, "msr0x%x: %08llX\n", - extra_msr_offset32, t->extra_msr32); - outp += sprintf(outp, "msr0x%x: %016llX\n", - extra_msr_offset64, t->extra_msr64); + if (do_irq) outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); if (do_smi) outp += sprintf(outp, "SMI: %08X\n", t->smi_count); + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", + i, mp->msr_num, t->counter[i]); + } } if (c) { @@ -462,6 +511,11 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c6: %016llX\n", c->c6); outp += sprintf(outp, "c7: %016llX\n", c->c7); outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", + i, mp->msr_num, c->counter[i]); + } } if (p) { @@ -491,6 +545,11 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "Throttle RAM: %0X\n", p->rapl_dram_perf_status); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", + i, mp->msr_num, p->counter[i]); + } } outp += sprintf(outp, "\n"); @@ -506,6 +565,8 @@ int format_counters(struct thread_data *t, struct core_data *c, { double interval_float; char *fmt8; + int i; + struct msr_counter *mp; /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) @@ -520,99 +581,103 @@ int format_counters(struct thread_data *t, struct core_data *c, /* topo columns, print blanks on 1st (average) line */ if (t == &average.threads) { if (show_pkg) - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); if (show_core) - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); if (show_cpu) - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); } else { if (show_pkg) { if (p) - outp += sprintf(outp, "%8d", p->package_id); + outp += sprintf(outp, "\t%d", p->package_id); else - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); } if (show_core) { if (c) - outp += sprintf(outp, "%8d", c->core_id); + outp += sprintf(outp, "\t%d", c->core_id); else - outp += sprintf(outp, " -"); + outp += sprintf(outp, "\t-"); } if (show_cpu) - outp += sprintf(outp, "%8d", t->cpu_id); + outp += sprintf(outp, "\t%d", t->cpu_id); } /* Avg_MHz */ if (has_aperf) - outp += sprintf(outp, "%8.0f", + outp += sprintf(outp, "\t%.0f", 1.0 / units * t->aperf / interval_float); /* Busy% */ - if (has_aperf) { - if (!skip_c0) - outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); - else - outp += sprintf(outp, "********"); - } + if (has_aperf) + outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); /* Bzy_MHz */ if (has_aperf) { if (has_base_hz) - outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf); + outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf); else - outp += sprintf(outp, "%8.0f", + outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); } /* TSC_MHz */ - outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); - - /* delta */ - if (extra_delta_offset32) - outp += sprintf(outp, " %11llu", t->extra_delta32); - - /* DELTA */ - if (extra_delta_offset64) - outp += sprintf(outp, " %11llu", t->extra_delta64); - /* msr */ - if (extra_msr_offset32) - outp += sprintf(outp, " 0x%08llx", t->extra_msr32); - - /* MSR */ - if (extra_msr_offset64) - outp += sprintf(outp, " 0x%016llx", t->extra_msr64); + outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); if (!debug) goto done; /* IRQ */ if (do_irq) - outp += sprintf(outp, "%8d", t->irq_count); + outp += sprintf(outp, "\t%d", t->irq_count); /* SMI */ if (do_smi) - outp += sprintf(outp, "%8d", t->smi_count); + outp += sprintf(outp, "\t%d", t->smi_count); - if (do_nhm_cstates) { - if (!skip_c1) - outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc); - else - outp += sprintf(outp, "********"); - } + if (do_nhm_cstates) + outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc); /* print per-core data only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); if (do_nhm_cstates) - outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); if (do_snb_cstates) - outp += sprintf(outp, "%8.2f", 100.0 * c->c7/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 32) + outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]); + else + outp += sprintf(outp, "\t0x%016llx", t->counter[i]); + } else if (mp->format == FORMAT_DELTA) { + outp += sprintf(outp, "\t%8lld", t->counter[i]); + } else if (mp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc); + } + } + if (do_dts) - outp += sprintf(outp, "%8d", c->core_temp_c); + outp += sprintf(outp, "\t%d", c->core_temp_c); + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 32) + outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]); + else + outp += sprintf(outp, "\t0x%016llx", c->counter[i]); + } else if (mp->format == FORMAT_DELTA) { + outp += sprintf(outp, "\t%8lld", c->counter[i]); + } else if (mp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc); + } + } /* print per-package data only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) @@ -620,42 +685,42 @@ int format_counters(struct thread_data *t, struct core_data *c, /* PkgTmp */ if (do_ptm) - outp += sprintf(outp, "%8d", p->pkg_temp_c); + outp += sprintf(outp, "\t%d", p->pkg_temp_c); /* GFXrc6 */ if (do_gfx_rc6_ms) { - if (p->gfx_rc6_ms == -1) { /* detect counter reset */ - outp += sprintf(outp, " ***.**"); + if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ + outp += sprintf(outp, "\t**.**"); } else { - outp += sprintf(outp, "%8.2f", + outp += sprintf(outp, "\t%.2f", p->gfx_rc6_ms / 10.0 / interval_float); } } /* GFXMHz */ if (do_gfx_mhz) - outp += sprintf(outp, "%8d", p->gfx_mhz); + outp += sprintf(outp, "\t%d", p->gfx_mhz); /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (do_skl_residency) { - outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); } if (do_pc2) - outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc); if (do_pc3) - outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc); if (do_pc6) - outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc); if (do_pc7) - outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc); if (do_c8_c9_c10) { - outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pc9/t->tsc); - outp += sprintf(outp, "%8.2f", 100.0 * p->pc10/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc); } /* @@ -663,14 +728,14 @@ int format_counters(struct thread_data *t, struct core_data *c, * indicate that results are suspect by printing "**" in fraction place. */ if (interval_float < rapl_joule_counter_range) - fmt8 = "%8.2f"; + fmt8 = "\t%.2f"; else - fmt8 = " %6.0f**"; + fmt8 = "%6.0f**"; if (do_rapl && !rapl_joules) { if (do_rapl & RAPL_PKG) outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); - if (do_rapl & RAPL_CORES) + if (do_rapl & RAPL_CORES_ENERGY_STATUS) outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); if (do_rapl & RAPL_GFX) outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); @@ -697,9 +762,20 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (do_rapl & RAPL_DRAM_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - - outp += sprintf(outp, fmt8, interval_float); } + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 32) + outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]); + else + outp += sprintf(outp, "\t0x%016llx", p->counter[i]); + } else if (mp->format == FORMAT_DELTA) { + outp += sprintf(outp, "\t%8lld", p->counter[i]); + } else if (mp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc); + } + } + done: outp += sprintf(outp, "\n"); @@ -752,9 +828,11 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ old = 0x100000000 + new - old; \ } -void +int delta_package(struct pkg_data *new, struct pkg_data *old) { + int i; + struct msr_counter *mp; if (do_skl_residency) { old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; @@ -788,24 +866,46 @@ delta_package(struct pkg_data *new, struct pkg_data *old) DELTA_WRAP32(new->energy_dram, old->energy_dram); DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + old->counter[i] = new->counter[i]; + else + old->counter[i] = new->counter[i] - old->counter[i]; + } + + return 0; } void delta_core(struct core_data *new, struct core_data *old) { + int i; + struct msr_counter *mp; + old->c3 = new->c3 - old->c3; old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + old->counter[i] = new->counter[i]; + else + old->counter[i] = new->counter[i] - old->counter[i]; + } } /* * old = new - old */ -void +int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) { + int i; + struct msr_counter *mp; + old->tsc = new->tsc - old->tsc; /* check for TSC < 1 Mcycles over interval */ @@ -821,20 +921,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->aperf = new->aperf - old->aperf; old->mperf = new->mperf - old->mperf; } else { - - if (!aperf_mperf_unstable) { - fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname); - fprintf(outf, "* Frequency results do not cover entire interval *\n"); - fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n"); - - aperf_mperf_unstable = 1; - } - /* - * mperf delta is likely a huge "positive" number - * can not use it for calculating c0 time - */ - skip_c0 = 1; - skip_c1 = 1; + return -1; } } @@ -865,52 +952,53 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->mperf = 1; /* divide by 0 protection */ } - old->extra_delta32 = new->extra_delta32 - old->extra_delta32; - old->extra_delta32 &= 0xFFFFFFFF; - - old->extra_delta64 = new->extra_delta64 - old->extra_delta64; - - /* - * Extra MSR is just a snapshot, simply copy latest w/o subtracting - */ - old->extra_msr32 = new->extra_msr32; - old->extra_msr64 = new->extra_msr64; - if (do_irq) old->irq_count = new->irq_count - old->irq_count; if (do_smi) old->smi_count = new->smi_count - old->smi_count; + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + old->counter[i] = new->counter[i]; + else + old->counter[i] = new->counter[i] - old->counter[i]; + } + return 0; } int delta_cpu(struct thread_data *t, struct core_data *c, struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) { + int retval = 0; + /* calculate core delta only for 1st thread in core */ if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) delta_core(c, c2); /* always calculate thread delta */ - delta_thread(t, t2, c2); /* c2 is core delta */ + retval = delta_thread(t, t2, c2); /* c2 is core delta */ + if (retval) + return retval; /* calculate package delta only for 1st core in package */ if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) - delta_package(p, p2); + retval = delta_package(p, p2); - return 0; + return retval; } void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + int i; + struct msr_counter *mp; + t->tsc = 0; t->aperf = 0; t->mperf = 0; t->c1 = 0; - t->extra_delta32 = 0; - t->extra_delta64 = 0; - t->irq_count = 0; t->smi_count = 0; @@ -948,21 +1036,36 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->gfx_rc6_ms = 0; p->gfx_mhz = 0; + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) + t->counter[i] = 0; + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) + c->counter[i] = 0; + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) + p->counter[i] = 0; } int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + int i; + struct msr_counter *mp; + average.threads.tsc += t->tsc; average.threads.aperf += t->aperf; average.threads.mperf += t->mperf; average.threads.c1 += t->c1; - average.threads.extra_delta32 += t->extra_delta32; - average.threads.extra_delta64 += t->extra_delta64; - average.threads.irq_count += t->irq_count; average.threads.smi_count += t->smi_count; + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.threads.counter[i] += t->counter[i]; + } + /* sum per-core values only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -973,6 +1076,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.cores.counter[i] += c->counter[i]; + } + /* sum per-pkg values only for 1st core in pkg */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; @@ -1007,6 +1116,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.packages.counter[i] += p->counter[i]; + } return 0; } /* @@ -1016,6 +1131,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + int i; + struct msr_counter *mp; + clear_counters(&average.threads, &average.cores, &average.packages); for_all_cpus(sum_counters, t, c, p); @@ -1025,11 +1143,6 @@ void compute_average(struct thread_data *t, struct core_data *c, average.threads.mperf /= topo.num_cpus; average.threads.c1 /= topo.num_cpus; - average.threads.extra_delta32 /= topo.num_cpus; - average.threads.extra_delta32 &= 0xFFFFFFFF; - - average.threads.extra_delta64 /= topo.num_cpus; - average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; @@ -1052,6 +1165,22 @@ void compute_average(struct thread_data *t, struct core_data *c, average.packages.pc8 /= topo.num_packages; average.packages.pc9 /= topo.num_packages; average.packages.pc10 /= topo.num_packages; + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.threads.counter[i] /= topo.num_cpus; + } + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.cores.counter[i] /= topo.num_cores; + } + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.packages.counter[i] /= topo.num_packages; + } } static unsigned long long rdtsc(void) @@ -1073,6 +1202,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int cpu = t->cpu_id; unsigned long long msr; int aperf_mperf_retry_count = 0; + struct msr_counter *mp; + int i; if (cpu_migrate(cpu)) { fprintf(outf, "Could not migrate to CPU %d\n", cpu); @@ -1145,31 +1276,18 @@ retry: return -5; t->smi_count = msr & 0xFFFFFFFF; } - if (extra_delta_offset32) { - if (get_msr(cpu, extra_delta_offset32, &msr)) - return -5; - t->extra_delta32 = msr & 0xFFFFFFFF; - } - - if (extra_delta_offset64) - if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64)) - return -5; - - if (extra_msr_offset32) { - if (get_msr(cpu, extra_msr_offset32, &msr)) - return -5; - t->extra_msr32 = msr & 0xFFFFFFFF; - } - - if (extra_msr_offset64) - if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) - return -5; if (use_c1_residency_msr) { if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) return -6; } + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (get_msr(cpu, mp->msr_num, &t->counter[i])) + return -10; + } + + /* collect core counters only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -1197,6 +1315,10 @@ retry: c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (get_msr(cpu, mp->msr_num, &c->counter[i])) + return -10; + } /* collect package counters only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) @@ -1237,7 +1359,7 @@ retry: return -13; p->energy_pkg = msr & 0xFFFFFFFF; } - if (do_rapl & RAPL_CORES) { + if (do_rapl & RAPL_CORES_ENERGY_STATUS) { if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) return -14; p->energy_cores = msr & 0xFFFFFFFF; @@ -1274,6 +1396,11 @@ retry: if (do_gfx_mhz) p->gfx_mhz = gfx_cur_mhz; + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (get_msr(cpu, mp->msr_num, &p->counter[i])) + return -10; + } + return 0; } @@ -1310,6 +1437,7 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; static void @@ -1638,7 +1766,7 @@ void free_fd_percpu(void) { int i; - for (i = 0; i < topo.max_cpu_num; ++i) { + for (i = 0; i < topo.max_cpu_num + 1; ++i) { if (fd_percpu[i] != 0) close(fd_percpu[i]); } @@ -2071,7 +2199,10 @@ restart: } gettimeofday(&tv_odd, (struct timezone *)NULL); timersub(&tv_odd, &tv_even, &tv_delta); - for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); + if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) { + re_initialize(); + goto restart; + } compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); flush_output_stdout(); @@ -2087,7 +2218,10 @@ restart: } gettimeofday(&tv_even, (struct timezone *)NULL); timersub(&tv_even, &tv_odd, &tv_delta); - for_all_cpus_2(delta_cpu, EVE |