summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2017-07-03 14:17:16 +0200
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2017-07-03 14:17:16 +0200
commit16b5b092407d3fbe23431ca58804fd1481f811b2 (patch)
tree12cedc83119caf811a19e53588b63cb24de46f0d /tools
parent875aabf52e21f84cb6700a760dc734c260c54e7e (diff)
parent902bef73faa99b8c024e0f18c6199872b7cccb52 (diff)
Merge branch 'pm-tools'
* pm-tools: cpupower: Add support for new AMD family 0x17 cpupower: Fix bug where return value was not used tools/power turbostat: update version number tools/power turbostat: decode MSR_IA32_MISC_ENABLE only on Intel tools/power turbostat: stop migrating, unless '-m' tools/power turbostat: if --debug, print sampling overhead tools/power turbostat: hide SKL counters, when not requested intel_pstate: use updated msr-index.h HWP.EPP values tools/power x86_energy_perf_policy: support HWP.EPP x86: msr-index.h: fix shifts to ULL results in HWP macros. x86: msr-index.h: define HWP.EPP values x86: msr-index.h: define EPB mid-points
Diffstat (limited to 'tools')
-rw-r--r--tools/power/cpupower/utils/helpers/amd.c31
-rw-r--r--tools/power/cpupower/utils/helpers/helpers.h2
-rw-r--r--tools/power/cpupower/utils/helpers/misc.c23
-rw-r--r--tools/power/x86/turbostat/turbostat.c94
-rw-r--r--tools/power/x86/x86_energy_perf_policy/Makefile27
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8241
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c1504
7 files changed, 1621 insertions, 301 deletions
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index 6437ef39aeea..5fd5c5b8c7b8 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -26,6 +26,15 @@ union msr_pstate {
unsigned res3:21;
unsigned en:1;
} bits;
+ struct {
+ unsigned fid:8;
+ unsigned did:6;
+ unsigned vid:8;
+ unsigned iddval:8;
+ unsigned idddiv:2;
+ unsigned res1:30;
+ unsigned en:1;
+ } fam17h_bits;
unsigned long long val;
};
@@ -35,6 +44,8 @@ static int get_did(int family, union msr_pstate pstate)
if (family == 0x12)
t = pstate.val & 0xf;
+ else if (family == 0x17)
+ t = pstate.fam17h_bits.did;
else
t = pstate.bits.did;
@@ -44,16 +55,20 @@ static int get_did(int family, union msr_pstate pstate)
static int get_cof(int family, union msr_pstate pstate)
{
int t;
- int fid, did;
+ int fid, did, cof;
did = get_did(family, pstate);
-
- t = 0x10;
- fid = pstate.bits.fid;
- if (family == 0x11)
- t = 0x8;
-
- return (100 * (fid + t)) >> did;
+ if (family == 0x17) {
+ fid = pstate.fam17h_bits.fid;
+ cof = 200 * fid / did;
+ } else {
+ t = 0x10;
+ fid = pstate.bits.fid;
+ if (family == 0x11)
+ t = 0x8;
+ cof = (100 * (fid + t)) >> did;
+ }
+ return cof;
}
/* Needs:
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index afb66f80554e..799a18be60aa 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -70,6 +70,8 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
#define CPUPOWER_CAP_IS_SNB 0x00000020
#define CPUPOWER_CAP_INTEL_IDA 0x00000040
+#define CPUPOWER_AMD_CPBDIS 0x02000000
+
#define MAX_HW_PSTATES 10
struct cpupower_cpu_info {
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 1609243f5c64..601d719d4e60 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -2,11 +2,14 @@
#include "helpers/helpers.h"
+#define MSR_AMD_HWCR 0xc0010015
+
int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
int *states)
{
struct cpupower_cpu_info cpu_info;
int ret;
+ unsigned long long val;
*support = *active = *states = 0;
@@ -16,10 +19,22 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_CBP) {
*support = 1;
- amd_pci_get_num_boost_states(active, states);
- if (ret <= 0)
- return ret;
- *support = 1;
+
+ /* AMD Family 0x17 does not utilize PCI D18F4 like prior
+ * families and has no fixed discrete boost states but
+ * has Hardware determined variable increments instead.
+ */
+
+ if (cpu_info.family == 0x17) {
+ if (!read_msr(cpu, MSR_AMD_HWCR, &val)) {
+ if (!(val & CPUPOWER_AMD_CPBDIS))
+ *active = 1;
+ }
+ } else {
+ ret = amd_pci_get_num_boost_states(active, states);
+ if (ret)
+ return ret;
+ }
} else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA)
*support = *active = 1;
return 0;
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index b11294730771..0dafba2c1e7d 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -57,7 +57,6 @@ unsigned int list_header_only;
unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
-unsigned int do_skl_residency;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
@@ -93,6 +92,7 @@ unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
+int do_migrate;
double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
@@ -151,6 +151,8 @@ size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_COUNTERS 16
struct thread_data {
+ struct timeval tv_begin;
+ struct timeval tv_end;
unsigned long long tsc;
unsigned long long aperf;
unsigned long long mperf;
@@ -301,6 +303,9 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg
int cpu_migrate(int cpu)
{
+ if (!do_migrate)
+ return 0;
+
CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
@@ -384,8 +389,14 @@ struct msr_counter bic[] = {
{ 0x0, "CPU" },
{ 0x0, "Mod%c6" },
{ 0x0, "sysfs" },
+ { 0x0, "Totl%C0" },
+ { 0x0, "Any%C0" },
+ { 0x0, "GFX%C0" },
+ { 0x0, "CPUGFX%" },
};
+
+
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
#define BIC_Package (1ULL << 0)
#define BIC_Avg_MHz (1ULL << 1)
@@ -426,6 +437,10 @@ struct msr_counter bic[] = {
#define BIC_CPU (1ULL << 36)
#define BIC_Mod_c6 (1ULL << 37)
#define BIC_sysfs (1ULL << 38)
+#define BIC_Totl_c0 (1ULL << 39)
+#define BIC_Any_c0 (1ULL << 40)
+#define BIC_GFX_c0 (1ULL << 41)
+#define BIC_CPUGFX (1ULL << 42)
unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
unsigned long long bic_present = BIC_sysfs;
@@ -521,6 +536,8 @@ void print_header(char *delim)
struct msr_counter *mp;
int printed = 0;
+ if (debug)
+ outp += sprintf(outp, "usec %s", delim);
if (DO_BIC(BIC_Package))
outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
if (DO_BIC(BIC_Core))
@@ -599,12 +616,14 @@ void print_header(char *delim)
if (DO_BIC(BIC_GFXMHz))
outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
- if (do_skl_residency) {
+ if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Any_c0))
outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_GFX_c0))
outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPUGFX))
outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
- }
if (DO_BIC(BIC_Pkgpc2))
outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
@@ -771,6 +790,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
return 0;
+ if (debug) {
+ /* on each row, print how many usec each timestamp took to gather */
+ struct timeval tv;
+
+ timersub(&t->tv_end, &t->tv_begin, &tv);
+ outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
+ }
+
interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
tsc = t->tsc * tsc_tweak;
@@ -912,12 +939,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
- if (do_skl_residency) {
+ if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
+ if (DO_BIC(BIC_Any_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
+ if (DO_BIC(BIC_GFX_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
+ if (DO_BIC(BIC_CPUGFX))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
- }
if (DO_BIC(BIC_Pkgpc2))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
@@ -1038,12 +1067,16 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
int i;
struct msr_counter *mp;
- if (do_skl_residency) {
+
+ if (DO_BIC(BIC_Totl_c0))
old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
+ if (DO_BIC(BIC_Any_c0))
old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
+ if (DO_BIC(BIC_GFX_c0))
old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
+ if (DO_BIC(BIC_CPUGFX))
old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
- }
+
old->pc2 = new->pc2 - old->pc2;
if (DO_BIC(BIC_Pkgpc3))
old->pc3 = new->pc3 - old->pc3;
@@ -1292,12 +1325,14 @@ int sum_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
- if (do_skl_residency) {
+ if (DO_BIC(BIC_Totl_c0))
average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
+ if (DO_BIC(BIC_Any_c0))
average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
+ if (DO_BIC(BIC_GFX_c0))
average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
+ if (DO_BIC(BIC_CPUGFX))
average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
- }
average.packages.pc2 += p->pc2;
if (DO_BIC(BIC_Pkgpc3))
@@ -1357,12 +1392,14 @@ void compute_average(struct thread_data *t, struct core_data *c,
average.cores.c7 /= topo.num_cores;
average.cores.mc6_us /= topo.num_cores;
- if (do_skl_residency) {
+ if (DO_BIC(BIC_Totl_c0))
average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+ if (DO_BIC(BIC_Any_c0))
average.packages.pkg_any_core_c0 /= topo.num_packages;
+ if (DO_BIC(BIC_GFX_c0))
average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+ if (DO_BIC(BIC_CPUGFX))
average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
- }
average.packages.pc2 /= topo.num_packages;
if (DO_BIC(BIC_Pkgpc3))
@@ -1482,6 +1519,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
struct msr_counter *mp;
int i;
+
+ gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+
if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
@@ -1565,7 +1605,7 @@ retry:
/* collect core counters only for 1st thread in core */
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
- return 0;
+ goto done;
if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
@@ -1601,15 +1641,21 @@ retry:
/* collect package counters only for 1st core in package */
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
- return 0;
+ goto done;
- if (do_skl_residency) {
+ if (DO_BIC(BIC_Totl_c0)) {
if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
return -10;
+ }
+ if (DO_BIC(BIC_Any_c0)) {
if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
return -11;
+ }
+ if (DO_BIC(BIC_GFX_c0)) {
if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
return -12;
+ }
+ if (DO_BIC(BIC_CPUGFX)) {
if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
return -13;
}
@@ -1688,6 +1734,8 @@ retry:
if (get_mp(cpu, mp, &p->counter[i]))
return -10;
}
+done:
+ gettimeofday(&t->tv_end, (struct timezone *)NULL);
return 0;
}
@@ -3895,6 +3943,9 @@ void decode_misc_enable_msr(void)
{
unsigned long long msr;
+ if (!genuine_intel)
+ return;
+
if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
base_cpu, msr,
@@ -4198,7 +4249,12 @@ void process_cpuid()
BIC_PRESENT(BIC_Pkgpc10);
}
do_irtl_hsw = has_hsw_msrs(family, model);
- do_skl_residency = has_skl_msrs(family, model);
+ if (has_skl_msrs(family, model)) {
+ BIC_PRESENT(BIC_Totl_c0);
+ BIC_PRESENT(BIC_Any_c0);
+ BIC_PRESENT(BIC_GFX_c0);
+ BIC_PRESENT(BIC_CPUGFX);
+ }
do_slm_cstates = is_slm(family, model);
do_knl_cstates = is_knl(family, model);
@@ -4578,7 +4634,7 @@ int get_and_dump_counters(void)
}
void print_version() {
- fprintf(outf, "turbostat version 17.04.12"
+ fprintf(outf, "turbostat version 17.06.23"
" - Len Brown <lenb@kernel.org>\n");
}
@@ -4951,6 +5007,7 @@ void cmdline(int argc, char **argv)
{"hide", required_argument, 0, 'H'}, // meh, -h taken by --help
{"Joules", no_argument, 0, 'J'},
{"list", no_argument, 0, 'l'},
+ {"migrate", no_argument, 0, 'm'},
{"out", required_argument, 0, 'o'},
{"quiet", no_argument, 0, 'q'},
{"show", required_argument, 0, 's'},
@@ -4962,7 +5019,7 @@ void cmdline(int argc, char **argv)
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v",
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v",
long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
@@ -5005,6 +5062,9 @@ void cmdline(int argc, char **argv)
list_header_only++;
quiet++;
break;
+ case 'm':
+ do_migrate = 1;
+ break;
case 'o':
outf = fopen_or_die(optarg, "w");
break;
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index 971c9ffdcb50..a711eec0c895 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -1,10 +1,27 @@
-DESTDIR ?=
+CC = $(CROSS_COMPILE)gcc
+BUILD_OUTPUT := $(CURDIR)
+PREFIX := /usr
+DESTDIR :=
+
+ifeq ("$(origin O)", "command line")
+ BUILD_OUTPUT := $(O)
+endif
x86_energy_perf_policy : x86_energy_perf_policy.c
+CFLAGS += -Wall
+CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
+
+%: %.c
+ @mkdir -p $(BUILD_OUTPUT)
+ $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@
+.PHONY : clean
clean :
- rm -f x86_energy_perf_policy
+ @rm -f $(BUILD_OUTPUT)/x86_energy_perf_policy
+
+install : x86_energy_perf_policy
+ install -d $(DESTDIR)$(PREFIX)/bin
+ install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy
+ install -d $(DESTDIR)$(PREFIX)/share/man/man8
+ install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
-install :
- install x86_energy_perf_policy ${DESTDIR}/usr/bin/
- install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
index 8eaaad648cdb..17db1c3af4d0 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
@@ -1,104 +1,213 @@
-.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
+.\" This page Copyright (C) 2010 - 2015 Len Brown <len.brown@intel.com>
.\" Distributed under the GPL, Copyleft 1994.
.TH X86_ENERGY_PERF_POLICY 8
.SH NAME
-x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
+x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers
.SH SYNOPSIS
-.ft B
.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB "\-r"
+.RB "[ options ] [ scope ] [field \ value]"
.br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB 'performance'
+.RB "scope: \-\-cpu\ cpu-list | \-\-pkg\ pkg-list"
.br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB 'normal'
+.RB "cpu-list, pkg-list: # | #,# | #-# | all"
.br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB 'powersave'
+.RB "field: \-\-all | \-\-epb | \-\-hwp-epp | \-\-hwp-min | \-\-hwp-max | \-\-hwp-desired"
.br
-.B x86_energy_perf_policy
-.RB [ "\-c cpu" ]
-.RB [ "\-v" ]
-.RB n
+.RB "other: (\-\-force | \-\-hwp-enable | \-\-turbo-enable) value"
.br
+.RB "value: # | default | performance | balance-performance | balance-power | power"
.SH DESCRIPTION
\fBx86_energy_perf_policy\fP
-allows software to convey
-its policy for the relative importance of performance
-versus energy savings to the processor.
+displays and updates energy-performance policy settings specific to
+Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR)
+updates, no matter if the Linux cpufreq sub-system is enabled or not.
-The processor uses this information in model-specific ways
-when it must select trade-offs between performance and
-energy efficiency.
+Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB)
+may affect a wide range of hardware decisions,
+such as how aggressively the hardware enters and exits CPU idle states (C-states)
+and Processor Performance States (P-states).
+This policy hint does not replace explicit OS C-state and P-state selection.
+Rather, it tells the hardware how aggressively to implement those selections.
+Further, it allows the OS to influence energy/performance trade-offs where there
+is no software interface, such as in the opportunistic "turbo-mode" P-state range.
+Note that MSR_IA32_ENERGY_PERF_BIAS is defined per CPU,
+but some implementations
+share a single MSR among all CPUs in each processor package.
+On those systems, a write to EPB on one processor will
+be visible, and will have an effect, on all CPUs
+in the same processor package.
-This policy hint does not supersede Processor Performance states
-(P-states) or CPU Idle power states (C-states), but allows
-software to have influence where it would otherwise be unable
-to express a preference.
+Hardware P-States (HWP) are effectively an expansion of hardware
+P-state control from the opportunistic turbo-mode P-state range
+to include the entire range of available P-states.
+On Broadwell Xeon, the initial HWP implementation, EPB influenced HWP.
+That influence was removed in subsequent generations,
+where it was moved to the
+Energy_Performance_Preference (EPP) field in
+a pair of dedicated MSRs -- MSR_IA32_HWP_REQUEST and MSR_IA32_HWP_REQUEST_PKG.
-For example, this setting may tell the hardware how
-aggressively or conservatively to control frequency
-in the "turbo range" above the explicitly OS-controlled
-P-state frequency range. It may also tell the hardware
-how aggressively is should enter the OS requested C-states.
+EPP is the most commonly managed knob in HWP mode,
+but MSR_IA32_HWP_REQUEST also allows the user to specify
+minimum-frequency for Quality-of-Service,
+and maximum-frequency for power-capping.
+MSR_IA32_HWP_REQUEST is defined per-CPU.
-Support for this feature is indicated by CPUID.06H.ECX.bit3
-per the Intel Architectures Software Developer's Manual.
+MSR_IA32_HWP_REQUEST_PKG has the same capability as MSR_IA32_HWP_REQUEST,
+but it can simultaneously set the default policy for all CPUs within a package.
+A bit in per-CPU MSR_IA32_HWP_REQUEST indicates whether it is
+over-ruled-by or exempt-from MSR_IA32_HWP_REQUEST_PKG.
-.SS Options
-\fB-c\fP limits operation to a single CPU.
-The default is to operate on all CPUs.
-Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
-logical processor, but that the initial implementations
-of the MSR were shared among all processors in each package.
-.PP
-\fB-v\fP increases verbosity. By default
-x86_energy_perf_policy is silent.
-.PP
-\fB-r\fP is for "read-only" mode - the unchanged state
-is read and displayed.
+MSR_HWP_CAPABILITIES shows the default values for the fields
+in MSR_IA32_HWP_REQUEST. It is displayed when no values
+are being written.
+
+.SS SCOPE OPTIONS
.PP
-.I performance
-Set a policy where performance is paramount.
-The processor will be unwilling to sacrifice any performance
-for the sake of energy saving. This is the hardware default.
+\fB-c, --cpu\fP Operate on the MSR_IA32_HWP_REQUEST for each CPU in a CPU-list.
+The CPU-list may be comma-separated CPU numbers, with dash for range
+or the string "all". Eg. '--cpu 1,4,6-8' or '--cpu all'.
+When --cpu is used, \fB--hwp-use-pkg\fP is available, which specifies whether the per-cpu
+MSR_IA32_HWP_REQUEST should be over-ruled by MSR_IA32_HWP_REQUEST_PKG (1),
+or exempt from MSR_IA32_HWP_REQUEST_PKG (0).
+
+\fB-p, --pkg\fP Operate on the MSR_IA32_HWP_REQUEST_PKG for each package in the package-list.
+The list is a string of individual package numbers separated
+by commas, and/or ranges of package numbers separated by a dash,
+or the string "all".
+For example '--pkg 1,3' or '--pkg all'
+
+.SS VALUE OPTIONS
.PP
-.I normal
+.I normal | default
Set a policy with a normal balance between performance and energy efficiency.
The processor will tolerate minor performance compromise
for potentially significant energy savings.
-This reasonable default for most desktops and servers.
+This is a reasonable default for most desktops and servers.
+"default" is a synonym for "normal".
.PP
-.I powersave
+.I performance
+Set a policy for maximum performance,
+accepting no performance sacrifice for the benefit of energy efficiency.
+.PP
+.I balance-performance
+Set a policy with a high priority on performance,
+but allowing some performance loss to benefit energy efficiency.
+.PP
+.I balance-power
+Set a policy where the performance and power are balanced.
+This is the default.
+.PP
+.I power
Set a policy where the processor can accept
-a measurable performance hit to maximize energy efficiency.
+a measurable performance impact to maximize energy efficiency.
+
.PP
-.I n
-Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
-The range of valid numbers is 0-15, where 0 is maximum
-performance and 15 is maximum energy efficiency.
+The following table shows the mapping from the value strings above to actual MSR values.
+This mapping is defined in the Linux-kernel header, msr-index.h.
+.nf
+VALUE STRING EPB EPP
+performance 0 0
+balance-performance 4 128
+normal, default 6 128
+balance-power 8 192
+power 15 255
+.fi
+.PP
+For MSR_IA32_HWP_REQUEST performance fields
+(--hwp-min, --hwp-max, --hwp-desired), the value option
+is in units of 100 MHz, Eg. 12 signifies 1200 MHz.
+
+.SS FIELD OPTIONS
+\fB-a, --all value-string\fP Sets all EPB and EPP and HWP limit fields to the value associated with
+the value-string. In addition, enables turbo-mode and HWP-mode, if they were previously disabled.
+Thus "--all normal" will set a system without cpufreq into a well known configuration.
+.PP
+\fB-B, --epb\fP set EPB per-core or per-package.
+See value strings in the table above.
+.PP
+\fB-d, --debug\fP debug increases verbosity. By default
+x86_energy_perf_policy is silent for updates,
+and verbose for read-only mode.
+.PP
+\fB-P, --hwp-epp\fP set HWP.EPP per-core or per-package.
+See value strings in the table above.
+.PP
+\fB-m, --hwp-min\fP request HWP to not go below the specified core/bus ratio.
+The "default" is the value found in IA32_HWP_CAPABILITIES.min.
+.PP
+\fB-M, --hwp-max\fP request HWP to not exceed the specified core/bus ratio.
+The "default" is the value found in IA32_HWP_CAPABILITIES.max.
+.PP
+\fB-D, --hwp-desired\fP request HWP 'desired' frequency.
+The "normal" setting is 0, which
+corresponds to 'full autonomous' HWP control.
+Non-zero performance values request a specific performance
+level on this processor, specified in multiples of 100 MHz.
+.PP
+\fB-w, --hwp-window\fP specify integer number of microsec
+in the sliding window that HWP uses to maintain average frequency.
+This parameter is meaningful only when the "desired" field above is non-zero.
+Default is 0, allowing the HW to choose.
+.SH OTHER OPTIONS
+.PP
+\fB-f, --force\fP writes the specified values without bounds checking.
+.PP
+\fB-U, --hwp-use-pkg\fP (0 | 1), when used in conjunction with --cpu,
+indicates whether the per-CPU MSR_IA32_HWP_REQUEST should be overruled (1)
+or exempt (0) from per-Package MSR_IA32_HWP_REQUEST_PKG settings.
+The default is exempt.
+.PP
+\fB-H, --hwp-enable\fP enable HardWare-P-state (HWP) mode. Once enabled, system RESET is required to disable HWP mode.
+.PP
+\fB-t, --turbo-enable\fP enable (1) or disable (0) turbo mode.
+.PP
+\fB-v, --version\fP print version and exit.
+.PP
+If no request to change policy is made,
+the default behavior is to read
+and display the current system state,
+including the default capabilities.
+.SH WARNING
+.PP
+This utility writes directly to Model Specific Registers.
+There is no locking or coordination should this utility
+be used to modify HWP limit fields at the same time that
+intel_pstate's sysfs attributes access the same MSRs.
+.PP
+Note that --hwp-desired and --hwp-window are considered experimental.
+Future versions of Linux reserve the right to access these
+fields internally -- potentially conflicting with user-space access.
+.SH EXAMPLE
+.nf
+# sudo x86_energy_perf_policy
+cpu0: EPB 6
+cpu0: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu0: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu1: EPB 6
+cpu1: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu1: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu2: EPB 6
+cpu2: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu2: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu3: EPB 6
+cpu3: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu3: HWP_CAP: low 1 eff 8 guar 27 high 35
+.fi
.SH NOTES
-.B "x86_energy_perf_policy "
+.B "x86_energy_perf_policy"
runs only as root.
.SH FILES
.ta
.nf
/dev/cpu/*/msr
.fi
-
.SH "SEE ALSO"
+.nf
msr(4)
+Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+.fi
.PP
.SH AUTHORS
.nf
-Written by Len Brown <len.brown@intel.com>
+Len Brown
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 40b3e5482f8a..65bbe627a425 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -3,322 +3,1424 @@
* policy preference bias on recent X86 processors.
*/
/*
- * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2010 - 2017 Intel Corporation.
* Len Brown <len.brown@intel.com>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * This program is released under GPL v2
*/
+#define _GNU_SOURCE
+#include MSRHEADER
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
+#include <sched.h>
#include <sys/stat.h>
#include <sys/resource.h>
+#include <getopt.h>
+#include <err.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
+#include <limits.h>
#include <stdlib.h>
#include <string.h>
+#include <cpuid.h>
+#include <errno.h>
+
+#define OPTARG_NORMAL (INT_MAX - 1)
+#define OPTARG_POWER (INT_MAX - 2)
+#define OPTARG_BALANCE_POWER (INT_MAX - 3)
+#define OPTARG_BALANCE_PERFORMANCE (INT_MAX - 4)
+#define OPTARG_PERFORMANCE (INT_MAX - 5)
+
+struct msr_hwp_cap {
+ unsigned char highest;
+ unsigned char guaranteed;
+ unsigned char efficient;
+ unsigned char lowest;
+};
-unsigned int verbose; /* set with -v */
-unsigned int read_only; /* set with -r */
+struct msr_hwp_request {
+ unsigned char hwp_min;
+ unsigned char hwp_max;
+ unsigned char hwp_desired;
+ unsigned char hwp_epp;
+ unsigned int hwp_window;
+ unsigned char hwp_use_pkg;
+} req_update;
+
+unsigned int debug;
+unsigned int verbose;
+unsigned int force;
char *progname;
-unsigned long long new_bias;
-int cpu = -1;
+int base_cpu;
+unsigned char update_epb;
+unsigned long long new_epb;
+unsigned char turbo_is_enabled;
+unsigned char update_turbo;
+unsigned char turbo_update_value;
+unsigned char update_hwp_epp;
+unsigned char update_hwp_min;
+unsigned char update_hwp_max;
+unsigned char update_hwp_desired;
+unsigned char update_hwp_window;
+unsigned char update_hwp_use_pkg;
+unsigned char update_hwp_enable;
+#define hwp_update_enabled() (update_hwp_enable | update_hwp_epp | update_hwp_max | update_hwp_min | update_hwp_desired | update_hwp_window | update_hwp_use_pkg)
+int max_cpu_num;
+int max_pkg_num;
+#define MAX_PACKAGES 64
+unsigned int first_cpu_in_pkg[MAX_PACKAGES];
+unsigned long long pkg_present_set;
+unsigned long long pkg_selected_set;
+cpu_set_t *cpu_present_set;
+cpu_set_t *cpu_selected_set;
+int genuine_intel;
+
+size_t cpu_setsize;
+
+char *proc_stat = "/proc/stat";
+
+unsigned int has_epb; /* MSR_IA32_ENERGY_PERF_BIAS */
+unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
+ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
+unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
+unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
+unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
+unsigned int has_hwp_request_pkg; /* IA32_HWP_REQUEST_PKG */
+
+unsigned int bdx_highest_ratio;
/*
- * Usage:
- *
- * -c cpu: limit action to a single CPU (default is all CPUs)
- * -v: verbose output (can invoke more than once)
- * -r: read-only, don't change any settings
- *
- * performance
- * Performance is paramount.
- * Unwilling to sacrifice any performance
- * for the sake of energy saving. (hardware default)
- *
- * normal
- * Can tolerate minor performance compromise
- * for potentially significant energy savings.
- * (reasonable default for most desktops and servers)
- *
- * powersave
- * Can tolerate significant performance hit
- * to maximize energy savings.
- *
- * n
- * a numerical value to write to the underlying MSR.
+ * maintain compatibility with original implementation, but don't document it:
*/
void usage(void)
{
- printf("%s: [-c cpu] [-v] "
- "(-r | 'performance' | 'normal' | 'powersave' | n)\n",
- progname);
+ fprintf(stderr, "%s [options] [scope][field value]\n", progname);
+ fprintf(stderr, "scope: --cpu cpu-list [--hwp-use-pkg #] | --pkg pkg-list\n");
+ fprintf(stderr, "field: --all | --epb | --hwp-epp | --hwp-min | --hwp-max | --hwp-desired\n");
+ fprintf(stderr, "other: --hwp-enable | --turbo-enable (0 | 1) | --help | --force\n");
+ fprintf(stderr,
+ "value: ( # | \"normal\" | \"performance\" | \"balance-performance\" | \"balance-power\"| \"power\")\n");
+ fprintf(stderr, "--hwp-window usec\n");
+
+ fprintf(stderr, "Specify only Energy Performance BIAS (legacy usage):\n");
+ fprintf(stderr, "%s: [-c cpu] [-v] (-r | policy-value )\n", progname);
+
exit(1);
}
-#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
+/*
+ * If bdx_highest_ratio is set,
+ * then we must translate between MSR format and simple ratio
+ * used on the cmdline.
+ */
+int ratio_2_msr_perf(int ratio)
+{
+ int msr_perf;
+
+ if (!bdx_highest_ratio)
+ return ratio;
+
+ msr_perf = ratio * 255 / bdx_highest_ratio;
+
+ if (debug)
+ fprintf(stderr, "%d = ratio_to_msr_perf(%d)\n", msr_perf, ratio);
+
+ return msr_perf;
+}
+int msr_perf_2_ratio(int msr_perf)
+{
+ int ratio;
+ double d;
+
+ if (!bdx_highest_ratio)
+ return msr_perf;
+
+ d = (double)msr_perf * (double) bdx_highest_ratio / 255.0;
+ d = d + 0.5; /* round */
+ ratio = (int)d;
+
+ if (debug)
+ fprintf(stderr, "%d = msr_perf_ratio(%d) {%f}\n", ratio, msr_perf, d);
+
+ return ratio;
+}
+int parse_cmdline_epb(int i)
+{
+ if (!has_epb)
+ errx(1, "EPB not enabled on this platform");
+
+ update_epb = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ return ENERGY_PERF_BIAS_POWERSAVE;
+ case OPTARG_BALANCE_POWER:
+ return ENERGY_PERF_BIAS_BALANCE_POWERSAVE;
+ case OPTARG_NORMAL:
+ return ENERGY_PERF_BIAS_NORMAL;
+ case OPTARG_BALANCE_PERFORMANCE:
+ return ENERGY_PERF_BIAS_BALANCE_PERFORMANCE;
+ case OPTARG_PERFORMANCE:
+ return ENERGY_PERF_BIAS_PERFORMANCE;
+ }
+ if (i < 0 || i > ENERGY_PERF_BIAS_POWERSAVE)
+ errx(1, "--epb must be from 0 to 15");
+ return i;
+}
+
+#define HWP_CAP_LOWEST 0
+#define HWP_CAP_HIGHEST 255
+
+/*
+ * "performance" changes hwp_min to cap.highest
+ * All others leave it at cap.lowest
+ */
+int parse_cmdline_hwp_min(int i)
+{
+ update_hwp_min = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ case OPTARG_BALANCE_POWER:
+ case OPTAR