summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/alternative.c9
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/bugs.c46
-rw-r--r--arch/x86/kernel/cpu/common.c1
-rw-r--r--arch/x86/kernel/cpu/hygon.c405
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c24
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c17
-rw-r--r--arch/x86/kernel/dumpstack.c31
-rw-r--r--arch/x86/kernel/eisa.c10
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c12
-rw-r--r--arch/x86/kernel/topology.c4
-rw-r--r--arch/x86/kernel/tsc.c2
16 files changed, 531 insertions, 42 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 014f214da581..b9d5e7c9ef43 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -684,8 +684,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
* It means the size must be writable atomically and the address must be aligned
* in a way that permits an atomic write. It also makes sure we fit on a single
* page.
- *
- * Note: Must be called under text_mutex.
*/
void *text_poke(void *addr, const void *opcode, size_t len)
{
@@ -700,6 +698,8 @@ void *text_poke(void *addr, const void *opcode, size_t len)
*/
BUG_ON(!after_bootmem);
+ lockdep_assert_held(&text_mutex);
+
if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -782,8 +782,6 @@ int poke_int3_handler(struct pt_regs *regs)
* - replace the first byte (int3) by the first byte of
* replacing opcode
* - sync cores
- *
- * Note: must be called under text_mutex.
*/
void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
@@ -792,6 +790,9 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
bp_int3_handler = handler;
bp_int3_addr = (u8 *)addr + sizeof(int3);
bp_patching_in_progress = true;
+
+ lockdep_assert_held(&text_mutex);
+
/*
* Corresponding read barrier in int3 notifier for making sure the
* in_progress and handler are correctly ordered wrt. patching.
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 9f148e3d45b4..7654febd5102 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -413,7 +413,7 @@ static int activate_managed(struct irq_data *irqd)
if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
/* Something in the core code broke! Survive gracefully */
pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
- return EINVAL;
+ return -EINVAL;
}
ret = assign_managed_vector(irqd, vector_searchmask);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index ec00d1ff5098..f7151cd03cb0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1640,6 +1640,7 @@ static int do_open(struct inode *inode, struct file *filp)
return 0;
}
+#ifdef CONFIG_PROC_FS
static int proc_apm_show(struct seq_file *m, void *v)
{
unsigned short bx;
@@ -1719,6 +1720,7 @@ static int proc_apm_show(struct seq_file *m, void *v)
units);
return 0;
}
+#endif
static int apm(void *unused)
{
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 347137e80bf5..1f5d2291c31e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
+obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4c2313d0b9ca..40bdaea97fe7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -668,6 +668,45 @@ EXPORT_SYMBOL_GPL(l1tf_mitigation);
enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
+/*
+ * These CPUs all support 44bits physical address space internally in the
+ * cache but CPUID can report a smaller number of physical address bits.
+ *
+ * The L1TF mitigation uses the top most address bit for the inversion of
+ * non present PTEs. When the installed memory reaches into the top most
+ * address bit due to memory holes, which has been observed on machines
+ * which report 36bits physical address bits and have 32G RAM installed,
+ * then the mitigation range check in l1tf_select_mitigation() triggers.
+ * This is a false positive because the mitigation is still possible due to
+ * the fact that the cache uses 44bit internally. Use the cache bits
+ * instead of the reported physical bits and adjust them on the affected
+ * machines to 44bit if the reported bits are less than 44.
+ */
+static void override_cache_bits(struct cpuinfo_x86 *c)
+{
+ if (c->x86 != 6)
+ return;
+
+ switch (c->x86_model) {
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ if (c->x86_cache_bits < 44)
+ c->x86_cache_bits = 44;
+ break;
+ }
+}
+
static void __init l1tf_select_mitigation(void)
{
u64 half_pa;
@@ -675,6 +714,8 @@ static void __init l1tf_select_mitigation(void)
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;
+ override_cache_bits(&boot_cpu_data);
+
switch (l1tf_mitigation) {
case L1TF_MITIGATION_OFF:
case L1TF_MITIGATION_FLUSH_NOWARN:
@@ -694,11 +735,6 @@ static void __init l1tf_select_mitigation(void)
return;
#endif
- /*
- * This is extremely unlikely to happen because almost all
- * systems have far more MAX_PA/2 than RAM can be fit into
- * DIMM slots.
- */
half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 84dee5ab745a..44c4ef3d989b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -919,6 +919,7 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
c->x86_phys_bits = 36;
#endif
+ c->x86_cache_bits = c->x86_phys_bits;
}
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
new file mode 100644
index 000000000000..a43d5f1f8b41
--- /dev/null
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Hygon Processor Support for Linux
+ *
+ * Copyright (C) 2018 Chengdu Haiguang IC Design Co., Ltd.
+ *
+ * Author: Pu Wen <puwen@hygon.cn>
+ */
+#include <linux/io.h>
+
+#include <asm/cpu.h>
+#include <asm/smp.h>
+#include <asm/cacheinfo.h>
+#include <asm/spec-ctrl.h>
+#include <asm/delay.h>
+#ifdef CONFIG_X86_64
+# include <asm/set_memory.h>
+#endif
+
+#include "cpu.h"
+
+/*
+ * nodes_per_socket: Stores the number of nodes per socket.
+ * Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8]
+ */
+static u32 nodes_per_socket = 1;
+
+#ifdef CONFIG_NUMA
+/*
+ * To workaround broken NUMA config. Read the comment in
+ * srat_detect_node().
+ */
+static int nearby_node(int apicid)
+{
+ int i, node;
+
+ for (i = apicid - 1; i >= 0; i--) {
+ node = __apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
+ node = __apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ return first_node(node_online_map); /* Shouldn't happen */
+}
+#endif
+
+static void hygon_get_topology_early(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_TOPOEXT))
+ smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1;
+}
+
+/*
+ * Fixup core topology information for
+ * (1) Hygon multi-node processors
+ * Assumption: Number of cores in each internal node is the same.
+ * (2) Hygon processors supporting compute units
+ */
+static void hygon_get_topology(struct cpuinfo_x86 *c)
+{
+ u8 node_id;
+ int cpu = smp_processor_id();
+
+ /* get information required for multi-node processors */
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ int err;
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+
+ node_id = ecx & 0xff;
+
+ c->cpu_core_id = ebx & 0xff;
+
+ if (smp_num_siblings > 1)
+ c->x86_max_cores /= smp_num_siblings;
+
+ /*
+ * In case leaf B is available, use it to derive
+ * topology information.
+ */
+ err = detect_extended_topology(c);
+ if (!err)
+ c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+
+ } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
+ u64 value;
+
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ node_id = value & 7;
+
+ per_cpu(cpu_llc_id, cpu) = node_id;
+ } else
+ return;
+
+ if (nodes_per_socket > 1)
+ set_cpu_cap(c, X86_FEATURE_AMD_DCM);
+}
+
+/*
+ * On Hygon setup the lower bits of the APIC id distinguish the cores.
+ * Assumes number of cores is a power of two.
+ */
+static void hygon_detect_cmp(struct cpuinfo_x86 *c)
+{
+ unsigned int bits;
+ int cpu = smp_processor_id();
+
+ bits = c->x86_coreid_bits;
+ /* Low order bits define the core id (index of core in socket) */
+ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
+ /* Convert the initial APIC ID into the socket ID */
+ c->phys_proc_id = c->initial_apicid >> bits;
+ /* use socket ID also for last level cache */
+ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
+}
+
+static void srat_detect_node(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_NUMA
+ int cpu = smp_processor_id();
+ int node;
+ unsigned int apicid = c->apicid;
+
+ node = numa_cpu_node(cpu);
+ if (node == NUMA_NO_NODE)
+ node = per_cpu(cpu_llc_id, cpu);
+
+ /*
+ * On multi-fabric platform (e.g. Numascale NumaChip) a
+ * platform-specific handler needs to be called to fixup some
+ * IDs of the CPU.
+ */
+ if (x86_cpuinit.fixup_cpu_id)
+ x86_cpuinit.fixup_cpu_id(c, node);
+
+ if (!node_online(node)) {
+ /*
+ * Two possibilities here:
+ *
+ * - The CPU is missing memory and no node was created. In
+ * that case try picking one from a nearby CPU.
+ *
+ * - The APIC IDs differ from the HyperTransport node IDs.
+ * Assume they are all increased by a constant offset, but
+ * in the same order as the HT nodeids. If that doesn't
+ * result in a usable node fall back to the path for the
+ * previous case.
+ *
+ * This workaround operates directly on the mapping between
+ * APIC ID and NUMA node, assuming certain relationship
+ * between APIC ID, HT node ID and NUMA topology. As going
+ * through CPU mapping may alter the outcome, directly
+ * access __apicid_to_node[].
+ */
+ int ht_nodeid = c->initial_apicid;
+
+ if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = __apicid_to_node[ht_nodeid];
+ /* Pick a nearby node */
+ if (!node_online(node))
+ node = nearby_node(apicid);
+ }
+ numa_set_node(cpu, node);
+#endif
+}
+
+static void early_init_hygon_mc(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ unsigned int bits, ecx;
+
+ /* Multi core CPU? */
+ if (c->extended_cpuid_level < 0x80000008)
+ return;
+
+ ecx = cpuid_ecx(0x80000008);
+
+ c->x86_max_cores = (ecx & 0xff) + 1;
+
+ /* CPU telling us the core id bits shift? */
+ bits = (ecx >> 12) & 0xF;
+
+ /* Otherwise recompute */
+ if (bits == 0) {
+ while ((1 << bits) < c->x86_max_cores)
+ bits++;
+ }
+
+ c->x86_coreid_bits = bits;
+#endif
+}
+
+static void bsp_init_hygon(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
+ unsigned long long tseg;
+
+ /*
+ * Split up direct mapping around the TSEG SMM area.
+ * Don't do it for gbpages because there seems very little
+ * benefit in doing so.
+ */
+ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+ unsigned long pfn = tseg >> PAGE_SHIFT;
+
+ pr_debug("tseg: %010llx\n", tseg);
+ if (pfn_range_is_mapped(pfn, pfn + 1))
+ set_memory_4k((unsigned long)__va(tseg), 1);
+ }
+#endif
+
+ if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
+ u64 val;
+
+ rdmsrl(MSR_K7_HWCR, val);
+ if (!(val & BIT(24)))
+ pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
+ }
+
+ if (cpu_has(c, X86_FEATURE_MWAITX))
+ use_mwaitx_delay();
+
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ u32 ecx;
+
+ ecx = cpuid_ecx(0x8000001e);
+ nodes_per_socket = ((ecx >> 8) & 7) + 1;
+ } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
+ u64 value;
+
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ nodes_per_socket = ((value >> 3) & 7) + 1;
+ }
+
+ if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
+ !boot_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+ /*
+ * Try to cache the base value so further operations can
+ * avoid RMW. If that faults, do not enable SSBD.
+ */
+ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+ setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
+ setup_force_cpu_cap(X86_FEATURE_SSBD);
+ x86_amd_ls_cfg_ssbd_mask = 1ULL << 10;
+ }
+ }
+}
+
+static void early_init_hygon(struct cpuinfo_x86 *c)
+{
+ u32 dummy;
+
+ early_init_hygon_mc(c);
+
+ set_cpu_cap(c, X86_FEATURE_K8);
+
+ rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
+ /*
+ * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
+ * with P/T states and does not stop in deep C-states
+ */
+ if (c->x86_power & (1 << 8)) {
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ }
+
+ /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
+ if (c->x86_power & BIT(12))
+ set_cpu_cap(c, X86_FEATURE_ACC_POWER);
+
+#ifdef CONFIG_X86_64
+ set_cpu_cap(c, X86_FEATURE_SYSCALL32);
+#endif
+
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+ /*
+ * ApicID can always be treated as an 8-bit value for Hygon APIC So, we
+ * can safely set X86_FEATURE_EXTD_APICID unconditionally.
+ */
+ if (boot_cpu_has(X86_FEATURE_APIC))
+ set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+#endif
+
+ /*
+ * This is only needed to tell the kernel whether to use VMCALL
+ * and VMMCALL. VMMCALL is never executed except under virt, so
+ * we can set it unconditionally.
+ */
+ set_cpu_cap(c, X86_FEATURE_VMMCALL);
+
+ hygon_get_topology_early(c);
+}
+
+static void init_hygon(struct cpuinfo_x86 *c)
+{
+ early_init_hygon(c);
+
+ /*
+ * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+ */
+ clear_cpu_cap(c, 0*32+31);
+
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+
+ /* get apicid instead of initial apic id from cpuid */
+ c->apicid = hard_smp_processor_id();
+
+ set_cpu_cap(c, X86_FEATURE_ZEN);
+ set_cpu_cap(c, X86_FEATURE_CPB);
+
+ cpu_detect_cache_sizes(c);
+
+ hygon_detect_cmp(c);
+ hygon_get_topology(c);
+ srat_detect_node(c);
+
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
+ unsigned long long val;
+ int ret;
+
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+ * don't have that MSR, LFENCE is already serializing.
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+ /*
+ * Verify that the MSR write was successful (could be running
+ * under a hypervisor) and only then assume that LFENCE is
+ * serializing.
+ */
+ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ } else {
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
+ }
+
+ /*
+ * Hygon processors have APIC timer running in deep C states.
+ */
+ set_cpu_cap(c, X86_FEATURE_ARAT);
+
+ /* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */
+ if (!cpu_has(c, X86_FEATURE_XENPV))
+ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+}
+
+static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c)
+{
+ u32 ebx, eax, ecx, edx;
+ u16 mask = 0xfff;
+
+ if (c->extended_cpuid_level < 0x80000006)
+ return;
+
+ cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
+
+ tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
+ tlb_lli_4k[ENTRIES] = ebx & mask;
+
+ /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+ if (!((eax >> 16) & mask))
+ tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff;
+ else
+ tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+
+ /* a 4M entry uses two 2M entries */
+ tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+
+ /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+ if (!(eax & mask)) {
+ cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+ tlb_lli_2m[ENTRIES] = eax & 0xff;
+ } else
+ tlb_lli_2m[ENTRIES] = eax & mask;
+
+ tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+}
+
+static const struct cpu_dev hygon_cpu_dev = {
+ .c_vendor = "Hygon",
+ .c_ident = { "HygonGenuine" },
+ .c_early_init = early_init_hygon,
+ .c_detect_tlb = cpu_detect_tlb_hygon,
+ .c_bsp_init = bsp_init_hygon,
+ .c_init = init_hygon,
+ .c_x86_vendor = X86_VENDOR_HYGON,
+};
+
+cpu_dev_register(hygon_cpu_dev);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 401e8c133108..fc3c07fe7df5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -150,6 +150,9 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_HYPERVISOR))
return false;
+ if (c->x86 != 6)
+ return false;
+
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
c->x86_stepping == spectre_bad_microcodes[i].stepping)
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 0624957aa068..07b5fc00b188 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -504,6 +504,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
struct microcode_amd *mc_amd;
struct ucode_cpu_info *uci;
struct ucode_patch *p;
+ enum ucode_state ret;
u32 rev, dummy;
BUG_ON(raw_smp_processor_id() != cpu);
@@ -521,9 +522,8 @@ static enum ucode_state apply_microcode_amd(int cpu)
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
- c->microcode = rev;
- uci->cpu_sig.rev = rev;
- return UCODE_OK;
+ ret = UCODE_OK;
+ goto out;
}
if (__apply_microcode_amd(mc_amd)) {
@@ -531,13 +531,21 @@ static enum ucode_state apply_microcode_amd(int cpu)
cpu, mc_amd->hdr.patch_id);
return UCODE_ERROR;
}
- pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
- mc_amd->hdr.patch_id);
- uci->cpu_sig.rev = mc_amd->hdr.patch_id;
- c->microcode = mc_amd->hdr.patch_id;
+ rev = mc_amd->hdr.patch_id;
+ ret = UCODE_UPDATED;
+
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
- return UCODE_UPDATED;
+out:
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+
+ /* Update boot_cpu_data's revision too, if we're on the BSP: */
+ if (c->cpu_index == boot_cpu_data.cpu_index)
+ boot_cpu_data.microcode = rev;
+
+ return ret;
}
static int install_equiv_cpu_table(const u8 *buf)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 97ccf4c3b45b..16936a24795c 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -795,6 +795,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_intel *mc;
+ enum ucode_state ret;
static int prev_rev;
u32 rev;
@@ -817,9 +818,8 @@ static enum ucode_state apply_microcode_intel(int cpu)
*/
rev = intel_get_microcode_revision();
if (rev >= mc->hdr.rev) {
- uci->cpu_sig.rev = rev;
- c->microcode = rev;
- return UCODE_OK;
+ ret = UCODE_OK;
+ goto out;
}
/*
@@ -848,10 +848,17 @@ static enum ucode_state apply_microcode_intel(int cpu)
prev_rev = rev;
}
+ ret = UCODE_UPDATED;
+
+out:
uci->cpu_sig.rev = rev;
- c->microcode = rev;
+ c->microcode = rev;
+
+ /* Update boot_cpu_data's revision too, if we're on the BSP: */
+ if (c->cpu_index == boot_cpu_data.cpu_index)
+ boot_cpu_data.microcode = rev;
- return UCODE_UPDATED;
+ return ret;
}
static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9c8652974f8e..2b5886401e5f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,6 +17,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <linux/kasan.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
@@ -89,14 +90,24 @@ static void printk_stack_address(unsigned long address, int reliable,
* Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random
* guesstimate in attempt to achieve all of the above.
*/
-void show_opcodes(u8 *rip, const char *loglvl)
+void show_opcodes(struct pt_regs *regs, const char *loglvl)
{
#define PROLOGUE_SIZE 42
#define EPILOGUE_SIZE 21
#define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
u8 opcodes[OPCODE_BUFSIZE];
+ unsigned long prologue = regs->ip - PROLOGUE_SIZE;
+ bool bad_ip;
- if (probe_kernel_read(opcodes, rip - PROLOGUE_SIZE, OPCODE_BUFSIZE)) {
+ /*
+ * Make sure userspace isn't trying to trick us into dumping kernel
+ * memory by pointing the userspace instruction pointer at it.
+ */
+ bad_ip = user_mode(regs) &&
+ __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);
+
+ if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue,
+ OPCODE_BUFSIZE)) {
printk("%sCode: Bad RIP value.\n", loglvl);
} else {
printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
@@ -112,7 +123,7 @@ void show_ip(struct pt_regs *regs, const char *loglvl)
#else
printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
#endif
- show_opcodes((u8 *)regs->ip, loglvl);
+ show_opcodes(regs, loglvl);
}
void show_iret_regs(struct pt_regs *regs)
@@ -135,7 +146,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
* they can be printed in the right context.
*/
if (!partial && on_stack(info, regs, sizeof(*regs))) {
- __show_regs(regs, 0);
+ __show_regs(regs, SHOW_REGS_SHORT);
} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
IRET_FRAME_SIZE)) {
@@ -333,7 +344,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
oops_exit();
/* Executive summary in case the oops scrolled away */
- __show_regs(&exec_summary_regs, true);
+ __show_regs(&exec_summary_regs, SHOW_REGS_ALL);
if (!signr)
return;
@@ -346,7 +357,10 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
* We're not going to return, but we might be on an IST stack or
* have very little stack space left. Rewind the stack and kill
* the task.
+ * Before we rewind the stack, we have to tell KASAN that we're going to
+ * reuse the task stack and that existing poisons are invalid.
*/
+ kasan_unpoison_task_stack(current);
rewind_stack_do_exit(signr);
}
NOKPROBE_SYMBOL(oops_end);
@@ -393,14 +407,9 @@ void die(const char *str, struct pt_regs *regs, long err)
void show_regs(struct pt_regs *regs)
{
- bool all = true;
-
show_regs_print_info(KERN_DEFAULT);
- if (IS_ENABLED(CONFIG_X86_32))
- all = !user_mode(regs);
-
- __show_regs(regs, all);
+ __show_regs(regs, user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL);
/*
* When in-kernel, we also print out the stack at the time of the fault..
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index f260e452e4f8..e8c8c5d78dbd 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -7,11 +7,17 @@
#include <linux/eisa.h>
#include <linux/io.h>
+#include <xen/xen.h>
+
static __init int eisa_bus_probe(void)
{
- void __iomem *p = ioremap(0x0FFFD9, 4);
+ void __iomem *p;
+
+ if (xen_pv_domain() && !xen_initial_domain())
+ return 0;
- if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+ p = ioremap(0x0FFFD9, 4);
+ if (p && readl(p) == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24))
EISA_bus = 1;
iounmap(p);
return 0;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2924fd447e61..5046a3c9dec2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,7 +59,7 @@
#include <asm/intel_rdt_sched.h>
#include <asm/proto.h>
-void __show_regs(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -85,7 +85,7 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
- if (!all)
+ if (mode != SHOW_REGS_ALL)
return;
cr0 = read_cr0();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a451bc374b9b..ea5ea850348d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -62,7 +62,7 @@
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -87,9 +87,17 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
regs->r13, regs->r14, regs->r15);
- if (!all)
+ if (mode == SHOW_REGS_SHORT)
return;
+ if (mode == SHOW_REGS_USER) {
+ rdmsrl(MSR_FS_BASE, fs);
+ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n",
+ fs, shadowgs);
+ return;
+ }
+
asm("movl %%ds,%0" : "=r" (ds));
asm("movl %%cs,%0" : "=r" (cs));
asm("movl %%es,%0" : "=r" (es));
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 12cbe2b88c0f..738bf42b0218 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -111,8 +111,10 @@ int arch_register_cpu(int num)
/*
* Currently CPU0 is only hotpluggable on Intel platforms. Other
* vendors can add hotplug support later.
+ * Xen PV guests don't support CPU0 hotplug at all.
*/
- if (c->x86_vendor != X86_VENDOR_INTEL)
+ if (c->x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_has(X86_FEATURE_XENPV))
cpu0_hotpluggable = 0;
/*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1463468ba9a0..6490f618e096 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1415,7 +1415,7 @@ static bool __init determine_cpu_tsc_frequencies(bool early)
static unsigned long __init get_loops_per_jiffy(void)
{
- unsigned long lpj = tsc_khz * KHZ;
+ u64 lpj = (u64)tsc_khz * KHZ;
do_div(lpj, HZ);
return lpj;