summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-14 13:34:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-14 13:34:37 -0700
commit0520058d0578c2924b1571c16281f873cb4a3d2b (patch)
treed59fa88120a43dc90ec631508a0cbe13dc80e3ef /arch
parentcd94257d7a8103acf136e4bd46e3d0ad698a6f3d (diff)
parent585c6ed738a5ed2a6fd7662fa1d82f25acfa85de (diff)
Merge tag 'for-linus-5.9-rc1b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull more xen updates from Juergen Gross: - Remove support for running as 32-bit Xen PV-guest. 32-bit PV guests are rarely used, are lacking security fixes for Meltdown, and can be easily replaced by PVH mode. Another series for doing more cleanup will follow soon (removal of 32-bit-only pvops functionality). - Fixes and additional features for the Xen display frontend driver. * tag 'for-linus-5.9-rc1b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: drm/xen-front: Pass dumb buffer data offset to the backend xen: Sync up with the canonical protocol definition in Xen drm/xen-front: Add YUYV to supported formats drm/xen-front: Fix misused IS_ERR_OR_NULL checks xen/gntdev: Fix dmabuf import with non-zero sgt offset x86/xen: drop tests for highmem in pv code x86/xen: eliminate xen-asm_64.S x86/xen: remove 32-bit Xen PV guest support
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/entry/entry_32.S109
-rw-r--r--arch/x86/entry/vdso/vdso32/note.S30
-rw-r--r--arch/x86/include/asm/proto.h2
-rw-r--r--arch/x86/include/asm/segment.h2
-rw-r--r--arch/x86/kernel/head_32.S31
-rw-r--r--arch/x86/xen/Kconfig3
-rw-r--r--arch/x86/xen/Makefile3
-rw-r--r--arch/x86/xen/apic.c17
-rw-r--r--arch/x86/xen/enlighten_pv.c78
-rw-r--r--arch/x86/xen/mmu_pv.c492
-rw-r--r--arch/x86/xen/p2m.c6
-rw-r--r--arch/x86/xen/setup.c36
-rw-r--r--arch/x86/xen/smp_pv.c18
-rw-r--r--arch/x86/xen/vdso.h6
-rw-r--r--arch/x86/xen/xen-asm.S194
-rw-r--r--arch/x86/xen/xen-asm_32.S185
-rw-r--r--arch/x86/xen/xen-asm_64.S192
-rw-r--r--arch/x86/xen/xen-head.S6
-rw-r--r--arch/x86/xen/xen-ops.h1
19 files changed, 285 insertions, 1126 deletions
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 29b7d52143e9..df8c017e6161 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -449,8 +449,6 @@
.macro SWITCH_TO_KERNEL_STACK
- ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
BUG_IF_WRONG_CR3
SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
@@ -599,8 +597,6 @@
*/
.macro SWITCH_TO_ENTRY_STACK
- ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
/* Bytes to copy */
movl $PTREGS_SIZE, %ecx
@@ -872,17 +868,6 @@ SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
* will ignore all of the single-step traps generated in this range.
*/
-#ifdef CONFIG_XEN_PV
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-SYM_CODE_START(xen_sysenter_target)
- addl $5*4, %esp /* remove xen-provided frame */
- jmp .Lsysenter_past_esp
-SYM_CODE_END(xen_sysenter_target)
-#endif
-
/*
* 32-bit SYSENTER entry.
*
@@ -965,9 +950,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
movl %esp, %eax
call do_SYSENTER_32
- /* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ testl %eax, %eax
+ jz .Lsyscall_32_done
STACKLEAK_ERASE
@@ -1165,95 +1149,6 @@ SYM_FUNC_END(entry_INT80_32)
#endif
.endm
-#ifdef CONFIG_PARAVIRT
-SYM_CODE_START(native_iret)
- iret
- _ASM_EXTABLE(native_iret, asm_iret_error)
-SYM_CODE_END(native_iret)
-#endif
-
-#ifdef CONFIG_XEN_PV
-/*
- * See comment in entry_64.S for further explanation
- *
- * Note: This is not an actual IDT entry point. It's a XEN specific entry
- * point and therefore named to match the 64-bit trampoline counterpart.
- */
-SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback)
- /*
- * Check to see if we got the event in the critical
- * region in xen_iret_direct, after we've reenabled
- * events and checked for pending events. This simulates
- * iret instruction's behaviour where it delivers a
- * pending interrupt when enabling interrupts:
- */
- cmpl $xen_iret_start_crit, (%esp)
- jb 1f
- cmpl $xen_iret_end_crit, (%esp)
- jae 1f
- call xen_iret_crit_fixup
-1:
- pushl $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- ENCODE_FRAME_POINTER
-
- mov %esp, %eax
- call xen_pv_evtchn_do_upcall
- jmp handle_exception_return
-SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback)
-
-/*
- * Hypervisor uses this for application faults while it executes.
- * We get here for two reasons:
- * 1. Fault while reloading DS, ES, FS or GS
- * 2. Fault while executing IRET
- * Category 1 we fix up by reattempting the load, and zeroing the segment
- * register if the load fails.
- * Category 2 we fix up by jumping to do_iret_error. We cannot use the
- * normal Linux return path in this case because if we use the IRET hypercall
- * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
- * We distinguish between categories by maintaining a status value in EAX.
- */
-SYM_FUNC_START(xen_failsafe_callback)
- pushl %eax
- movl $1, %eax
-1: mov 4(%esp), %ds
-2: mov 8(%esp), %es
-3: mov 12(%esp), %fs
-4: mov 16(%esp), %gs
- /* EAX == 0 => Category 1 (Bad segment)
- EAX != 0 => Category 2 (Bad IRET) */
- testl %eax, %eax
- popl %eax
- lea 16(%esp), %esp
- jz 5f
- jmp asm_iret_error
-5: pushl $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- ENCODE_FRAME_POINTER
- jmp handle_exception_return
-
-.section .fixup, "ax"
-6: xorl %eax, %eax
- movl %eax, 4(%esp)
- jmp 1b
-7: xorl %eax, %eax
- movl %eax, 8(%esp)
- jmp 2b
-8: xorl %eax, %eax
- movl %eax, 12(%esp)
- jmp 3b
-9: xorl %eax, %eax
- movl %eax, 16(%esp)
- jmp 4b
-.previous
- _ASM_EXTABLE(1b, 6b)
- _ASM_EXTABLE(2b, 7b)
- _ASM_EXTABLE(3b, 8b)
- _ASM_EXTABLE(4b, 9b)
-SYM_FUNC_END(xen_failsafe_callback)
-#endif /* CONFIG_XEN_PV */
-
SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S
index e78047d119f6..2cbd39939dc6 100644
--- a/arch/x86/entry/vdso/vdso32/note.S
+++ b/arch/x86/entry/vdso/vdso32/note.S
@@ -16,33 +16,3 @@ ELFNOTE_START(Linux, 0, "a")
ELFNOTE_END
BUILD_SALT
-
-#ifdef CONFIG_XEN
-/*
- * Add a special note telling glibc's dynamic linker a fake hardware
- * flavor that it will use to choose the search path for libraries in the
- * same way it uses real hardware capabilities like "mmx".
- * We supply "nosegneg" as the fake capability, to indicate that we
- * do not like negative offsets in instructions using segment overrides,
- * since we implement those inefficiently. This makes it possible to
- * install libraries optimized to avoid those access patterns in someplace
- * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file
- * corresponding to the bits here is needed to make ldconfig work right.
- * It should contain:
- * hwcap 1 nosegneg
- * to match the mapping of bit to name that we give here.
- *
- * At runtime, the fake hardware feature will be considered to be present
- * if its bit is set in the mask word. So, we start with the mask 0, and
- * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
- */
-
-#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
-
-ELFNOTE_START(GNU, 2, "a")
- .long 1 /* ncaps */
-VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */
- .long 0 /* mask */
- .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
-ELFNOTE_END
-#endif
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6e81788a30c1..28996fe19301 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -25,7 +25,7 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#ifdef CONFIG_XEN_PV
void xen_entry_INT80_compat(void);
#endif
#endif
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 6669164abadc..9646c300f128 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -301,7 +301,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
extern void early_ignore_irq(void);
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#ifdef CONFIG_XEN_PV
extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
#endif
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f66a6b90f954..7ed84c282233 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -134,38 +134,7 @@ SYM_CODE_START(startup_32)
movl %eax,pa(initial_page_table+0xffc)
#endif
-#ifdef CONFIG_PARAVIRT
- /* This is can only trip for a broken bootloader... */
- cmpw $0x207, pa(boot_params + BP_version)
- jb .Ldefault_entry
-
- /* Paravirt-compatible boot parameters. Look to see what architecture
- we're booting under. */
- movl pa(boot_params + BP_hardware_subarch), %eax
- cmpl $num_subarch_entries, %eax
- jae .Lbad_subarch
-
- movl pa(subarch_entries)(,%eax,4), %eax
- subl $__PAGE_OFFSET, %eax
- jmp *%eax
-
-.Lbad_subarch:
-SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK)
- /* Unknown implementation; there's really
- nothing we can do at this point. */
- ud2a
-
- __INITDATA
-
-subarch_entries:
- .long .Ldefault_entry /* normal x86/PC */
- .long xen_entry /* Xen hypervisor */
- .long .Ldefault_entry /* Moorestown MID */
-num_subarch_entries = (. - subarch_entries) / 4
-.previous
-#else
jmp .Ldefault_entry
-#endif /* CONFIG_PARAVIRT */
SYM_CODE_END(startup_32)
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 1aded63a95cb..218acbd5c7a0 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -19,6 +19,7 @@ config XEN_PV
bool "Xen PV guest support"
default y
depends on XEN
+ depends on X86_64
select PARAVIRT_XXL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
@@ -50,7 +51,7 @@ config XEN_PVHVM_SMP
config XEN_512GB
bool "Limit Xen pv-domain memory to 512GB"
- depends on XEN_PV && X86_64
+ depends on XEN_PV
default y
help
Limit paravirtualized user domains to 512GB of RAM.
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 5f1db522d06b..fc5c5ba4aacb 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_xen-asm.o := y
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
@@ -33,7 +33,6 @@ obj-$(CONFIG_XEN_PV) += mmu_pv.o
obj-$(CONFIG_XEN_PV) += irq.o
obj-$(CONFIG_XEN_PV) += multicalls.o
obj-$(CONFIG_XEN_PV) += xen-asm.o
-obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o
obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 1aff4ae65655..e82fd1910dae 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -60,10 +60,6 @@ static u32 xen_apic_read(u32 reg)
if (reg == APIC_LVR)
return 0x14;
-#ifdef CONFIG_X86_32
- if (reg == APIC_LDR)
- return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
-#endif
if (reg != APIC_ID)
return 0;
@@ -129,14 +125,6 @@ static int xen_phys_pkg_id(int initial_apic_id, int index_msb)
return initial_apic_id >> index_msb;
}
-#ifdef CONFIG_X86_32
-static int xen_x86_32_early_logical_apicid(int cpu)
-{
- /* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */
- return 1 << cpu;
-}
-#endif
-
static void xen_noop(void)
{
}
@@ -199,11 +187,6 @@ static struct apic xen_pv_apic = {
.icr_write = xen_apic_icr_write,
.wait_icr_idle = xen_noop,
.safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
-
-#ifdef CONFIG_X86_32
- /* generic_processor_info and setup_local_APIC. */
- .x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid,
-#endif
};
static void __init xen_apic_check(void)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 2aab43a13a8c..22e741e0b10c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -119,14 +119,6 @@ static void __init xen_banner(void)
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
-
-#ifdef CONFIG_X86_32
- pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
- "Support for running as 32-bit PV-guest under Xen will soon be removed\n"
- "from the Linux kernel!\n"
- "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
- "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
-#endif
}
static void __init xen_pv_init_platform(void)
@@ -353,15 +345,13 @@ static void set_aliased_prot(void *v, pgprot_t prot)
pte_t *ptep;
pte_t pte;
unsigned long pfn;
- struct page *page;
unsigned char dummy;
+ void *va;
ptep = lookup_address((unsigned long)v, &level);
BUG_ON(ptep == NULL);
pfn = pte_pfn(*ptep);
- page = pfn_to_page(pfn);
-
pte = pfn_pte(pfn, prot);
/*
@@ -391,14 +381,10 @@ static void set_aliased_prot(void *v, pgprot_t prot)
if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
BUG();
- if (!PageHighMem(page)) {
- void *av = __va(PFN_PHYS(pfn));
+ va = __va(PFN_PHYS(pfn));
- if (av != v)
- if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
- BUG();
- } else
- kmap_flush_unused();
+ if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
+ BUG();
preempt_enable();
}
@@ -538,30 +524,12 @@ static void load_TLS_descriptor(struct thread_struct *t,
static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
/*
- * XXX sleazy hack: If we're being called in a lazy-cpu zone
- * and lazy gs handling is enabled, it means we're in a
- * context switch, and %gs has just been saved. This means we
- * can zero it out to prevent faults on exit from the
- * hypervisor if the next process has no %gs. Either way, it
- * has been saved, and the new value will get loaded properly.
- * This will go away as soon as Xen has been modified to not
- * save/restore %gs for normal hypercalls.
- *
- * On x86_64, this hack is not used for %gs, because gs points
- * to KERNEL_GS_BASE (and uses it for PDA references), so we
- * must not zero %gs on x86_64
- *
- * For x86_64, we need to zero %fs, otherwise we may get an
+ * In lazy mode we need to zero %fs, otherwise we may get an
* exception between the new %fs descriptor being loaded and
* %fs being effectively cleared at __switch_to().
*/
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-#ifdef CONFIG_X86_32
- lazy_load_gs(0);
-#else
+ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
loadsegment(fs, 0);
-#endif
- }
xen_mc_batch();
@@ -572,13 +540,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
-#ifdef CONFIG_X86_64
static void xen_load_gs_index(unsigned int idx)
{
if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
BUG();
}
-#endif
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
const void *ptr)
@@ -597,7 +563,6 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
preempt_enable();
}
-#ifdef CONFIG_X86_64
void noist_exc_debug(struct pt_regs *regs);
DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
@@ -697,7 +662,6 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
return true;
}
-#endif
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
@@ -710,10 +674,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
info->vector = vector;
addr = gate_offset(val);
-#ifdef CONFIG_X86_64
if (!get_trap_addr((void **)&addr, val->bits.ist))
return 0;
-#endif /* CONFIG_X86_64 */
info->address = addr;
info->cs = gate_segment(val);
@@ -958,15 +920,12 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
-#ifdef CONFIG_X86_64
unsigned int which;
u64 base;
-#endif
ret = 0;
switch (msr) {
-#ifdef CONFIG_X86_64
case MSR_FS_BASE: which = SEGBASE_FS; goto set;
case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
@@ -976,7 +935,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
if (HYPERVISOR_set_segment_base(which, base) != 0)
ret = -EIO;
break;
-#endif
case MSR_STAR:
case MSR_CSTAR:
@@ -1058,9 +1016,7 @@ void __init xen_setup_vcpu_info_placement(void)
static const struct pv_info xen_info __initconst = {
.shared_kernel_pmd = 0,
-#ifdef CONFIG_X86_64
.extra_user_64bit_cs = FLAT_USER_CS64,
-#endif
.name = "Xen",
};
@@ -1086,18 +1042,14 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_pmc = xen_read_pmc,
.iret = xen_iret,
-#ifdef CONFIG_X86_64
.usergs_sysret64 = xen_sysret64,
-#endif
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
.load_gdt = xen_load_gdt,
.load_idt = xen_load_idt,
.load_tls = xen_load_tls,
-#ifdef CONFIG_X86_64
.load_gs_index = xen_load_gs_index,
-#endif
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,
@@ -1364,15 +1316,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* keep using Xen gdt for now; no urgent need to change it */
-#ifdef CONFIG_X86_32
- pv_info.kernel_rpl = 1;
- if (xen_feature(XENFEAT_supervisor_mode_kernel))
- pv_info.kernel_rpl = 0;
-#else
pv_info.kernel_rpl = 0;
-#endif
- /* set the limit of our address space */
- xen_reserve_top();
/*
* We used to do this in xen_arch_setup, but that is too late
@@ -1384,12 +1328,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
if (rc != 0)
xen_raw_printk("physdev_op failed %d\n", rc);
-#ifdef CONFIG_X86_32
- /* set up basic CPUID stuff */
- cpu_detect(&new_cpu_data);
- set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
- new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
-#endif
if (xen_start_info->mod_start) {
if (xen_start_info->flags & SIF_MOD_START_PFN)
@@ -1458,12 +1396,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_efi_init(&boot_params);
/* Start the world */
-#ifdef CONFIG_X86_32
- i386_start_kernel();
-#else
cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
-#endif
}
static int xen_cpu_up_prepare_pv(unsigned int cpu)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index a58d9c69807a..3273c985d3dd 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -86,19 +86,8 @@
#include "mmu.h"
#include "debugfs.h"
-#ifdef CONFIG_X86_32
-/*
- * Identity map, in addition to plain kernel map. This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
-static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
-#endif
-#ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
-#endif /* CONFIG_X86_64 */
/*
* Protects atomic reservation decrease/increase against concurrent increases.
@@ -280,10 +269,7 @@ static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
if (!xen_batched_set_pte(ptep, pteval)) {
/*
* Could call native_set_pte() here and trap and
- * emulate the PTE write but with 32-bit guests this
- * needs two traps (one for each of the two 32-bit
- * words in the PTE) so do one hypercall directly
- * instead.
+ * emulate the PTE write, but a hypercall is much cheaper.
*/
struct mmu_update u;
@@ -439,26 +425,6 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
xen_set_pud_hyper(ptr, val);
}
-#ifdef CONFIG_X86_PAE
-static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
-{
- trace_xen_mmu_set_pte_atomic(ptep, pte);
- __xen_set_pte(ptep, pte);
-}
-
-static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- trace_xen_mmu_pte_clear(mm, addr, ptep);
- __xen_set_pte(ptep, native_make_pte(0));
-}
-
-static void xen_pmd_clear(pmd_t *pmdp)
-{
- trace_xen_mmu_pmd_clear(pmdp);
- set_pmd(pmdp, __pmd(0));
-}
-#endif /* CONFIG_X86_PAE */
-
__visible pmd_t xen_make_pmd(pmdval_t pmd)
{
pmd = pte_pfn_to_mfn(pmd);
@@ -466,7 +432,6 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
-#ifdef CONFIG_X86_64
__visible pudval_t xen_pud_val(pud_t pud)
{
return pte_mfn_to_pfn(pud.pud);
@@ -571,27 +536,27 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
-#endif /* CONFIG_X86_64 */
-static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
- int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
- bool last, unsigned long limit)
+static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ bool last, unsigned long limit)
{
- int i, nr, flush = 0;
+ int i, nr;
nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
for (i = 0; i < nr; i++) {
if (!pmd_none(pmd[i]))
- flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
+ (*func)(mm, pmd_page(pmd[i]), PT_PTE);
}
- return flush;
}
-static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
- int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
- bool last, unsigned long limit)
+static void xen_pud_walk(struct mm_struct *mm, pud_t *pud,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ bool last, unsigned long limit)
{
- int i, nr, flush = 0;
+ int i, nr;
nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
for (i = 0; i < nr; i++) {
@@ -602,29 +567,26 @@ static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
pmd = pmd_offset(&pud[i], 0);
if (PTRS_PER_PMD > 1)
- flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
- flush |= xen_pmd_walk(mm, pmd, func,
- last && i == nr - 1, limit);
+ (*func)(mm, virt_to_page(pmd), PT_PMD);
+ xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit);
}
- return flush;
}
-static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
- int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
- bool last, unsigned long limit)
+static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ bool last, unsigned long limit)
{
- int flush = 0;
pud_t *pud;
if (p4d_none(*p4d))
- return flush;
+ return;
pud = pud_offset(p4d, 0);
if (PTRS_PER_PUD > 1)
- flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
- flush |= xen_pud_walk(mm, pud, func, last, limit);
- return flush;
+ (*func)(mm, virt_to_page(pud), PT_PUD);
+ xen_pud_walk(mm, pud, func, last, limit);
}
/*
@@ -636,32 +598,27 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
* will be STACK_TOP_MAX, but at boot we need to pin up to
* FIXADDR_TOP.
*
- * For 32-bit the important bit is that we don't pin beyond there,
- * because then we start getting into Xen's ptes.
- *
- * For 64-bit, we must skip the Xen hole in the middle of the address
- * space, just after the big x86-64 virtual hole.
+ * We must skip the Xen hole in the middle of the address space, just after
+ * the big x86-64 virtual hole.
*/
-static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
+static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
{
- int i, nr, flush = 0;
+ int i, nr;
unsigned hole_low = 0, hole_high = 0;
/* The limit is the last byte to be touched */
limit--;
BUG_ON(limit >= FIXADDR_TOP);
-#ifdef CONFIG_X86_64
/*
* 64-bit has a great big hole in the middle of the address
* space, which contains the Xen mappings.
*/
hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
hole_high = pgd_index(GUARD_HOLE_END_ADDR);
-#endif
nr = pgd_index(limit) + 1;
for (i = 0; i < nr; i++) {
@@ -674,22 +631,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
continue;
p4d = p4d_offset(&pgd[i], 0);
- flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
+ xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
}
/* Do the top level last, so that the callbacks can use it as
a cue to do final things like tlb flushes. */
- flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
-
- return flush;
+ (*func)(mm, virt_to_page(pgd), PT_PGD);
}
-static int xen_pgd_walk(struct mm_struct *mm,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
+static void xen_pgd_walk(struct mm_struct *mm,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
{
- return __xen_pgd_walk(mm, mm->pgd, func, limit);
+ __xen_pgd_walk(mm, mm->pgd, func, limit);
}
/* If we're using split pte locks, then take the page's lock and
@@ -722,26 +677,17 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
xen_extend_mmuext_op(&op);
}
-static int xen_pin_page(struct mm_struct *mm, struct page *page,
- enum pt_level level)
+static void xen_pin_page(struct mm_struct *mm, struct page *page,
+ enum pt_level level)
{
unsigned pgfl = TestSetPagePinned(page);
- int flush;
-
- if (pgfl)
- flush = 0; /* already pinned */
- else if (PageHighMem(page))
- /* kmaps need flushing if we found an unpinned
- highpage */
- flush = 1;
- else {
+
+ if (!pgfl) {
void *pt = lowmem_page_address(page);
unsigned long pfn = page_to_pfn(page);
struct multicall_space mcs = __xen_mc_entry(0);
spinlock_t *ptl;
- flush = 0;
-
/*
* We need to hold the pagetable lock between the time
* we make the pagetable RO and when we actually pin
@@ -778,8 +724,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
xen_mc_callback(xen_pte_unlock, ptl);
}
}
-
- return flush;
}
/* This is called just after a mm has been created, but it has not
@@ -787,39 +731,22 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
read-only, and can be pinned. */
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
+ pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
trace_xen_mmu_pgd_pin(mm, pgd);
xen_mc_batch();
- if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
- /* re-enable interrupts for flushing */
- xen_mc_issue(0);
+ __xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT);
- kmap_flush_unused();
+ xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
- xen_mc_batch();
+ if (user_pgd) {
+ xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
+ xen_do_pin(MMUEXT_PIN_L4_TABLE,
+ PFN_DOWN(__pa(user_pgd)));
}
-#ifdef CONFIG_X86_64
- {
- pgd_t *user_pgd = xen_get_user_pgd(pgd);
-
- xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
-
- if (user_pgd) {
- xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
- xen_do_pin(MMUEXT_PIN_L4_TABLE,
- PFN_DOWN(__pa(user_pgd)));
- }
- }
-#else /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_PAE
- /* Need to make sure unshared kernel PMD is pinnable */
- xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
- PT_PMD);
-#endif
- xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
-#endif /* CONFIG_X86_64 */
xen_mc_issue(0);
}
@@ -854,11 +781,10 @@ void xen_mm_pin_all(void)
spin_unlock(&pgd_lock);
}
-static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
- enum pt_level level)
+static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
+ enum pt_level level)
{
SetPagePinned(page);
- return 0;
}
/*
@@ -870,18 +796,16 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
static void __init xen_after_bootmem(void)
{
static_branch_enable(&xen_struct_pages_ready);
-#ifdef CONFIG_X86_64
SetPagePinned(virt_to_page(level3_user_vsyscall));
-#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
-static int xen_unpin_page(struct mm_struct *mm, struct page *page,
- enum pt_level level)
+static void xen_unpin_page(struct mm_struct *mm, struct page *page,
+ enum pt_level level)
{
unsigned pgfl = TestClearPagePinned(page);
- if (pgfl && !PageHighMem(page)) {
+ if (pgfl) {
void *pt = lowmem_page_address(page);
unsigned long pfn = page_to_pfn(page);
spinlock_t *ptl = NULL;
@@ -912,36 +836,24 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
xen_mc_callback(xen_pte_unlock, ptl);
}
}
-
- return 0; /* never need to flush on unpin */
}
/* Release a pagetables pages back as normal RW */
static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
{
+ pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
trace_xen_mmu_pgd_unpin(mm, pgd);
xen_mc_batch();
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-#ifdef CONFIG_X86_64
- {
- pgd_t