summaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-11-28 09:46:22 -0800
committerTejun Heo <tj@kernel.org>2011-11-28 09:46:22 -0800
commitd4bbf7e7759afc172e2bfbc5c416324590049cdd (patch)
tree7eab5ee5481cd3dcf1162329fec827177640018a /arch/x86/xen
parenta150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff)
parent401d0069cb344f401bc9d264c31db55876ff78c0 (diff)
Merge branch 'master' into x86/memblock
Conflicts & resolutions: * arch/x86/xen/setup.c dc91c728fd "xen: allow extra memory to be in multiple regions" 24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..." conflicted on xen_add_extra_mem() updates. The resolution is trivial as the latter just want to replace memblock_x86_reserve_range() with memblock_reserve(). * drivers/pci/intel-iommu.c 166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/" 5dfe8660a3d "bootmem: Replace work_with_active_regions() with..." conflicted as the former moved the file under drivers/iommu/. Resolved by applying the chnages from the latter on the moved file. * mm/Kconfig 6661672053a "memblock: add NO_BOOTMEM config symbol" c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option" conflicted trivially. Both added config options. Just letting both add their own options resolves the conflict. * mm/memblock.c d1f0ece6cdc "mm/memblock.c: small function definition fixes" ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()" confliected. The former updates function removed by the latter. Resolution is trivial. Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/Kconfig11
-rw-r--r--arch/x86/xen/Makefile4
-rw-r--r--arch/x86/xen/enlighten.c36
-rw-r--r--arch/x86/xen/grant-table.c2
-rw-r--r--arch/x86/xen/mmu.c205
-rw-r--r--arch/x86/xen/multicalls.c169
-rw-r--r--arch/x86/xen/multicalls.h6
-rw-r--r--arch/x86/xen/p2m.c128
-rw-r--r--arch/x86/xen/platform-pci-unplug.c2
-rw-r--r--arch/x86/xen/setup.c294
-rw-r--r--arch/x86/xen/smp.c15
-rw-r--r--arch/x86/xen/time.c21
-rw-r--r--arch/x86/xen/trace.c62
-rw-r--r--arch/x86/xen/vga.c67
-rw-r--r--arch/x86/xen/xen-asm_32.S8
-rw-r--r--arch/x86/xen/xen-ops.h11
16 files changed, 634 insertions, 407 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5cc821cb2e09..26c731a106af 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -25,8 +25,7 @@ config XEN_PRIVILEGED_GUEST
config XEN_PVHVM
def_bool y
- depends on XEN
- depends on X86_LOCAL_APIC
+ depends on XEN && PCI && X86_LOCAL_APIC
config XEN_MAX_DOMAIN_MEMORY
int
@@ -49,11 +48,3 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
-
-config XEN_DEBUG
- bool "Enable Xen debug checks"
- depends on XEN
- default n
- help
- Enable various WARN_ON checks in the Xen MMU code.
- Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 17c565de3d64..add2c2d729ce 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,8 +15,10 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
grant-table.o suspend.o platform-pci-unplug.o \
p2m.o
+obj-$(CONFIG_EVENT_TRACING) += trace.o
+
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
-
+obj-$(CONFIG_XEN_DOM0) += vga.o
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5525163a0398..1f928659c338 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type);
unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned int machine_to_phys_order;
-EXPORT_SYMBOL(machine_to_phys_order);
+unsigned long machine_to_phys_nr;
+EXPORT_SYMBOL(machine_to_phys_nr);
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void)
~((1 << X86_FEATURE_APIC) | /* disable local APIC */
(1 << X86_FEATURE_ACPI)); /* disable ACPI */
ax = 1;
+ cx = 0;
xen_cpuid(&ax, &bx, &cx, &dx);
xsave_mask =
@@ -341,6 +342,8 @@ static void xen_set_ldt(const void *addr, unsigned entries)
struct mmuext_op *op;
struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+ trace_xen_cpu_set_ldt(addr, entries);
+
op = mcs.args;
op->cmd = MMUEXT_SET_LDT;
op->arg1.linear_addr = (unsigned long)addr;
@@ -496,6 +499,8 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
u64 entry = *(u64 *)ptr;
+ trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);
+
preempt_disable();
xen_mc_flush();
@@ -565,6 +570,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
unsigned long p = (unsigned long)&dt[entrynum];
unsigned long start, end;
+ trace_xen_cpu_write_idt_entry(dt, entrynum, g);
+
preempt_disable();
start = __this_cpu_read(idt_desc.address);
@@ -619,6 +626,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
+ trace_xen_cpu_load_idt(desc);
+
spin_lock(&lock);
__get_cpu_var(idt_desc) = *desc;
@@ -637,6 +646,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
const void *desc, int type)
{
+ trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
+
preempt_disable();
switch (type) {
@@ -665,6 +676,8 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
const void *desc, int type)
{
+ trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
+
switch (type) {
case DESC_LDT:
case DESC_TSS:
@@ -684,7 +697,9 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
static void xen_load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
- struct multicall_space mcs = xen_mc_entry(0);
+ struct multicall_space mcs;
+
+ mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
@@ -937,6 +952,10 @@ static const struct pv_info xen_info __initconst = {
.paravirt_enabled = 1,
.shared_kernel_pmd = 0,
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = FLAT_USER_CS64,
+#endif
+
.name = "Xen",
};
@@ -1248,6 +1267,14 @@ asmlinkage void __init xen_start_kernel(void)
if (pci_xen)
x86_init.pci.arch_init = pci_xen_init;
} else {
+ const struct dom0_vga_console_info *info =
+ (void *)((char *)xen_start_info +
+ xen_start_info->console.dom0.info_off);
+
+ xen_init_vga(info, xen_start_info->console.dom0.info_size);
+ xen_start_info->console.domU.mfn = 0;
+ xen_start_info->console.domU.evtchn = 0;
+
/* Make sure ACS will be enabled */
pci_request_acs();
}
@@ -1329,7 +1356,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
int cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ xen_vcpu_setup(cpu);
if (xen_have_vector_callback)
xen_init_lock_cpu(cpu);
break;
@@ -1359,7 +1386,6 @@ static void __init xen_hvm_guest_init(void)
xen_hvm_smp_init();
register_cpu_notifier(&xen_hvm_cpu_notifier);
xen_unplug_emulated_devices();
- have_vcpu_info_placement = 0;
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 49ba9b5224d1..5a40d24ba331 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -71,7 +71,7 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
if (shared == NULL) {
struct vm_struct *area =
- xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
+ alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
BUG_ON(area == NULL);
shared = area->addr;
*__shared = shared;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ad54fa10f8a2..f4bf8aa574f4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -48,6 +48,8 @@
#include <linux/memblock.h>
#include <linux/seq_file.h>
+#include <trace/events/xen.h>
+
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
@@ -194,6 +196,8 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
struct multicall_space mcs;
struct mmu_update *u;
+ trace_xen_mmu_set_domain_pte(ptep, pteval, domid);
+
mcs = xen_mc_entry(sizeof(*u));
u = mcs.args;
@@ -225,6 +229,24 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
*u = *update;
}
+static void xen_extend_mmuext_op(const struct mmuext_op *op)
+{
+ struct multicall_space mcs;
+ struct mmuext_op *u;
+
+ mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));
+
+ if (mcs.mc != NULL) {
+ mcs.mc->args[1]++;
+ } else {
+ mcs = __xen_mc_entry(sizeof(*u));
+ MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+ }
+
+ u = mcs.args;
+ *u = *op;
+}
+
static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
{
struct mmu_update u;
@@ -245,6 +267,8 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
+ trace_xen_mmu_set_pmd(ptr, val);
+
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
@@ -282,22 +306,30 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
return true;
}
-static void xen_set_pte(pte_t *ptep, pte_t pteval)
+static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
{
if (!xen_batched_set_pte(ptep, pteval))
native_set_pte(ptep, pteval);
}
+static void xen_set_pte(pte_t *ptep, pte_t pteval)
+{
+ trace_xen_mmu_set_pte(ptep, pteval);
+ __xen_set_pte(ptep, pteval);
+}
+
static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
- xen_set_pte(ptep, pteval);
+ trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
+ __xen_set_pte(ptep, pteval);
}
pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
/* Just return the pte as-is. We preserve the bits on commit */
+ trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
return *ptep;
}
@@ -306,6 +338,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
{
struct mmu_update u;
+ trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
xen_mc_batch();
u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
@@ -462,41 +495,6 @@ static pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-#ifdef CONFIG_XEN_DEBUG
-pte_t xen_make_pte_debug(pteval_t pte)
-{
- phys_addr_t addr = (pte & PTE_PFN_MASK);
- phys_addr_t other_addr;
- bool io_page = false;
- pte_t _pte;
-
- if (pte & _PAGE_IOMAP)
- io_page = true;
-
- _pte = xen_make_pte(pte);
-
- if (!addr)
- return _pte;
-
- if (io_page &&
- (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
- other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
- WARN_ONCE(addr != other_addr,
- "0x%lx is using VM_IO, but it is 0x%lx!\n",
- (unsigned long)addr, (unsigned long)other_addr);
- } else {
- pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
- other_addr = (_pte.pte & PTE_PFN_MASK);
- WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
- "0x%lx is missing VM_IO (and wasn't fixed)!\n",
- (unsigned long)addr);
- }
-
- return _pte;
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
-#endif
-
static pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
@@ -530,6 +528,8 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
static void xen_set_pud(pud_t *ptr, pud_t val)
{
+ trace_xen_mmu_set_pud(ptr, val);
+
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
@@ -543,17 +543,20 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
#ifdef CONFIG_X86_PAE
static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
+ trace_xen_mmu_set_pte_atomic(ptep, pte);
set_64bit((u64 *)ptep, native_pte_val(pte));
}
static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
+ trace_xen_mmu_pte_clear(mm, addr, ptep);
if (!xen_batched_set_pte(ptep, native_make_pte(0)))
native_pte_clear(mm, addr, ptep);
}
static void xen_pmd_clear(pmd_t *pmdp)
{
+ trace_xen_mmu_pmd_clear(pmdp);
set_pmd(pmdp, __pmd(0));
}
#endif /* CONFIG_X86_PAE */
@@ -629,6 +632,8 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
{
pgd_t *user_ptr = xen_get_user_pgd(ptr);
+ trace_xen_mmu_set_pgd(ptr, user_ptr, val);
+
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
@@ -788,14 +793,12 @@ static void xen_pte_unlock(void *v)
static void xen_do_pin(unsigned level, unsigned long pfn)
{
- struct mmuext_op *op;
- struct multicall_space mcs;
+ struct mmuext_op op;
- mcs = __xen_mc_entry(sizeof(*op));
- op = mcs.args;
- op->cmd = level;
- op->arg1.mfn = pfn_to_mfn(pfn);
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+ op.cmd = level;
+ op.arg1.mfn = pfn_to_mfn(pfn);
+
+ xen_extend_mmuext_op(&op);
}
static int xen_pin_page(struct mm_struct *mm, struct page *page,
@@ -863,6 +866,8 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
read-only, and can be pinned. */
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
+ trace_xen_mmu_pgd_pin(mm, pgd);
+
xen_mc_batch();
if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
@@ -988,6 +993,8 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
/* Release a pagetables pages back as normal RW */
static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
{
+ trace_xen_mmu_pgd_unpin(mm, pgd);
+
xen_mc_batch();
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
@@ -1196,6 +1203,8 @@ static void xen_flush_tlb(void)
struct mmuext_op *op;
struct multicall_space mcs;
+ trace_xen_mmu_flush_tlb(0);
+
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
@@ -1214,6 +1223,8 @@ static void xen_flush_tlb_single(unsigned long addr)
struct mmuext_op *op;
struct multicall_space mcs;
+ trace_xen_mmu_flush_tlb_single(addr);
+
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
@@ -1240,6 +1251,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
} *args;
struct multicall_space mcs;
+ trace_xen_mmu_flush_tlb_others(cpus, mm, va);
+
if (cpumask_empty(cpus))
return; /* nothing to do */
@@ -1275,10 +1288,11 @@ static void set_current_cr3(void *v)
static void __xen_write_cr3(bool kernel, unsigned long cr3)
{
- struct mmuext_op *op;
- struct multicall_space mcs;
+ struct mmuext_op op;
unsigned long mfn;
+ trace_xen_mmu_write_cr3(kernel, cr3);
+
if (cr3)
mfn = pfn_to_mfn(PFN_DOWN(cr3));
else
@@ -1286,13 +1300,10 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
WARN_ON(mfn == 0 && kernel);
- mcs = __xen_mc_entry(sizeof(*op));
-
- op = mcs.args;
- op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
- op->arg1.mfn = mfn;
+ op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
+ op.arg1.mfn = mfn;
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+ xen_extend_mmuext_op(&op);
if (kernel) {
percpu_write(xen_cr3, cr3);
@@ -1451,19 +1462,52 @@ static void __init xen_release_pmd_init(unsigned long pfn)
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
+static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+{
+ struct multicall_space mcs;
+ struct mmuext_op *op;
+
+ mcs = __xen_mc_entry(sizeof(*op));
+ op = mcs.args;
+ op->cmd = cmd;
+ op->arg1.mfn = pfn_to_mfn(pfn);
+
+ MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+}
+
+static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
+{
+ struct multicall_space mcs;
+ unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT);
+
+ mcs = __xen_mc_entry(0);
+ MULTI_update_va_mapping(mcs.mc, (unsigned long)addr,
+ pfn_pte(pfn, prot), 0);
+}
+
/* This needs to make sure the new pte page is pinned iff its being
attached to a pinned pagetable. */
-static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
+static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
+ unsigned level)
{
- struct page *page = pfn_to_page(pfn);
+ bool pinned = PagePinned(virt_to_page(mm->pgd));
+
+ trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
+
+ if (pinned) {
+ struct page *page = pfn_to_page(pfn);
- if (PagePinned(virt_to_page(mm->pgd))) {
SetPagePinned(page);
if (!PageHighMem(page)) {
- make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
+ xen_mc_batch();
+
+ __set_pfn_prot(pfn, PAGE_KERNEL_RO);
+
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
- pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+ __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
} else {
/* make sure there are no stray mappings of
this page */
@@ -1483,15 +1527,23 @@ static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
}
/* This should never happen until we're OK to use struct page */
-static void xen_release_ptpage(unsigned long pfn, unsigned level)
+static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
+ bool pinned = PagePinned(page);
+
+ trace_xen_mmu_release_ptpage(pfn, level, pinned);
- if (PagePinned(page)) {
+ if (pinned) {
if (!PageHighMem(page)) {
+ xen_mc_batch();
+
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
- make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+ __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
+
+ __set_pfn_prot(pfn, PAGE_KERNEL);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
}
ClearPagePinned(page);
}
@@ -1626,15 +1678,17 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
void __init xen_setup_machphys_mapping(void)
{
struct xen_machphys_mapping mapping;
- unsigned long machine_to_phys_nr_ents;
if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
machine_to_phys_mapping = (unsigned long *)mapping.v_start;
- machine_to_phys_nr_ents = mapping.max_mfn + 1;
+ machine_to_phys_nr = mapping.max_mfn + 1;
} else {
- machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+ machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
}
- machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+#ifdef CONFIG_X86_32
+ WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
+ < machine_to_phys_mapping);
+#endif
}
#ifdef CONFIG_X86_64
@@ -1825,6 +1879,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
# endif
#else
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+ case VVAR_PAGE:
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
@@ -1865,7 +1920,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#ifdef CONFIG_X86_64
/* Replicate changes to map the vsyscall page into the user
pagetable vsyscall mapping. */
- if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+ if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
+ idx == VVAR_PAGE) {
unsigned long vaddr = __fix_to_virt(idx);
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
}
@@ -1897,9 +1953,6 @@ void __init xen_ident_map_ISA(void)
static void __init xen_post_allocator_init(void)
{
-#ifdef CONFIG_XEN_DEBUG
- pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
-#endif
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
@@ -2309,17 +2362,3 @@ out:
return err;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
-
-#ifdef CONFIG_XEN_DEBUG_FS
-static int p2m_dump_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, p2m_dump_show, NULL);
-}
-
-static const struct file_operations p2m_dump_fops = {
- .open = p2m_dump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 1b2b73ff0a6e..0d82003e76ad 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -30,12 +30,13 @@
#define MC_BATCH 32
-#define MC_DEBUG 1
+#define MC_DEBUG 0
#define MC_ARGS (MC_BATCH * 16)
struct mc_buffer {
+ unsigned mcidx, argidx, cbidx;
struct multicall_entry entries[MC_BATCH];
#if MC_DEBUG
struct multicall_entry debug[MC_BATCH];
@@ -46,85 +47,15 @@ struct mc_buffer {
void (*fn)(void *);
void *data;
} callbacks[MC_BATCH];
- unsigned mcidx, argidx, cbidx;
};
static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
-/* flush reasons 0- slots, 1- args, 2- callbacks */
-enum flush_reasons
-{
- FL_SLOTS,
- FL_ARGS,
- FL_CALLBACKS,
-
- FL_N_REASONS
-};
-
-#ifdef CONFIG_XEN_DEBUG_FS
-#define NHYPERCALLS 40 /* not really */
-
-static struct {
- unsigned histo[MC_BATCH+1];
-
- unsigned issued;
- unsigned arg_total;
- unsigned hypercalls;
- unsigned histo_hypercalls[NHYPERCALLS];
-
- unsigned flush[FL_N_REASONS];
-} mc_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
- if (unlikely(zero_stats)) {
- memset(&mc_stats, 0, sizeof(mc_stats));
- zero_stats = 0;
- }
-}
-
-static void mc_add_stats(const struct mc_buffer *mc)
-{
- int i;
-
- check_zero();
-
- mc_stats.issued++;
- mc_stats.hypercalls += mc->mcidx;
- mc_stats.arg_total += mc->argidx;
-
- mc_stats.histo[mc->mcidx]++;
- for(i = 0; i < mc->mcidx; i++) {
- unsigned op = mc->entries[i].op;
- if (op < NHYPERCALLS)
- mc_stats.histo_hypercalls[op]++;
- }
-}
-
-static void mc_stats_flush(enum flush_reasons idx)
-{
- check_zero();
-
- mc_stats.flush[idx]++;
-}
-
-#else /* !CONFIG_XEN_DEBUG_FS */
-
-static inline void mc_add_stats(const struct mc_buffer *mc)
-{
-}
-
-static inline void mc_stats_flush(enum flush_reasons idx)
-{
-}
-#endif /* CONFIG_XEN_DEBUG_FS */
-
void xen_mc_flush(void)
{
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+ struct multicall_entry *mc;
int ret = 0;
unsigned long flags;
int i;
@@ -135,9 +66,26 @@ void xen_mc_flush(void)
something in the middle */
local_irq_save(flags);
- mc_add_stats(b);
+ trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
+
+ switch (b->mcidx) {
+ case 0:
+ /* no-op */
+ BUG_ON(b->argidx != 0);
+ break;
+
+ case 1:
+ /* Singleton multicall - bypass multicall machinery
+ and just do the call directly. */
+ mc = &b->entries[0];
+
+ mc->result = privcmd_call(mc->op,
+ mc->args[0], mc->args[1], mc->args[2],
+ mc->args[3], mc->args[4]);
+ ret = mc->result < 0;
+ break;
- if (b->mcidx) {
+ default:
#if MC_DEBUG
memcpy(b->debug, b->entries,
b->mcidx * sizeof(struct multicall_entry));
@@ -164,11 +112,10 @@ void xen_mc_flush(void)
}
}
#endif
+ }
- b->mcidx = 0;
- b->argidx = 0;
- } else
- BUG_ON(b->argidx != 0);
+ b->mcidx = 0;
+ b->argidx = 0;
for (i = 0; i < b->cbidx; i++) {
struct callback *cb = &b->callbacks[i];
@@ -188,18 +135,21 @@ struct multicall_space __xen_mc_entry(size_t args)
struct multicall_space ret;
unsigned argidx = roundup(b->argidx, sizeof(u64));
+ trace_xen_mc_entry_alloc(args);
+
BUG_ON(preemptible());
BUG_ON(b->argidx >= MC_ARGS);
- if (b->mcidx == MC_BATCH ||
- (argidx + args) >= MC_ARGS) {
- mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS);
+ if (unlikely(b->mcidx == MC_BATCH ||
+ (argidx + args) >= MC_ARGS)) {
+ trace_xen_mc_flush_reason((b->mcidx == MC_BATCH) ?
+ XEN_MC_FL_BATCH : XEN_MC_FL_ARGS);
xen_mc_flush();
argidx = roundup(b->argidx, sizeof(u64));
}
ret.mc = &b->entries[b->mcidx];
-#ifdef MC_DEBUG
+#if MC_DEBUG
b->caller[b->mcidx] = __builtin_return_address(0);
#endif
b->mcidx++;
@@ -218,20 +168,25 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
BUG_ON(preemptible());
BUG_ON(b->argidx >= MC_ARGS);
- if (b->mcidx == 0)
- return ret;
-
- if (b->entries[b->mcidx - 1].op != op)
- return ret;
+ if (unlikely(b->mcidx == 0 ||
+ b->entries[b->mcidx - 1].op != op)) {
+ trace_xen_mc_extend_args(op, size, XEN_MC_XE_BAD_OP);
+ goto out;
+ }
- if ((b->argidx + size) >= MC_ARGS)
- return ret;
+ if (unlikely((b->argidx + size) >= MC_ARGS)) {
+ trace_xen_mc_extend_args(op, size, XEN_MC_XE_NO_SPACE);
+ goto out;
+ }
ret.mc = &b->entries[b->mcidx - 1];
ret.args = &b->args[b->argidx];
b->argidx += size;
BUG_ON(b->argidx >= MC_ARGS);
+
+ trace_xen_mc_extend_args(op, size, XEN_MC_XE_OK);
+out:
return ret;
}
@@ -241,43 +196,13 @@ void xen_mc_callback(void (*fn)(void *), void *data)
struct callback *cb;
if (b->cbidx == MC_BATCH) {
- mc_stats_flush(FL_CALLBACKS);
+ trace_xen_mc_flush_reason(XEN_MC_FL_CALLBACK);
xen_mc_flush();
}
+ trace_xen_mc_callback(fn, data);
+
cb = &b->callbacks[b->cbidx++];
cb->fn = fn;
cb->data = data;
}
-
-#ifdef CONFIG_XEN_DEBUG_FS
-
-static struct dentry *d_mc_debug;
-
-static int __init xen_mc_debugfs(void)
-{
- struct dentry *d_xen = xen_init_debugfs();
-
- if (d_xen == NULL)
- return -ENOMEM;
-
- d_mc_debug = debugfs_create_dir("multicalls", d_xen);
-
- debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats);
-
- debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued);
- debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls);
- debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total);
-
- xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug,
- mc_stats.histo, MC_BATCH);
- xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug,
- mc_stats.histo_hypercalls, NHYPERCALLS);
- xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug,
- mc_stats.flush, FL_N_REASONS);
-
- return 0;
-}
-fs_initcall(xen_mc_debugfs);
-
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 4ec8035e3216..dee79b78a90f 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -1,6 +1,8 @@
#ifndef _XEN_MULTICALLS_H
#define _XEN_MULTICALLS_H
+#include <trace/events/xen.h>
+
#include "xen-ops.h"
/* Multicalls */
@@ -20,8 +22,10 @@ DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags);
static inline void xen_mc_batch(void)
{
unsigned long flags;
+
/* need to disable interrupts until this entry is complete */
local_irq_save(flags);
+ trace_xen_mc_batch(paravirt_get_lazy_mode());
__this_cpu_write(xen_mc_irq_flags, flags);
}
@@ -37,6 +41,8 @@ void xen_mc_flush(void);
/* Issue a multicall if we're not in a lazy mode */
static inline void xen_mc_issue(unsigned mode)
{
+ trace_xen_mc_issue(mode);
+
if ((paravirt_get_lazy_mode() & mode) == 0)
xen_mc_flush();
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58efeb9d5440..1b267e75158d 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,7 +161,9 @@
#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <xen/grant_table.h>
+#include "multicalls.h"
#include "xen-ops.h"
static void __init m2p_override_init(void);
@@ -676,7 +678,8 @@ static unsigned long mfn_hash(unsigned long mfn)
}
/* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
+int m2p_add_override(unsigned long mfn, struct page *page,
+ struct gnttab_map_grant_ref *kmap_op)
{
unsigned long flags;
unsigned long pfn;
@@ -692,16 +695,28 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
"m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL;
}
-
- page->private = mfn;
+ WARN_ON(PagePrivate(page));
+ SetPagePrivate(page);
+ set_page_private(page, mfn);
page->index = pfn_to_mfn(pfn);
if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
return -ENOMEM;
- if (clear_pte && !PageHighMem(page))
- /* Just zap old mapping for now */
- pte_clear(&init_mm, address, ptep);
+ if (kmap_op != NULL) {
+ if (!PageHighMem(page)) {
+ struct multicall_space mcs =
+ xen_mc_entry(sizeof(*kmap_op));
+
+ MULTI_grant_table_op(mcs.mc,
+ GNTTABOP_map_grant_ref, kmap_op, 1);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+ }
+ /* let's use dev_bus_addr to record the old mfn instead */
+ kmap_op->dev_bus_addr = page->index;
+ page->index = (unsigned long) kmap_op;
+ }
spin_lock_irqsave(&m2p_override_lock, flags);
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -735,13 +750,56 @@ int m2p_remove_override(struct page *page, bool clear_pte)
spin_lock_irqsave(&m2p_override_lock, flags);
list_del(&page->lru);
spin_unlock_irqrestore(&m2p_override_lock, flags);
- set_phys_to_machine(pfn, page->index);
+ WARN_ON(!PagePrivate(page));
+ ClearPagePrivate(page);
- if (clear_pte && !PageHighMem(page))
- set_pte_at(&init_mm, address, ptep,
- pfn_pte(pfn, PAGE_KERNEL));
- /* No tlb flush necessary because the caller already
- * left the pte unmapped. */
+ if (clear_pte) {
+ struct gnttab_map_grant_ref *map_op =
+ (struct gnttab_map_grant_ref *) page->index;
+ set_phys_to_machine(pfn, map_op->dev_bus_addr);
+ if (!PageHighMem(page)) {
+ struct multicall_space mcs;
+ struct gnttab_unmap_grant_ref *unmap_op;
+
+ /*
+ * It might be that we queued all the m2p grant table
+ * hypercalls in a multicall, then m2p_remove_override
+ * get called before the multicall has actually been
+ * issued. In this case handle is going to -1 because
+ * it hasn