summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS9
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm64/Kconfig2
-rw-r--r--arch/x86/include/asm/pgtable_types.h11
-rw-r--r--arch/x86/mm/fault.c22
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/xen/efi.c2
-rw-r--r--arch/x86/xen/enlighten.c19
-rw-r--r--arch/x86/xen/mmu.c48
-rw-r--r--arch/x86/xen/p2m.c23
-rw-r--r--arch/x86/xen/p2m.h15
-rw-r--r--arch/x86/xen/setup.c370
-rw-r--r--arch/x86/xen/smp.c29
-rw-r--r--arch/x86/xen/smp.h8
-rw-r--r--arch/x86/xen/xen-head.S36
-rw-r--r--drivers/block/xen-blkback/xenbus.c11
-rw-r--r--drivers/block/xen-blkfront.c5
-rw-r--r--drivers/char/tpm/xen-tpmfront.c13
-rw-r--r--drivers/input/misc/xen-kbdfront.c5
-rw-r--r--drivers/net/xen-netback/xenbus.c10
-rw-r--r--drivers/net/xen-netfront.c16
-rw-r--r--drivers/pci/xen-pcifront.c6
-rw-r--r--drivers/scsi/Kconfig10
-rw-r--r--drivers/scsi/Makefile1
-rw-r--r--drivers/scsi/xen-scsifront.c1026
-rw-r--r--drivers/tty/hvc/hvc_xen.c9
-rw-r--r--drivers/video/fbdev/xen-fbfront.c5
-rw-r--r--drivers/xen/Kconfig9
-rw-r--r--drivers/xen/Makefile1
-rw-r--r--drivers/xen/efi.c2
-rw-r--r--drivers/xen/events/events_base.c5
-rw-r--r--drivers/xen/grant-table.c2
-rw-r--r--drivers/xen/xen-pciback/xenbus.c6
-rw-r--r--drivers/xen/xen-scsiback.c2126
-rw-r--r--drivers/xen/xenbus/xenbus_client.c9
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c6
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h4
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c8
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c8
-rw-r--r--include/xen/events.h2
-rw-r--r--include/xen/interface/elfnote.h48
-rw-r--r--include/xen/interface/io/vscsiif.h229
-rw-r--r--include/xen/interface/xen.h272
-rw-r--r--include/xen/xenbus.h21
46 files changed, 4214 insertions, 263 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index b28dc111d4a7..f8d882e13200 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10268,6 +10268,15 @@ S: Supported
F: drivers/block/xen-blkback/*
F: drivers/block/xen*
+XEN PVSCSI DRIVERS
+M: Juergen Gross <jgross@suse.com>
+L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
+L: linux-scsi@vger.kernel.org
+S: Supported
+F: drivers/scsi/xen-scsifront.c
+F: drivers/xen/xen-scsiback.c
+F: include/xen/interface/io/vscsiif.h
+
XEN SWIOTLB SUBSYSTEM
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 18f392f8b744..89c4b5ccc68d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1779,7 +1779,7 @@ config XEN_DOM0
depends on XEN
config XEN
- bool "Xen guest support on ARM (EXPERIMENTAL)"
+ bool "Xen guest support on ARM"
depends on ARM && AEABI && OF
depends on CPU_V7 && !CPU_V6
depends on !GENERIC_ATOMIC64
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c49ca4c738bb..ac9afde76dea 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -349,7 +349,7 @@ config XEN_DOM0
depends on XEN
config XEN
- bool "Xen guest support on ARM64 (EXPERIMENTAL)"
+ bool "Xen guest support on ARM64"
depends on ARM64 && OF
select SWIOTLB_XEN
help
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0f9724c9c510..07789647bf33 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,7 +23,6 @@
#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
#define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
-#define _PAGE_BIT_IOMAP _PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
#define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
@@ -52,7 +51,7 @@
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
-#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
+#define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
@@ -168,10 +167,10 @@
#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
+#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
+#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS)
+#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC)
#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a24194681513..83bb03bfa259 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -933,8 +933,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
* cross-processor TLB flush, even if no stale TLB entries exist
* on other processors.
*
+ * Spurious faults may only occur if the TLB contains an entry with
+ * fewer permission than the page table entry. Non-present (P = 0)
+ * and reserved bit (R = 1) faults are never spurious.
+ *
* There are no security implications to leaving a stale TLB when
* increasing the permissions on a page.
+ *
+ * Returns non-zero if a spurious fault was handled, zero otherwise.
+ *
+ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
+ * (Optional Invalidation).
*/
static noinline int
spurious_fault(unsigned long error_code, unsigned long address)
@@ -945,8 +954,17 @@ spurious_fault(unsigned long error_code, unsigned long address)
pte_t *pte;
int ret;
- /* Reserved-bit violation or user access to kernel space? */
- if (error_code & (PF_USER | PF_RSVD))
+ /*
+ * Only writes to RO or instruction fetches from NX may cause
+ * spurious faults.
+ *
+ * These could be from user or supervisor accesses but the TLB
+ * is only lazily flushed after a kernel mapping protection
+ * change, so user accesses are not expected to cause spurious
+ * faults.
+ */
+ if (error_code != (PF_WRITE | PF_PROT)
+ && error_code != (PF_INSTR | PF_PROT))
return 0;
pgd = init_mm.pgd + pgd_index(address);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7d05565ba781..c8140e12816a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -537,7 +537,7 @@ static void __init pagetable_init(void)
permanent_kmaps_init(pgd_base);
}
-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
+pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
EXPORT_SYMBOL_GPL(__supported_pte_mask);
/* user-defined highmem size */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5621c47d7a1a..5d984769cbd8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on);
* around without checking the pgd every time.
*/
-pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
+pteval_t __supported_pte_mask __read_mostly = ~0;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
int force_personality32;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 2ae525e0d8ba..37c1435889ce 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -442,8 +442,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
*/
prot |= _PAGE_CACHE_UC_MINUS;
- prot |= _PAGE_IOMAP; /* creating a mapping for IO */
-
vma->vm_page_prot = __pgprot(prot);
if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a02e09e18f57..be14cc3e48d5 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -15,12 +15,14 @@
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bitops.h>
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/string.h>
#include <xen/xen-ops.h>
+#include <asm/page.h>
#include <asm/setup.h>
void __init xen_efi_init(void)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0cb11fb5008..acb0effd8077 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu)
pv_cpu_ops.load_gdt = xen_load_gdt;
}
+#ifdef CONFIG_XEN_PVH
/*
* A PV guest starts with default flags that are not set for PVH, set them
* here asap.
@@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void)
return;
xen_have_vector_callback = 1;
+
+ xen_pvh_early_cpu_init(0, false);
xen_pvh_set_cr_flags(0);
#ifdef CONFIG_X86_32
BUG(); /* PVH: Implement proper support. */
#endif
}
+#endif /* CONFIG_XEN_PVH */
/* First C function to be called on Xen boot */
asmlinkage __visible void __init xen_start_kernel(void)
{
struct physdev_set_iopl set_iopl;
+ unsigned long initrd_start = 0;
int rc;
if (!xen_start_info)
@@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
xen_setup_features();
+#ifdef CONFIG_XEN_PVH
xen_pvh_early_guest_init();
+#endif
xen_setup_machphys_mapping();
/* Install Xen paravirt ops */
@@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
- __supported_pte_mask |= _PAGE_IOMAP;
-
/*
* Prevent page tables from being allocated in highmem, even
* if CONFIG_HIGHPTE is enabled.
@@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
new_cpu_data.x86_capability[0] = cpuid_edx(1);
#endif
+ if (xen_start_info->mod_start) {
+ if (xen_start_info->flags & SIF_MOD_START_PFN)
+ initrd_start = PFN_PHYS(xen_start_info->mod_start);
+ else
+ initrd_start = __pa(xen_start_info->mod_start);
+ }
+
/* Poke various useful things into boot_params */
boot_params.hdr.type_of_loader = (9 << 4) | 0;
- boot_params.hdr.ramdisk_image = xen_start_info->mod_start
- ? __pa(xen_start_info->mod_start) : 0;
+ boot_params.hdr.ramdisk_image = initrd_start;
boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 16fb0099b7f2..f62af7647ec9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0;
flags = 0;
- } else {
- /*
- * Paramount to do this test _after_ the
- * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
- * IDENTITY_FRAME_BIT resolves to true.
- */
- mfn &= ~FOREIGN_FRAME_BIT;
- if (mfn & IDENTITY_FRAME_BIT) {
- mfn &= ~IDENTITY_FRAME_BIT;
- flags |= _PAGE_IOMAP;
- }
- }
+ } else
+ mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
return val;
}
-static pteval_t iomap_pte(pteval_t val)
-{
- if (val & _PAGE_PRESENT) {
- unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
- pteval_t flags = val & PTE_FLAGS_MASK;
-
- /* We assume the pte frame number is a MFN, so
- just use it as-is. */
- val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
- }
-
- return val;
-}
-
__visible pteval_t xen_pte_val(pte_t pte)
{
pteval_t pteval = pte.pte;
@@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte)
pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
}
#endif
- if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
- return pteval;
-
return pte_mfn_to_pfn(pteval);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -481,7 +454,6 @@ void xen_set_pat(u64 pat)
__visible pte_t xen_make_pte(pteval_t pte)
{
- phys_addr_t addr = (pte & PTE_PFN_MASK);
#if 0
/* If Linux is trying to set a WC pte, then map to the Xen WC.
* If _PAGE_PAT is set, then it probably means it is really
@@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte)
pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
}
#endif
- /*
- * Unprivileged domains are allowed to do IOMAPpings for
- * PCI passthrough, but not map ISA space. The ISA
- * mappings are just dummy local mappings to keep other
- * parts of the kernel happy.
- */
- if (unlikely(pte & _PAGE_IOMAP) &&
- (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
- pte = iomap_pte(pte);
- } else {
- pte &= ~_PAGE_IOMAP;
- pte = pte_pfn_to_mfn(pte);
- }
+ pte = pte_pfn_to_mfn(pte);
return native_make_pte(pte);
}
@@ -2091,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
default:
/* By default, set_fixmap is used for hardware mappings */
- pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+ pte = mfn_pte(phys, prot);
break;
}
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 3172692381ae..9f5983b01ed9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -173,6 +173,7 @@
#include <xen/balloon.h>
#include <xen/grant_table.h>
+#include "p2m.h"
#include "multicalls.h"
#include "xen-ops.h"
@@ -180,12 +181,6 @@ static void __init m2p_override_init(void);
unsigned long xen_max_p2m_pfn __read_mostly;
-#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
-
-#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
/* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
@@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-/* We might hit two boundary violations at the start and end, at max each
- * boundary violation will require three middle nodes. */
-RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
-
-/* When we populate back during bootup, the amount of pages can vary. The
- * max we have is seen is 395979, but that does not mean it can't be more.
- * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
- * it can re-use Xen provided mfn_list array, so we only need to allocate at
- * most three P2M top nodes. */
-RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
+/* For each I/O range remapped we may lose up to two leaf pages for the boundary
+ * violations and three mid pages to cover up to 3GB. With
+ * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
+ * remapped region.
+ */
+RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
static inline unsigned p2m_top_index(unsigned long pfn)
{
diff --git a/arch/x86/xen/p2m.h b/arch/x86/xen/p2m.h
new file mode 100644
index 000000000000..ad8aee24ab72
--- /dev/null
+++ b/arch/x86/xen/p2m.h
@@ -0,0 +1,15 @@
+#ifndef _XEN_P2M_H
+#define _XEN_P2M_H
+
+#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
+#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
+
+#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
+
+#define MAX_REMAP_RANGES 10
+
+extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e);
+
+#endif /* _XEN_P2M_H */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2e555163c2fe..af7216128d93 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -29,6 +29,7 @@
#include <xen/features.h>
#include "xen-ops.h"
#include "vdso.h"
+#include "p2m.h"
/* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[];
@@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;
+/* Buffer used to remap identity mapped pages */
+unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
+
/*
* The maximum amount of extra memory compared to the base size. The
* main scaling factor is the size of struct page. At extreme ratios
@@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start,
return len;
}
-static unsigned long __init xen_release_chunk(unsigned long start,
- unsigned long end)
-{
- return xen_do_chunk(start, end, true);
-}
-
-static unsigned long __init xen_populate_chunk(
+/*
+ * Finds the next RAM pfn available in the E820 map after min_pfn.
+ * This function updates min_pfn with the pfn found and returns
+ * the size of that range or zero if not found.
+ */
+static unsigned long __init xen_find_pfn_range(
const struct e820entry *list, size_t map_size,
- unsigned long max_pfn, unsigned long *last_pfn,
- unsigned long credits_left)
+ unsigned long *min_pfn)
{
const struct e820entry *entry;
unsigned int i;
unsigned long done = 0;
- unsigned long dest_pfn;
for (i = 0, entry = list; i < map_size; i++, entry++) {
unsigned long s_pfn;
unsigned long e_pfn;
- unsigned long pfns;
- long capacity;
-
- if (credits_left <= 0)
- break;
if (entry->type != E820_RAM)
continue;
e_pfn = PFN_DOWN(entry->addr + entry->size);
- /* We only care about E820 after the xen_start_info->nr_pages */
- if (e_pfn <= max_pfn)
+ /* We only care about E820 after this */
+ if (e_pfn < *min_pfn)
continue;
s_pfn = PFN_UP(entry->addr);
- /* If the E820 falls within the nr_pages, we want to start
- * at the nr_pages PFN.
- * If that would mean going past the E820 entry, skip it
+
+ /* If min_pfn falls within the E820 entry, we want to start
+ * at the min_pfn PFN.
*/
- if (s_pfn <= max_pfn) {
- capacity = e_pfn - max_pfn;
- dest_pfn = max_pfn;
+ if (s_pfn <= *min_pfn) {
+ done = e_pfn - *min_pfn;
} else {
- capacity = e_pfn - s_pfn;
- dest_pfn = s_pfn;
+ done = e_pfn - s_pfn;
+ *min_pfn = s_pfn;
}
+ break;
+ }
- if (credits_left < capacity)
- capacity = credits_left;
+ return done;
+}
- pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
- done += pfns;
- *last_pfn = (dest_pfn + pfns);
- if (pfns < capacity)
- break;
- credits_left -= pfns;
+/*
+ * This releases a chunk of memory and then does the identity map. It's used as
+ * as a fallback if the remapping fails.
+ */
+static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
+ unsigned long *released)
+{
+ WARN_ON(start_pfn > end_pfn);
+
+ /* Need to release pages first */
+ *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
+ *identity += set_phys_range_identity(start_pfn, end_pfn);
+}
+
+/*
+ * Helper function to update both the p2m and m2p tables.
+ */
+static unsigned long __init xen_update_mem_tables(unsigned long pfn,
+ unsigned long mfn)
+{
+ struct mmu_update update = {
+ .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+ .val = pfn
+ };
+
+ /* Update p2m */
+ if (!early_set_phys_to_machine(pfn, mfn)) {
+ WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
+ pfn, mfn);
+ return false;
}
- return done;
+
+ /* Update m2p */
+ if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
+ WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
+ mfn, pfn);
+ return false;
+ }
+
+ return true;
}
-static void __init xen_set_identity_and_release_chunk(
- unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
- unsigned long *released, unsigned long *identity)
+/*
+ * This function updates the p2m and m2p tables with an identity map from
+ * start_pfn to start_pfn+size and remaps the underlying RAM of the original
+ * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
+ * to not exhaust the reserved brk space. Doing it in properly aligned blocks
+ * ensures we only allocate the minimum required leaf pages in the p2m table. It
+ * copies the existing mfns from the p2m table under the 1:1 map, overwrites
+ * them with the identity map and then updates the p2m and m2p tables with the
+ * remapped memory.
+ */
+static unsigned long __init xen_do_set_identity_and_remap_chunk(
+ unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
- unsigned long pfn;
+ unsigned long ident_pfn_iter, remap_pfn_iter;
+ unsigned long ident_start_pfn_align, remap_start_pfn_align;
+ unsigned long ident_end_pfn_align, remap_end_pfn_align;
+ unsigned long ident_boundary_pfn, remap_boundary_pfn;
+ unsigned long ident_cnt = 0;
+ unsigned long remap_cnt = 0;
+ unsigned long left = size;
+ unsigned long mod;
+ int i;
+
+ WARN_ON(size == 0);
+
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
/*
- * If the PFNs are currently mapped, clear the mappings
- * (except for the ISA region which must be 1:1 mapped) to
- * release the refcounts (in Xen) on the original frames.
+ * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
+ * blocks. We need to keep track of both the existing pfn mapping and
+ * the new pfn remapping.
*/
- for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
- pte_t pte = __pte_ma(0);
+ mod = start_pfn % P2M_PER_PAGE;
+ ident_start_pfn_align =
+ mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
+ mod = remap_pfn % P2M_PER_PAGE;
+ remap_start_pfn_align =
+ mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
+ mod = (start_pfn + size) % P2M_PER_PAGE;
+ ident_end_pfn_align = start_pfn + size - mod;
+ mod = (remap_pfn + size) % P2M_PER_PAGE;
+ remap_end_pfn_align = remap_pfn + size - mod;
+
+ /* Iterate over each p2m leaf node in each range */
+ for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
+ ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
+ ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
+ /* Check we aren't past the end */
+ BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
+ BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
+
+ /* Save p2m mappings */
+ for (i = 0; i < P2M_PER_PAGE; i++)
+ xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
+
+ /* Set identity map which will free a p2m leaf */
+ ident_cnt += set_phys_range_identity(ident_pfn_iter,
+ ident_pfn_iter + P2M_PER_PAGE);
+
+#ifdef DEBUG
+ /* Helps verify a p2m leaf has been freed */
+ for (i = 0; i < P2M_PER_PAGE; i++) {
+ unsigned int pfn = ident_pfn_iter + i;
+ BUG_ON(pfn_to_mfn(pfn) != pfn);
+ }
+#endif
+ /* Now remap memory */
+ for (i = 0; i < P2M_PER_PAGE; i++) {
+ unsigned long mfn = xen_remap_buf[i];
+
+ /* This will use the p2m leaf freed above */
+ if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
+ WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+ remap_pfn_iter + i, mfn);
+ return 0;
+ }
+
+ remap_cnt++;
+ }
- if (pfn < PFN_UP(ISA_END_ADDRESS))
- pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+ left -= P2M_PER_PAGE;
+ }
- (void)HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+ /* Max boundary space possible */
+ BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
+
+ /* Now handle the boundary conditions */
+ ident_boundary_pfn = start_pfn;
+ remap_boundary_pfn = remap_pfn;
+ for (i = 0; i < left; i++) {
+ unsigned long mfn;
+
+ /* These two checks move from the start to end boundaries */
+ if (ident_boundary_pfn == ident_start_pfn_align)
+ ident_boundary_pfn = ident_pfn_iter;
+ if (remap_boundary_pfn == remap_start_pfn_align)
+ remap_boundary_pfn = remap_pfn_iter;
+
+ /* Check we aren't past the end */
+ BUG_ON(ident_boundary_pfn >= start_pfn + size);
+ BUG_ON(remap_boundary_pfn >= remap_pfn + size);
+
+ mfn = pfn_to_mfn(ident_boundary_pfn);
+
+ if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
+ WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+ remap_pfn_iter + i, mfn);
+ return 0;
+ }
+ remap_cnt++;
+
+ ident_boundary_pfn++;
+ remap_boundary_pfn++;
}
- if (start_pfn < nr_pages)
- *released += xen_release_chunk(
- start_pfn, min(end_pfn, nr_pages));
+ /* Finish up the identity map */
+ if (ident_start_pfn_align >= ident_end_pfn_align) {
+ /*
+ * In this case we have an identity range which does not span an
+ * aligned block so everything needs to be identity mapped here.
+ * If we didn't check this we might remap too many pages since
+ * the align boundaries are not meaningful in this case.
+ */
+ ident_cnt += set_phys_range_identity(start_pfn,
+ start_pfn + size);
+ } else {
+ /* Remapped above so check each end of the chunk */
+ if (start_pfn < ident_start_pfn_align)
+ ident_cnt += set_phys_range_identity(start_pfn,
+ ident_start_pfn_align);
+ if (start_pfn + size > ident_pfn_iter)
+ ident_cnt += set_phys_range_identity(ident_pfn_iter,
+ start_pfn + size);
+ }
- *identity += set_phys_range_identity(start_pfn, end_pfn);
+ BUG_ON(ident_cnt != size);
+ BUG_ON(remap_cnt != size);
+
+ return size;
}
-static unsigned long __init xen_set_identity_and_release(
- const struct e820entry *list, size_t map_size, unsigned long nr_pages)
+/*
+ * This function takes a contiguous pfn range that needs to be identity mapped
+ * and:
+ *
+ * 1) Finds a new range of pfns to use to remap bas