From 7655abb953860485940d4de74fb45a8192149bb6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 10 Aug 2017 13:19:09 +0100
Subject: arm64: mm: Move ASID from TTBR0 to TTBR1

In preparation for mapping kernelspace and userspace with different
ASIDs, move the ASID to TTBR1 and update switch_mm to context-switch
TTBR0 via an invalid mapping (the zero page).

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Tested-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/proc.S | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 95233dfc4c39..a8a64898a2aa 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -139,9 +139,12 @@ ENDPROC(cpu_do_resume)
  */
 ENTRY(cpu_do_switch_mm)
 	pre_ttbr0_update_workaround x0, x2, x3
+	mrs	x2, ttbr1_el1
 	mmid	x1, x1				// get mm->context.id
-	bfi	x0, x1, #48, #16		// set the ASID
-	msr	ttbr0_el1, x0			// set TTBR0
+	bfi	x2, x1, #48, #16		// set the ASID
+	msr	ttbr1_el1, x2			// in TTBR1 (since TCR.A1 is set)
+	isb
+	msr	ttbr0_el1, x0			// now update TTBR0
 	isb
 	post_ttbr0_update_workaround
 	ret
@@ -224,7 +227,7 @@ ENTRY(__cpu_setup)
 	 * both user and kernel.
 	 */
 	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
-			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1
 	tcr_set_idmap_t0sz	x10, x9
 
 	/*
-- 
cgit v1.2.3


From 85d13c001497b481b4a8cc5d7db243cc44ac2bd8 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 10 Aug 2017 13:29:06 +0100
Subject: arm64: mm: Remove pre_ttbr0_update_workaround for Falkor erratum
 #E1003

The pre_ttbr0_update_workaround hook is called prior to context-switching
TTBR0 because Falkor erratum E1003 can cause TLB allocation with the wrong
ASID if both the ASID and the base address of the TTBR are updated at
the same time.

With the ASID sitting safely in TTBR1, we no longer update things
atomically, so we can remove the pre_ttbr0_update_workaround macro as
it's no longer required. The erratum infrastructure and documentation
is left around for #E1003, as it will be required by the entry
trampoline code in a future patch.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Tested-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/context.c | 11 -----------
 arch/arm64/mm/proc.S    |  1 -
 2 files changed, 12 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 6f4017046323..78a2dc596fee 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -79,13 +79,6 @@ void verify_cpu_asid_bits(void)
 	}
 }
 
-static void set_reserved_asid_bits(void)
-{
-	if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) &&
-	    cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
-		__set_bit(FALKOR_RESERVED_ASID, asid_map);
-}
-
 static void flush_context(unsigned int cpu)
 {
 	int i;
@@ -94,8 +87,6 @@ static void flush_context(unsigned int cpu)
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
 	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 
-	set_reserved_asid_bits();
-
 	for_each_possible_cpu(i) {
 		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
 		/*
@@ -254,8 +245,6 @@ static int asids_init(void)
 		panic("Failed to allocate bitmap for %lu ASIDs\n",
 		      NUM_USER_ASIDS);
 
-	set_reserved_asid_bits();
-
 	pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
 	return 0;
 }
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a8a64898a2aa..f2ff0837577c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -138,7 +138,6 @@ ENDPROC(cpu_do_resume)
  *	- pgd_phys - physical address of new TTB
  */
 ENTRY(cpu_do_switch_mm)
-	pre_ttbr0_update_workaround x0, x2, x3
 	mrs	x2, ttbr1_el1
 	mmid	x1, x1				// get mm->context.id
 	bfi	x2, x1, #48, #16		// set the ASID
-- 
cgit v1.2.3


From 158d495899ce55db453f682a8ac8390d5a426578 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 10 Aug 2017 13:34:30 +0100
Subject: arm64: mm: Rename post_ttbr0_update_workaround

The post_ttbr0_update_workaround hook applies to any change to TTBRx_EL1.
Since we're using TTBR1 for the ASID, rename the hook to make it clearer
as to what it's doing.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Tested-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/proc.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index f2ff0837577c..3146dc96f05b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -145,7 +145,7 @@ ENTRY(cpu_do_switch_mm)
 	isb
 	msr	ttbr0_el1, x0			// now update TTBR0
 	isb
-	post_ttbr0_update_workaround
+	post_ttbr_update_workaround
 	ret
 ENDPROC(cpu_do_switch_mm)
 
-- 
cgit v1.2.3


From 27a921e75711d924617269e0ba4adb8bae9fd0d1 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 10 Aug 2017 13:58:16 +0100
Subject: arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN

With the ASID now installed in TTBR1, we can re-enable ARM64_SW_TTBR0_PAN
by ensuring that we switch to a reserved ASID of zero when disabling
user access and restore the active user ASID on the uaccess enable path.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Tested-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/cache.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 7f1dbe962cf5..6cd20a8c0952 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -49,7 +49,7 @@ ENTRY(flush_icache_range)
  *	- end     - virtual end address of region
  */
 ENTRY(__flush_cache_user_range)
-	uaccess_ttbr0_enable x2, x3
+	uaccess_ttbr0_enable x2, x3, x4
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x4, x0, x3
-- 
cgit v1.2.3


From 0c8ea531b7740754cf374ca8b7510655f569c5e3 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 10 Aug 2017 14:10:28 +0100
Subject: arm64: mm: Allocate ASIDs in pairs

In preparation for separate kernel/user ASIDs, allocate them in pairs
for each mm_struct. The bottom bit distinguishes the two: if it is set,
then the ASID will map only userspace.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Tested-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/context.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 78a2dc596fee..1cb3bc92ae5c 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -39,7 +39,16 @@ static cpumask_t tlb_flush_pending;
 
 #define ASID_MASK		(~GENMASK(asid_bits - 1, 0))
 #define ASID_FIRST_VERSION	(1UL << asid_bits)
-#define NUM_USER_ASIDS		ASID_FIRST_VERSION
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define NUM_USER_ASIDS		(ASID_FIRST_VERSION >> 1)
+#define asid2idx(asid)		(((asid) & ~ASID_MASK) >> 1)
+#define idx2asid(idx)		(((idx) << 1) & ~ASID_MASK)
+#else
+#define NUM_USER_ASIDS		(ASID_FIRST_VERSION)
+#define asid2idx(asid)		((asid) & ~ASID_MASK)
+#define idx2asid(idx)		asid2idx(idx)
+#endif
 
 /* Get the ASIDBits supported by the current CPU */
 static u32 get_cpu_asid_bits(void)
@@ -98,7 +107,7 @@ static void flush_context(unsigned int cpu)
 		 */
 		if (asid == 0)
 			asid = per_cpu(reserved_asids, i);
-		__set_bit(asid & ~ASID_MASK, asid_map);
+		__set_bit(asid2idx(asid), asid_map);
 		per_cpu(reserved_asids, i) = asid;
 	}
 
@@ -153,16 +162,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 		 * We had a valid ASID in a previous life, so try to re-use
 		 * it if possible.
 		 */
-		asid &= ~ASID_MASK;
-		if (!__test_and_set_bit(asid, asid_map))
+		if (!__test_and_set_bit(asid2idx(asid), asid_map))
 			return newasid;
 	}
 
 	/*
 	 * Allocate a free ASID. If we can't find one, take a note of the
-	 * currently active ASIDs and mark the TLBs as requiring flushes.
-	 * We always count from ASID #1, as we use ASID #0 when setting a
-	 * reserved TTBR0 for the init_mm.
+	 * currently active ASIDs and mark the TLBs as requiring flushes.  We
+	 * always count from ASID #2 (index 1), as we use ASID #0 when setting
+	 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
+	 * pairs.
 	 */
 	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
 	if (asid != NUM_USER_ASIDS)
@@ -179,7 +188,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 set_asid:
 	__set_bit(asid, asid_map);
 	cur_idx = asid;
-	return asid | generation;
+	return idx2asid(asid) | generation;
 }
 
 void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
-- 
cgit v1.2.3


From 51a0048beb449682d632d0af52a515adb9f9882e Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 14 Nov 2017 14:14:17 +0000
Subject: arm64: mm: Map entry trampoline into trampoline and kernel page
 tables

The exception entry trampoline needs to be mapped at the same virtual
address in both the trampoline page table (which maps nothing else)
and also the kernel page table, so that we can swizzle TTBR1_EL1 on
exceptions from and return to EL0.

This patch maps the trampoline at a fixed virtual address in the fixmap
area of the kernel virtual address space, which allows the kernel proper
to be randomized with respect to the trampoline when KASLR is enabled.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Tested-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/mmu.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 267d2b79d52d..fe68a48c64cb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -525,6 +525,29 @@ static int __init parse_rodata(char *arg)
 }
 early_param("rodata", parse_rodata);
 
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+static int __init map_entry_trampoline(void)
+{
+	extern char __entry_tramp_text_start[];
+
+	pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+	phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
+
+	/* The trampoline is always mapped and can therefore be global */
+	pgprot_val(prot) &= ~PTE_NG;
+
+	/* Map only the text into the trampoline page table */
+	memset(tramp_pg_dir, 0, PGD_SIZE);
+	__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
+			     prot, pgd_pgtable_alloc, 0);
+
+	/* ...as well as the kernel page table */
+	__set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
+	return 0;
+}
+core_initcall(map_entry_trampoline);
+#endif
+
 /*
  * Create fine-grained mappings for the kernel.
  */
-- 
cgit v1.2.3


From 6c27c4082f4f70b9f41df4d0adf51128b40351df Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 6 Dec 2017 11:24:02 +0000
Subject: arm64: kaslr: Put kernel vectors address in separate data page

The literal pool entry for identifying the vectors base is the only piece
of information in the trampoline page that identifies the true location
of the kernel.

This patch moves it into a page-aligned region of the .rodata section
and maps this adjacent to the trampoline text via an additional fixmap
entry, which protects against any accidental leakage of the trampoline
contents.

Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Laura Abbott <labbott@redhat.com>
Tested-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/mmu.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index fe68a48c64cb..916d9ced1c3f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -541,8 +541,16 @@ static int __init map_entry_trampoline(void)
 	__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
 			     prot, pgd_pgtable_alloc, 0);
 
-	/* ...as well as the kernel page table */
+	/* Map both the text and data into the kernel page table */
 	__set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		extern char __entry_tramp_data_start[];
+
+		__set_fixmap(FIX_ENTRY_TRAMP_DATA,
+			     __pa_symbol(__entry_tramp_data_start),
+			     PAGE_KERNEL_RO);
+	}
+
 	return 0;
 }
 core_initcall(map_entry_trampoline);
-- 
cgit v1.2.3


From 787fd1d019b269af7912249231dfe34a5fe3e7c8 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 13 Dec 2017 17:07:17 +0000
Subject: arm64: limit PA size to supported range

We currently copy the physical address size from
ID_AA64MMFR0_EL1.PARange directly into TCR.(I)PS. This will not work for
4k and 16k granule kernels on systems that support 52-bit physical
addresses, since 52-bit addresses are only permitted with the 64k
granule.

To fix this, fall back to 48 bits when configuring the PA size when the
kernel does not support 52-bit PAs. When it does, fall back to 52, to
avoid similar problems in the future if the PA size is ever increased
above 52.

Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: tcr_set_pa_size macro renamed to tcr_compute_pa_size]
[catalin.marinas@arm.com: comments added to tcr_compute_pa_size]
[catalin.marinas@arm.com: definitions added for TCR_*PS_SHIFT]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 95233dfc4c39..4f133cb340dc 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -228,11 +228,9 @@ ENTRY(__cpu_setup)
 	tcr_set_idmap_t0sz	x10, x9
 
 	/*
-	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
-	 * TCR_EL1.
+	 * Set the IPS bits in TCR_EL1.
 	 */
-	mrs	x9, ID_AA64MMFR0_EL1
-	bfi	x10, x9, #32, #3
+	tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6
 #ifdef CONFIG_ARM64_HW_AFDBM
 	/*
 	 * Hardware update of the Access and Dirty bits.
-- 
cgit v1.2.3


From 529c4b05a3cb2f324aac347042ee6d641478e946 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 13 Dec 2017 17:07:18 +0000
Subject: arm64: handle 52-bit addresses in TTBR

The top 4 bits of a 52-bit physical address are positioned at bits 2..5
in the TTBR registers. Introduce a couple of macros to move the bits
there, and change all TTBR writers to use them.

Leave TTBR0 PAN code unchanged, to avoid complicating it. A system with
52-bit PA will have PAN anyway (because it's ARMv8.1 or later), and a
system without 52-bit PA can only use up to 48-bit PAs. A later patch in
this series will add a kconfig dependency to ensure PAN is configured.

In addition, when using 52-bit PA there is a special alignment
requirement on the top-level table. We don't currently have any VA_BITS
configuration that would violate the requirement, but one could be added
in the future, so add a compile-time BUG_ON to check for it.

Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: added TTBR_BADD_MASK_52 comment]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/pgd.c  |  8 ++++++++
 arch/arm64/mm/proc.S | 13 ++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 051e71ec3335..289f9113a27a 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -49,6 +49,14 @@ void __init pgd_cache_init(void)
 	if (PGD_SIZE == PAGE_SIZE)
 		return;
 
+#ifdef CONFIG_ARM64_PA_BITS_52
+	/*
+	 * With 52-bit physical addresses, the architecture requires the
+	 * top-level table to be aligned to at least 64 bytes.
+	 */
+	BUILD_BUG_ON(PGD_SIZE < 64);
+#endif
+
 	/*
 	 * Naturally aligned pgds required by the architecture.
 	 */
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 4f133cb340dc..e79db5a7576a 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -138,10 +138,11 @@ ENDPROC(cpu_do_resume)
  *	- pgd_phys - physical address of new TTB
  */
 ENTRY(cpu_do_switch_mm)
-	pre_ttbr0_update_workaround x0, x2, x3
+	phys_to_ttbr x0, x2
+	pre_ttbr0_update_workaround x2, x3, x4
 	mmid	x1, x1				// get mm->context.id
-	bfi	x0, x1, #48, #16		// set the ASID
-	msr	ttbr0_el1, x0			// set TTBR0
+	bfi	x2, x1, #48, #16		// set the ASID
+	msr	ttbr0_el1, x2			// set TTBR0
 	isb
 	post_ttbr0_update_workaround
 	ret
@@ -158,14 +159,16 @@ ENTRY(idmap_cpu_replace_ttbr1)
 	save_and_disable_daif flags=x2
 
 	adrp	x1, empty_zero_page
-	msr	ttbr1_el1, x1
+	phys_to_ttbr x1, x3
+	msr	ttbr1_el1, x3
 	isb
 
 	tlbi	vmalle1
 	dsb	nsh
 	isb
 
-	msr	ttbr1_el1, x0
+	phys_to_ttbr x0, x3
+	msr	ttbr1_el1, x3
 	isb
 
 	restore_daif x2
-- 
cgit v1.2.3


From 193383043f14a398393dc18bae8380f7fe665ec3 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 13 Dec 2017 17:07:20 +0000
Subject: arm64: don't open code page table entry creation

Instead of open coding the generation of page table entries, use the
macros/functions that exist for this - pfn_p*d and p*d_populate. Most
code in the kernel already uses these macros, this patch tries to fix
up the few places that don't. This is useful for the next patch in this
series, which needs to change the page table entry logic, and it's
better to have that logic in one place.

The KVM extended ID map is special, since we're creating a level above
CONFIG_PGTABLE_LEVELS and the required function isn't available. Leave
it as is and add a comment to explain it. (The normal kernel ID map code
doesn't need this change because its page tables are created in assembly
(__create_page_tables)).

Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/mmu.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 267d2b79d52d..0c631a17ae1d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -570,8 +570,8 @@ static void __init map_kernel(pgd_t *pgd)
 		 * entry instead.
 		 */
 		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-		set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
-			__pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE));
+		pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+			     lm_alias(bm_pmd));
 		pud_clear_fixmap();
 	} else {
 		BUG();
@@ -686,7 +686,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 			if (!p)
 				return -ENOMEM;
 
-			set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
+			pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
 		} else
 			vmemmap_verify((pte_t *)pmd, node, addr, next);
 	} while (addr = next, addr != end);
@@ -879,15 +879,19 @@ int __init arch_ioremap_pmd_supported(void)
 
 int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
 {
+	pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
+					pgprot_val(mk_sect_prot(prot)));
 	BUG_ON(phys & ~PUD_MASK);
-	set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+	set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }
 
 int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
 {
+	pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
+					pgprot_val(mk_sect_prot(prot)));
 	BUG_ON(phys & ~PMD_MASK);
-	set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+	set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }
 
-- 
cgit v1.2.3


From fa2a8445b1d3810c52f2a6b3a006456bd1aacb7e Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 13 Dec 2017 17:07:24 +0000
Subject: arm64: allow ID map to be extended to 52 bits

Currently, when using VA_BITS < 48, if the ID map text happens to be
placed in physical memory above VA_BITS, we increase the VA size (up to
48) and create a new table level, in order to map in the ID map text.
This is okay because the system always supports 48 bits of VA.

This patch extends the code such that if the system supports 52 bits of
VA, and the ID map text is placed that high up, then we increase the VA
size accordingly, up to 52.

One difference from the current implementation is that so far the
condition of VA_BITS < 48 has meant that the top level table is always
"full", with the maximum number of entries, and an extra table level is
always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
level table is not full, and we simply need to increase the number of
entries in it, instead of creating a new table level.

Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()]
[catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/mmu.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0c631a17ae1d..baa34418c3bf 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -50,6 +50,7 @@
 #define NO_CONT_MAPPINGS	BIT(1)
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
 
 u64 kimage_voffset __ro_after_init;
 EXPORT_SYMBOL(kimage_voffset);
-- 
cgit v1.2.3


From a8ffaaa060b8d4da6138e0958cb0f45b73e1cb78 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 27 Dec 2017 15:12:56 +0000
Subject: arm64: asid: Do not replace active_asids if already 0

Under some uncommon timing conditions, a generation check and
xchg(active_asids, A1) in check_and_switch_context() on P1 can race with
an ASID roll-over on P2. If P2 has not seen the update to
active_asids[P1], it can re-allocate A1 to a new task T2 on P2. P1 ends
up waiting on the spinlock since the xchg() returned 0 while P2 can go
through a second ASID roll-over with (T2,A1,G2) active on P2. This
roll-over copies active_asids[P1] == A1,G1 into reserved_asids[P1] and
active_asids[P2] == A1,G2 into reserved_asids[P2]. A subsequent
scheduling of T1 on P1 and T2 on P2 would match reserved_asids and get
their generation bumped to G3:

P1					P2
--                                      --
TTBR0.BADDR = T0
TTBR0.ASID = A0
asid_generation = G1
check_and_switch_context(T1,A1,G1)
  generation match
					check_and_switch_context(T2,A0,G0)
 				          new_context()
					    ASID roll-over
					    asid_generation = G2
					    flush_context()
					      active_asids[P1] = 0
					      asid_map[A1] = 0
					      reserved_asids[P1] = A0,G0
  xchg(active_asids, A1)
    active_asids[P1] = A1,G1
    xchg returns 0
  spin_lock_irqsave()
					    allocated ASID (T2,A1,G2)
					    asid_map[A1] = 1
					  active_asids[P2] = A1,G2
					...
					check_and_switch_context(T3,A0,G0)
					  new_context()
					    ASID roll-over
					    asid_generation = G3
					    flush_context()
					      active_asids[P1] = 0
					      asid_map[A1] = 1
					      reserved_asids[P1] = A1,G1
					      reserved_asids[P2] = A1,G2
					    allocated ASID (T3,A2,G3)
					    asid_map[A2] = 1
					  active_asids[P2] = A2,G3
  new_context()
    check_update_reserved_asid(A1,G1)
      matches reserved_asid[P1]
      reserved_asid[P1] = A1,G3
  updated T1 ASID to (T1,A1,G3)
					check_and_switch_context(T2,A1,G2)
					  new_context()
					    check_and_switch_context(A1,G2)
					      matches reserved_asids[P2]
					      reserved_asids[P2] = A1,G3
					  updated T2 ASID to (T2,A1,G3)

At this point, we have two tasks, T1 and T2 both using ASID A1 with the
latest generation G3. Any of them is allowed to be scheduled on the
other CPU leading to two different tasks with the same ASID on the same
CPU.

This patch changes the xchg to cmpxchg so that the active_asids is only
updated if non-zero to avoid a race with an ASID roll-over on a
different CPU.

The ASID allocation algorithm has been formally verified using the TLA+
model checker (see
https://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/kernel-tla.git/tree/asidalloc.tla
for the spec).

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/context.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 1cb3bc92ae5c..1fe71b9fcf35 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -194,26 +194,29 @@ set_asid:
 void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 {
 	unsigned long flags;
-	u64 asid;
+	u64 asid, old_active_asid;
 
 	asid = atomic64_read(&mm->context.id);
 
 	/*
 	 * The memory ordering here is subtle.
-	 * If our ASID matches the current generation, then we update
-	 * our active_asids entry with a relaxed xchg. Racing with a
-	 * concurrent rollover means that either:
+	 * If our active_asids is non-zero and the ASID matches the current
+	 * generation, then we update the active_asids entry with a relaxed
+	 * cmpxchg. Racing with a concurrent rollover means that either:
 	 *
-	 * - We get a zero back from the xchg and end up waiting on the
+	 * - We get a zero back from the cmpxchg and end up waiting on the
 	 *   lock. Taking the lock synchronises with the rollover and so
 	 *   we are forced to see the updated generation.
 	 *
-	 * - We get a valid ASID back from the xchg, which means the
+	 * - We get a valid ASID back from the cmpxchg, which means the
 	 *   relaxed xchg in flush_context will treat us as reserved
 	 *   because atomic RmWs are totally ordered for a given location.
 	 */
-	if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
-	    && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
+	old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
+	if (old_active_asid &&
+	    !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) &&
+	    atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
+				     old_active_asid, asid))
 		goto switch_mm_fastpath;
 
 	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
-- 
cgit v1.2.3


From 95e3de3590e3f2358bb13f013911bc1bfa5d3f53 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 Jan 2018 18:19:39 +0000
Subject: arm64: Move post_ttbr_update_workaround to C code

We will soon need to invoke a CPU-specific function pointer after changing
page tables, so move post_ttbr_update_workaround out into C code to make
this possible.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/context.c | 9 +++++++++
 arch/arm64/mm/proc.S    | 3 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 1fe71b9fcf35..511bd1e79b69 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -242,6 +242,15 @@ switch_mm_fastpath:
 		cpu_switch_mm(mm->pgd, mm);
 }
 
+/* Errata workaround post TTBRx_EL1 update. */
+asmlinkage void post_ttbr_update_workaround(void)
+{
+	asm(ALTERNATIVE("nop; nop; nop",
+			"ic iallu; dsb nsh; isb",
+			ARM64_WORKAROUND_CAVIUM_27456,
+			CONFIG_CAVIUM_ERRATUM_27456));
+}
+
 static int asids_init(void)
 {
 	asid_bits = get_cpu_asid_bits();
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index bc334588f234..bc86f7ef8620 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -146,8 +146,7 @@ ENTRY(cpu_do_switch_mm)
 	phys_to_ttbr x0, x2
 	msr	ttbr0_el1, x2			// now update TTBR0
 	isb
-	post_ttbr_update_workaround
-	ret
+	b	post_ttbr_update_workaround	// Back to C code...
 ENDPROC(cpu_do_switch_mm)
 
 	.pushsection ".idmap.text", "ax"
-- 
cgit v1.2.3


From 0f15adbb2861ce6f75ccfc5a92b19eae0ef327d0 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 3 Jan 2018 11:17:58 +0000
Subject: arm64: Add skeleton to harden the branch predictor against aliasing
 attacks

Aliasing attacks against CPU branch predictors can allow an attacker to
redirect speculative control flow on some CPUs and potentially divulge
information from one context to another.

This patch adds initial skeleton code behind a new Kconfig option to
enable implementation-specific mitigations against these attacks for
CPUs that are affected.

Co-developed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/context.c |  2 ++
 arch/arm64/mm/fault.c   | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 511bd1e79b69..ff99a880a730 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -249,6 +249,8 @@ asmlinkage void post_ttbr_update_workaround(void)
 			"ic iallu; dsb nsh; isb",
 			ARM64_WORKAROUND_CAVIUM_27456,
 			CONFIG_CAVIUM_ERRATUM_27456));
+
+	arm64_apply_bp_hardening();
 }
 
 static int asids_init(void)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 22168cd0dde7..0e671ddf4855 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -708,6 +708,23 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 	arm64_notify_die("", regs, &info, esr);
 }
 
+asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
+						   unsigned int esr,
+						   struct pt_regs *regs)
+{
+	/*
+	 * We've taken an instruction abort from userspace and not yet
+	 * re-enabled IRQs. If the address is a kernel address, apply
+	 * BP hardening prior to enabling IRQs and pre-emption.
+	 */
+	if (addr > TASK_SIZE)
+		arm64_apply_bp_hardening();
+
+	local_irq_enable();
+	do_mem_abort(addr, esr, regs);
+}
+
+
 asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
 					   unsigned int esr,
 					   struct pt_regs *regs)
-- 
cgit v1.2.3


From 6d99b68933fbcf51f84fcbba49246ce1209ec193 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Mon, 8 Jan 2018 15:38:06 +0000
Subject: arm64: alternatives: use tpidr_el2 on VHE hosts

Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in
tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1
on VHE hosts, and allows future code to blindly access per-cpu variables
without triggering world-switch.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index bc86f7ef8620..5a59eea49395 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend)
 	mrs	x8, mdscr_el1
 	mrs	x9, oslsr_el1
 	mrs	x10, sctlr_el1
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
 	mrs	x11, tpidr_el1
+alternative_else
+	mrs	x11, tpidr_el2
+alternative_endif
 	mrs	x12, sp_el0
 	stp	x2, x3, [x0]
 	stp	x4, xzr, [x0, #16]
@@ -116,7 +120,11 @@ ENTRY(cpu_do_resume)
 	msr	mdscr_el1, x10
 
 	msr	sctlr_el1, x12
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
 	msr	tpidr_el1, x13
+alternative_else
+	msr	tpidr_el2, x13
+alternative_endif
 	msr	sp_el0, x14
 	/*
 	 * Restore oslsr_el1 by writing oslar_el1
-- 
cgit v1.2.3


From 83f8ee3a73f551aebb5b0bb029feaf6936615730 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Mon, 8 Jan 2018 15:38:17 +0000
Subject: arm64: mmu: add the entry trampolines start/end section markers into
 sections.h

SDEI needs to calculate an offset in the trampoline page too. Move
the extern char[] to sections.h.

This patch just moves code around.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/mmu.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4071602031ed..62d7abb2f710 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -529,8 +529,6 @@ early_param("rodata", parse_rodata);
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 static int __init map_entry_trampoline(void)
 {
-	extern char __entry_tramp_text_start[];
-
 	pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
 	phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
 
-- 
cgit v1.2.3


From 0370b31e48454d8cf11120664aedd1c51b3004cb Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@arm.com>
Date: Thu, 11 Jan 2018 10:11:59 +0000
Subject: arm64: Extend early page table code to allow for larger kernels

Currently the early assembler page table code assumes that precisely
1xpgd, 1xpud, 1xpmd are sufficient to represent the early kernel text
mappings.

Unfortunately this is rarely the case when running with a 16KB granule,
and we also run into limits with 4KB granule when building much larger
kernels.

This patch re-writes the early page table logic to compute indices of
mappings for each level of page table, and if multiple indices are
required, the next-level page table is scaled up accordingly.

Also the required size of the swapper_pg_dir is computed at link time
to cover the mapping [KIMAGE_ADDR + VOFFSET, _end]. When KASLR is
enabled, an extra page is set aside for each level that may require extra
entries at runtime.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Steve Capper <steve.capper@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 62d7abb2f710..b44992ec9643 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -642,7 +642,8 @@ void __init paging_init(void)
 	 * allocated with it.
 	 */
 	memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
-		      SWAPPER_DIR_SIZE - PAGE_SIZE);
+		      __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
+		      - PAGE_SIZE);
 }
 
 /*
-- 
cgit v1.2.3


From 071929dbdd865f779a89ba3f1e06ba8d17dd3743 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Tue, 19 Dec 2017 11:28:10 -0800
Subject: arm64: Stop printing the virtual memory layout

Printing kernel addresses should be done in limited circumstances, mostly
for debugging purposes. Printing out the virtual memory layout at every
kernel bootup doesn't really fall into this category so delete the prints.
There are other ways to get the same information.

Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/init.c | 43 -------------------------------------------
 1 file changed, 43 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 5960bef0170d..672094ed7e07 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -599,49 +599,6 @@ void __init mem_init(void)
 
 	mem_init_print_info(NULL);
 
-#define MLK(b, t) b, t, ((t) - (b)) >> 10
-#define MLM(b, t) b, t, ((t) - (b)) >> 20
-#define MLG(b, t) b, t, ((t) - (b)) >> 30
-#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
-
-	pr_notice("Virtual kernel memory layout:\n");
-#ifdef CONFIG_KASAN
-	pr_notice("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n",
-		MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
-#endif
-	pr_notice("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n",
-		MLM(MODULES_VADDR, MODULES_END));
-	pr_notice("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
-		MLG(VMALLOC_START, VMALLOC_END));
-	pr_notice("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n",
-		MLK_ROUNDUP(_text, _etext));
-	pr_notice("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n",
-		MLK_ROUNDUP(__start_rodata, __init_begin));
-	pr_notice("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n",
-		MLK_ROUNDUP(__init_begin, __init_end));
-	pr_notice("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
-		MLK_ROUNDUP(_sdata, _edata));
-	pr_notice("       .bss : 0x%p" " - 0x%p" "   (%6ld KB)\n",
-		MLK_ROUNDUP(__bss_start, __bss_stop));
-	pr_notice("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
-		MLK(FIXADDR_START, FIXADDR_TOP));
-	pr_notice("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
-		MLM(PCI_IO_START, PCI_IO_END));
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-	pr_notice("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n",
-		MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
-	pr_notice("              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
-		MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
-		    (unsigned long)virt_to_page(high_memory)));
-#endif
-	pr_notice("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
-		MLM(__phys_to_virt(memblock_start_of_DRAM()),
-		    (unsigned long)high_memory));
-
-#undef MLK
-#undef MLM
-#undef MLK_ROUNDUP
-
 	/*
 	 * Check boundaries twice: Some fundamental inconsistencies can be
 	 * detected at build time already.
-- 
cgit v1.2.3


From 7a00d68ebe5f07cb1db17e7fedfd031f0d87e8bb Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Mon, 15 Jan 2018 19:38:55 +0000
Subject: arm64: sysreg: Move to use definitions for all the SCTLR bits

__cpu_setup() configures SCTLR_EL1 using some hard coded hex masks,
and el2_setup() duplicates some this when setting RES1 bits.

Lets make this the same as KVM's hyp_init, which uses named bits.

First, we add definitions for all the SCTLR_EL{1,2} bits, the RES{1,0}
bits, and those we want to set or clear.

Add a build_bug checks to ensures all bits are either set or clear.
This means we don't need to preserve endian-ness configuration
generated elsewhere.

Finally, move the head.S and proc.S users of these hard-coded masks
over to the macro versions.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 5a59eea49395..ec4c3d82b4d6 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -226,11 +226,7 @@ ENTRY(__cpu_setup)
 	/*
 	 * Prepare SCTLR
 	 */
-	adr	x5, crval
-	ldp	w5, w6, [x5]
-	mrs	x0, sctlr_el1
-	bic	x0, x0, x5			// clear bits
-	orr	x0, x0, x6			// set bits
+	mov_q	x0, SCTLR_EL1_SET
 	/*
 	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
 	 * both user and kernel.
@@ -259,21 +255,3 @@ ENTRY(__cpu_setup)
 	msr	tcr_el1, x10
 	ret					// return to head.S
 ENDPROC(__cpu_setup)
-
-	/*
-	 * We set the desired value explicitly, including those of the
-	 * reserved bits. The values of bits EE & E0E were set early in
-	 * el2_setup, which are left untouched below.
-	 *
-	 *                 n n            T
-	 *       U E      WT T UD     US IHBS
-	 *       CE0      XWHW CZ     ME TEEA S
-	 * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM
-	 * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved
-	 * .... .1.. .... 01.1 11.1 ..01 0.01 1101 < software settings
-	 */
-	.type	crval, #object
-crval:
-	.word	0xfcffffff			// clear
-	.word	0x34d5d91d			// set
-	.popsection
-- 
cgit v1.2.3


From 68ddbf09ec5a888ec850edd7e7438d2daf069c56 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Mon, 15 Jan 2018 19:38:59 +0000
Subject: arm64: kernel: Prepare for a DISR user

KVM would like to consume any pending SError (or RAS error) after guest
exit. Today it has to unmask SError and use dsb+isb to synchronise the
CPU. With the RAS extensions we can use ESB to synchronise any pending
SError.

Add the necessary macros to allow DISR to be read and converted to an
ESR.

We clear the DISR register when we enable the RAS cpufeature, and the
kernel has not executed any ESB instructions. Any value we find in DISR
must have belonged to firmware. Executing an ESB instruction is the
only way to update DISR, so we can expect firmware to have handled
any deferred SError. By the same logic we clear DISR in the idle path.

Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index ec4c3d82b4d6..05e4ae934b23 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -132,6 +132,11 @@ alternative_endif
 	ubfx	x11, x11, #1, #1
 	msr	oslar_el1, x11
 	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
+
+alternative_if ARM64_HAS_RAS_EXTN
+	msr_s	SYS_DISR_EL1, xzr
+alternative_else_nop_endif
+
 	isb
 	ret
 ENDPROC(cpu_do_resume)
-- 
cgit v1.2.3


From 6b88a32c7af68895134872cdec3b6bfdb532d94e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 10 Jan 2018 13:18:30 +0000
Subject: arm64: kpti: Fix the interaction between ASID switching and software
 PAN

With ARM64_SW_TTBR0_PAN enabled, the exception entry code checks the
active ASID to decide whether user access was enabled (non-zero ASID)
when the exception was taken. On return from exception, if user access
was previously disabled, it re-instates TTBR0_EL1 from the per-thread
saved value (updated in switch_mm() or efi_set_pgd()).

Commit 7655abb95386 ("arm64: mm: Move ASID from TTBR0 to TTBR1") makes a
TTBR0_EL1 + ASID switching non-atomic. Subsequently, commit 27a921e75711
("arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN") changes the
__uaccess_ttbr0_disable() function and asm macro to first write the
reserved TTBR0_EL1 followed by the ASID=0 update in TTBR1_EL1. If an
exception occurs between these two, the exception return code will
re-instate a valid TTBR0_EL1. Similar scenario can happen in
cpu_switch_mm() between setting the reserved TTBR0_EL1 and the ASID
update in cpu_do_switch_mm().

This patch reverts the entry.S check for ASID == 0 to TTBR0_EL1 and
disables the interrupts around the TTBR0_EL1 and ASID switching code in
__uaccess_ttbr0_disable(). It also ensures that, when returning from the
EFI runtime services, efi_set_pgd() doesn't leave a non-zero ASID in
TTBR1_EL1 by using uaccess_ttbr0_{enable,disable}.

The accesses to current_thread_info()->ttbr0 are updated to use
READ_ONCE/WRITE_ONCE.

As a safety measure, __uaccess_ttbr0_enable() always masks out any
existing non-zero ASID TTBR1_EL1 before writing in the new ASID.

Fixes: 27a921e75711 ("arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN")
Acked-by: Will Deacon <will.deacon@arm.com>
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: James Morse <james.morse@arm.com>
Tested-by: James Morse <james.morse@arm.com>
Co-developed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/cache.S | 2 +-
 arch/arm64/mm/proc.S  | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 6cd20a8c0952..91464e7f77cc 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -72,7 +72,7 @@ USER(9f, ic	ivau, x4	)		// invalidate I line PoU
 	isb
 	mov	x0, #0
 1:
-	uaccess_ttbr0_disable x1
+	uaccess_ttbr0_disable x1, x2
 	ret
 9:
 	mov	x0, #-EFAULT
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 05e4ae934b23..c6a12073ef46 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -153,6 +153,9 @@ ENDPROC(cpu_do_resume)
 ENTRY(cpu_do_switch_mm)
 	mrs	x2, ttbr1_el1
 	mmid	x1, x1				// get mm->context.id
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	bfi	x0, x1, #48, #16		// set the ASID field in TTBR0
+#endif
 	bfi	x2, x1, #48, #16		// set the ASID
 	msr	ttbr1_el1, x2			// in TTBR1 (since TCR.A1 is set)
 	isb
-- 
cgit v1.2.3


From e9eaa8052fe71b95f4fea6072fa3e0b2cf0b620f Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Thu, 18 Jan 2018 19:13:11 +0000
Subject: arm64: mm: ignore memory above supported physical address size

When booting a kernel without 52-bit PA support (e.g. a kernel with 4k
pages) on a system with 52-bit memory, the kernel will currently try to
use the 52-bit memory and crash. Fix this by ignoring any memory higher
than what the kernel supports.

Fixes: f77d281713d4 ("arm64: enable 52-bit physical address support")
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/init.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 672094ed7e07..285745b2ca38 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -366,6 +366,9 @@ void __init arm64_memblock_init(void)
 	/* Handle linux,usable-memory-range property */
 	fdt_enforce_memory_region();
 
+	/* Remove memory above our supported physical address size */
+	memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX);
+
 	/*
 	 * Ensure that the linear region takes up exactly half of the kernel
 	 * virtual address space. This way, we can distinguish a linear address
-- 
cgit v1.2.3


From a8e4c0a919ae310944ed2c9ace11cf3ccd8a609b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 19 Jan 2018 15:42:09 +0000
Subject: arm64: Move BP hardening to check_and_switch_context

We call arm64_apply_bp_hardening() from post_ttbr_update_workaround,
which has the unexpected consequence of being triggered on every
exception return to userspace when ARM64_SW_TTBR0_PAN is selected,
even if no context switch actually occured.

This is a bit suboptimal, and it would be more logical to only
invalidate the branch predictor when we actually switch to
a different mm.

In order to solve this, move the call to arm64_apply_bp_hardening()
into check_and_switch_context(), where we're guaranteed to pick
a different mm context.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/context.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index ff99a880a730..301417ae2ba8 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -234,6 +234,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
 
 switch_mm_fastpath:
+
+	arm64_apply_bp_hardening();
+
 	/*
 	 * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when
 	 * emulating PAN.
@@ -249,8 +252,6 @@ asmlinkage void post_ttbr_update_workaround(void)
 			"ic iallu; dsb nsh; isb",
 			ARM64_WORKAROUND_CAVIUM_27456,
 			CONFIG_CAVIUM_ERRATUM_27456));
-
-	arm64_apply_bp_hardening();
 }
 
 static int asids_init(void)
-- 
cgit v1.2.3


From ec89ab50a03a33a4a648869e868b1964354fb2d1 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@arm.com>
Date: Wed, 24 Jan 2018 08:27:08 +0000
Subject: arm64: Fix TTBR + PAN + 52-bit PA logic in cpu_do_switch_mm

In cpu_do_switch_mm(.) with ARM64_SW_TTBR0_PAN=y we apply phys_to_ttbr
to a value that already has an ASID inserted into the upper bits. For
52-bit PA configurations this then can give us TTBR0_EL1 registers that
cause translation table walks to attempt to access non-zero PA[51:48]
spuriously. Ultimately leading to a Synchronous External Abort on level
1 translation.

This patch re-arranges the logic in cpu_do_switch_mm(.) such that
phys_to_ttbr is called before the ASID is inserted into the TTBR0 value.

Fixes: 6b88a32c7af6 ("arm64: kpti: Fix the interaction between ASID switching and software PAN")
Acked-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Tested-by: Kristina Martsenko <kristina.martsenko@arm.com>
Reviewed-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Steve Capper <steve.capper@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c6a12073ef46..9f177aac6390 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -153,14 +153,14 @@ ENDPROC(cpu_do_resume)
 ENTRY(cpu_do_switch_mm)
 	mrs	x2, ttbr1_el1
 	mmid	x1, x1				// get mm->context.id
+	phys_to_ttbr x0, x3
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
-	bfi	x0, x1, #48, #16		// set the ASID field in TTBR0
+	bfi	x3, x1, #48, #16		// set the ASID field in TTBR0
 #endif
 	bfi	x2, x1, #48, #16		// set the ASID
 	msr	ttbr1_el1, x2			// in TTBR1 (since TCR.A1 is set)
 	isb
-	phys_to_ttbr x0, x2
-	msr	ttbr0_el1, x2			// now update TTBR0
+	msr	ttbr0_el1, x3			// now update TTBR0
 	isb
 	b	post_ttbr_update_workaround	// Back to C code...
 ENDPROC(cpu_do_switch_mm)
-- 
cgit v1.2.3