From e8a75963a4b9433dca55286e222f4dd1cc1ca76c Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 28 Aug 2015 08:39:57 +0530
Subject: ARC: mm: switch pgtable_t to pte_t *

ARC is the only arch where pgtable_t is unsigned long (vs. struct page *).
Historically this was done to avoid the page_address() calls in various
arch hooks which need to get the virtual/logical address of the table.

Some arches alternatively define it as pte_t *, which is as efficient as
unsigned long (generated code doesn't change).

Suggested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/page.h    | 4 ++--
 arch/arc/include/asm/pgalloc.h | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 9c8aa41e45c2..2994cac1069e 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -43,7 +43,6 @@ typedef struct {
 typedef struct {
 	unsigned long pgprot;
 } pgprot_t;
-typedef unsigned long pgtable_t;
 
 #define pte_val(x)      ((x).pte)
 #define pgd_val(x)      ((x).pgd)
@@ -60,7 +59,6 @@ typedef unsigned long pgtable_t;
 typedef unsigned long pte_t;
 typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;
-typedef unsigned long pgtable_t;
 
 #define pte_val(x)	(x)
 #define pgd_val(x)	(x)
@@ -71,6 +69,8 @@ typedef unsigned long pgtable_t;
 
 #endif
 
+typedef pte_t * pgtable_t;
+
 #define ARCH_PFN_OFFSET     (CONFIG_LINUX_LINK_BASE >> PAGE_SHIFT)
 
 #define pfn_valid(pfn)      (((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 81208bfd9dcb..9149b5ca26d7 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	pgtable_t pte_pg;
 	struct page *page;
 
-	pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
+	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
 	if (!pte_pg)
 		return 0;
 	memzero((void *)pte_pg, PTRS_PER_PTE * 4);
@@ -128,12 +128,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
 {
 	pgtable_page_dtor(virt_to_page(ptep));
-	free_pages(ptep, __get_order_pte());
+	free_pages((unsigned long)ptep, __get_order_pte());
 }
 
 #define __pte_free_tlb(tlb, pte, addr)  pte_free((tlb)->mm, pte)
 
 #define check_pgt_cache()   do { } while (0)
-#define pmd_pgtable(pmd) pmd_page_vaddr(pmd)
+#define pmd_pgtable(pmd)	((pgtable_t) pmd_page_vaddr(pmd))
 
 #endif /* _ASM_ARC_PGALLOC_H */
-- 
cgit v1.2.3


From 129cbed54a8b3f80f0eaf49acb14fe835587f6f3 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 5 Dec 2013 12:05:05 +0530
Subject: ARC: mm: pte flags cosmetic cleanups, comments

No semantic changes

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/pgtable.h | 37 ++++++++++++++++---------------------
 arch/arc/mm/tlbex.S            |  2 +-
 2 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 1281718802f7..481359fe56ae 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -60,7 +60,7 @@
 #define _PAGE_EXECUTE       (1<<3)	/* Page has user execute perm (H) */
 #define _PAGE_WRITE         (1<<4)	/* Page has user write perm (H) */
 #define _PAGE_READ          (1<<5)	/* Page has user read perm (H) */
-#define _PAGE_MODIFIED      (1<<6)	/* Page modified (dirty) (S) */
+#define _PAGE_DIRTY         (1<<6)	/* Page modified (dirty) (S) */
 #define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
 #define _PAGE_PRESENT       (1<<10)	/* TLB entry is valid (H) */
 
@@ -71,7 +71,7 @@
 #define _PAGE_WRITE         (1<<2)	/* Page has user write perm (H) */
 #define _PAGE_READ          (1<<3)	/* Page has user read perm (H) */
 #define _PAGE_ACCESSED      (1<<4)	/* Page is accessed (S) */
-#define _PAGE_MODIFIED      (1<<5)	/* Page modified (dirty) (S) */
+#define _PAGE_DIRTY         (1<<5)	/* Page modified (dirty) (S) */
 
 #if (CONFIG_ARC_MMU_VER >= 4)
 #define _PAGE_WTHRU         (1<<7)	/* Page cache mode write-thru (H) */
@@ -92,21 +92,16 @@
 #define _K_PAGE_PERMS  (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
 			_PAGE_GLOBAL | _PAGE_PRESENT)
 
-#ifdef CONFIG_ARC_CACHE_PAGES
-#define _PAGE_DEF_CACHEABLE _PAGE_CACHEABLE
-#else
-#define _PAGE_DEF_CACHEABLE (0)
+#ifndef CONFIG_ARC_CACHE_PAGES
+#undef _PAGE_CACHEABLE
+#define _PAGE_CACHEABLE 0
 #endif
 
-/* Helper for every "user" page
- * -kernel can R/W/X
- * -by default cached, unless config otherwise
- * -present in memory
- */
-#define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE)
+/* Defaults for every user page */
+#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED)
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 
 /* More Abbrevaited helpers */
 #define PAGE_U_NONE     __pgprot(___DEF)
@@ -122,7 +117,7 @@
  * user vaddr space - visible in all addr spaces, but kernel mode only
  * Thus Global, all-kernel-access, no-user-access, cached
  */
-#define PAGE_KERNEL          __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE)
+#define PAGE_KERNEL          __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE)
 
 /* ioremap */
 #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
@@ -191,16 +186,16 @@
 
 /* Optimal Sizing of Pg Tbl - based on MMU page size */
 #if defined(CONFIG_ARC_PAGE_SIZE_8K)
-#define BITS_FOR_PTE	8
+#define BITS_FOR_PTE	8		/* 11:8:13 */
 #elif defined(CONFIG_ARC_PAGE_SIZE_16K)
-#define BITS_FOR_PTE	8
+#define BITS_FOR_PTE	8		/* 10:8:14 */
 #elif defined(CONFIG_ARC_PAGE_SIZE_4K)
-#define BITS_FOR_PTE	9
+#define BITS_FOR_PTE	9		/* 11:9:12 */
 #endif
 
 #define BITS_FOR_PGD	(32 - BITS_FOR_PTE - BITS_IN_PAGE)
 
-#define PGDIR_SHIFT	(BITS_FOR_PTE + BITS_IN_PAGE)
+#define PGDIR_SHIFT	(32 - BITS_FOR_PGD)
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)	/* vaddr span, not PDG sz */
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
@@ -295,7 +290,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 /* Zoo of pte_xxx function */
 #define pte_read(pte)		(pte_val(pte) & _PAGE_READ)
 #define pte_write(pte)		(pte_val(pte) & _PAGE_WRITE)
-#define pte_dirty(pte)		(pte_val(pte) & _PAGE_MODIFIED)
+#define pte_dirty(pte)		(pte_val(pte) & _PAGE_DIRTY)
 #define pte_young(pte)		(pte_val(pte) & _PAGE_ACCESSED)
 #define pte_special(pte)	(0)
 
@@ -304,8 +299,8 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 
 PTE_BIT_FUNC(wrprotect,	&= ~(_PAGE_WRITE));
 PTE_BIT_FUNC(mkwrite,	|= (_PAGE_WRITE));
-PTE_BIT_FUNC(mkclean,	&= ~(_PAGE_MODIFIED));
-PTE_BIT_FUNC(mkdirty,	|= (_PAGE_MODIFIED));
+PTE_BIT_FUNC(mkclean,	&= ~(_PAGE_DIRTY));
+PTE_BIT_FUNC(mkdirty,	|= (_PAGE_DIRTY));
 PTE_BIT_FUNC(mkold,	&= ~(_PAGE_ACCESSED));
 PTE_BIT_FUNC(mkyoung,	|= (_PAGE_ACCESSED));
 PTE_BIT_FUNC(exprotect,	&= ~(_PAGE_EXECUTE));
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index f6f4c3cb505d..b8b014c6904d 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -365,7 +365,7 @@ ENTRY(EV_TLBMissD)
 	lr      r3, [ecr]
 	or      r0, r0, _PAGE_ACCESSED        ; Accessed bit always
 	btst_s  r3,  ECR_C_BIT_DTLB_ST_MISS   ; See if it was a Write Access ?
-	or.nz   r0, r0, _PAGE_MODIFIED        ; if Write, set Dirty bit as well
+	or.nz   r0, r0, _PAGE_DIRTY           ; if Write, set Dirty bit as well
 	st_s    r0, [r1]                      ; Write back PTE
 
 	CONV_PTE_TO_TLB
-- 
cgit v1.2.3


From 24830fc782a3a740209d39cb27abbf5a9763f61f Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 16 Feb 2015 19:01:29 +0530
Subject: ARC: mm: Introduce PTE_SPECIAL

Needed for THP, but will also come in handy for fast GUP later

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/pgtable.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 481359fe56ae..431a83329324 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -61,6 +61,7 @@
 #define _PAGE_WRITE         (1<<4)	/* Page has user write perm (H) */
 #define _PAGE_READ          (1<<5)	/* Page has user read perm (H) */
 #define _PAGE_DIRTY         (1<<6)	/* Page modified (dirty) (S) */
+#define _PAGE_SPECIAL       (1<<7)
 #define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
 #define _PAGE_PRESENT       (1<<10)	/* TLB entry is valid (H) */
 
@@ -72,6 +73,7 @@
 #define _PAGE_READ          (1<<3)	/* Page has user read perm (H) */
 #define _PAGE_ACCESSED      (1<<4)	/* Page is accessed (S) */
 #define _PAGE_DIRTY         (1<<5)	/* Page modified (dirty) (S) */
+#define _PAGE_SPECIAL       (1<<6)
 
 #if (CONFIG_ARC_MMU_VER >= 4)
 #define _PAGE_WTHRU         (1<<7)	/* Page cache mode write-thru (H) */
@@ -292,7 +294,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 #define pte_write(pte)		(pte_val(pte) & _PAGE_WRITE)
 #define pte_dirty(pte)		(pte_val(pte) & _PAGE_DIRTY)
 #define pte_young(pte)		(pte_val(pte) & _PAGE_ACCESSED)
-#define pte_special(pte)	(0)
+#define pte_special(pte)	(pte_val(pte) & _PAGE_SPECIAL)
 
 #define PTE_BIT_FUNC(fn, op) \
 	static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
@@ -305,8 +307,9 @@ PTE_BIT_FUNC(mkold,	&= ~(_PAGE_ACCESSED));
 PTE_BIT_FUNC(mkyoung,	|= (_PAGE_ACCESSED));
 PTE_BIT_FUNC(exprotect,	&= ~(_PAGE_EXECUTE));
 PTE_BIT_FUNC(mkexec,	|= (_PAGE_EXECUTE));
+PTE_BIT_FUNC(mkspecial,	|= (_PAGE_SPECIAL));
 
-static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+#define __HAVE_ARCH_PTE_SPECIAL
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-- 
cgit v1.2.3


From fe6c1b8611aa3a79a937a5e3b85a16576b6ad159 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 8 Jul 2014 18:43:47 +0530
Subject: ARCv2: mm: THP support

MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP
support.

Normal and Super pages can co-exist (of course not overlap) in TLB with a
new bit "SZ" in TLB page descriptor to distinguish between them.
Super Page size is configurable in hardware (4K to 16M), but fixed once
RTL builds.

The exact THP size a Linux configuration will support is a function of:
 - MMU page size (typical 8K, RTL fixed)
 - software page walker address split between PGD:PTE:PFN (typical
   11:8:13, but can be changed with 1 line)

So for above default, THP size supported is 8K * 256 = 2M

Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime
reduces to 1 level (as PTE is folded into PGD and canonically referred
to as PMD).

Thus thp PMD accessors are implemented in terms of PTE (just like sparc)

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig                |  4 ++
 arch/arc/include/asm/hugepage.h | 77 +++++++++++++++++++++++++++++++++++++++
 arch/arc/include/asm/page.h     |  1 +
 arch/arc/include/asm/pgtable.h  | 16 +++++++-
 arch/arc/mm/tlb.c               | 81 +++++++++++++++++++++++++++++++++++++++++
 arch/arc/mm/tlbex.S             | 19 ++++++++--
 6 files changed, 192 insertions(+), 6 deletions(-)
 create mode 100644 arch/arc/include/asm/hugepage.h

(limited to 'arch/arc')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 78c0621d5819..5912006391ed 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -76,6 +76,10 @@ config STACKTRACE_SUPPORT
 config HAVE_LATENCYTOP_SUPPORT
 	def_bool y
 
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	def_bool y
+	depends on ARC_MMU_V4
+
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
new file mode 100644
index 000000000000..1d0700c32b82
--- /dev/null
+++ b/arch/arc/include/asm/hugepage.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#ifndef _ASM_ARC_HUGEPAGE_H
+#define _ASM_ARC_HUGEPAGE_H
+
+#include <linux/types.h>
+#include <asm-generic/pgtable-nopmd.h>
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	return __pmd(pte_val(pte));
+}
+
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkhuge(pmd)		pte_pmd(pte_mkhuge(pmd_pte(pmd)))
+#define pmd_mknotpresent(pmd)	pte_pmd(pte_mknotpresent(pmd_pte(pmd)))
+#define pmd_mksplitting(pmd)	pte_pmd(pte_mkspecial(pmd_pte(pmd)))
+#define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
+
+#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
+#define pmd_special(pmd)	pte_special(pmd_pte(pmd))
+
+#define mk_pmd(page, prot)	pte_pmd(mk_pte(page, prot))
+
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) & _PAGE_HW_SZ)
+#define pmd_trans_splitting(pmd)	(pmd_trans_huge(pmd) && pmd_special(pmd))
+
+#define pfn_pmd(pfn, prot)	(__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+        /*
+         * open-coded pte_modify() with additional retaining of HW_SZ bit
+         * so that pmd_trans_huge() remains true for this PMD
+         */
+        return __pmd((pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HW_SZ)) | pgprot_val(newprot));
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd);
+
+#define has_transparent_hugepage() 1
+
+/* Generic variants assume pgtable_t is struct page *, hence need for these */
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				       pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#endif
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 2994cac1069e..37706837ef75 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -64,6 +64,7 @@ typedef unsigned long pgprot_t;
 #define pgd_val(x)	(x)
 #define pgprot_val(x)	(x)
 #define __pte(x)	(x)
+#define __pgd(x)	(x)
 #define __pgprot(x)	(x)
 #define pte_pgprot(x)	(x)
 
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 431a83329324..336267f2e9d9 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -83,11 +83,13 @@
 #define _PAGE_PRESENT       (1<<9)	/* TLB entry is valid (H) */
 
 #if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_SZ            (1<<10)	/* Page Size indicator (H) */
+#define _PAGE_HW_SZ         (1<<10)	/* Page Size indicator (H): 0 normal, 1 super */
 #endif
 
 #define _PAGE_SHARED_CODE   (1<<11)	/* Shared Code page with cmn vaddr
 					   usable for shared TLB entries (H) */
+
+#define _PAGE_UNUSED_BIT    (1<<12)
 #endif
 
 /* vmalloc permissions */
@@ -99,6 +101,10 @@
 #define _PAGE_CACHEABLE 0
 #endif
 
+#ifndef _PAGE_HW_SZ
+#define _PAGE_HW_SZ	0
+#endif
+
 /* Defaults for every user page */
 #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
 
@@ -125,7 +131,7 @@
 #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
 
 /* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT)
+#define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
 #define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
 #define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
 
@@ -299,6 +305,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 #define PTE_BIT_FUNC(fn, op) \
 	static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
 
+PTE_BIT_FUNC(mknotpresent,	&= ~(_PAGE_PRESENT));
 PTE_BIT_FUNC(wrprotect,	&= ~(_PAGE_WRITE));
 PTE_BIT_FUNC(mkwrite,	|= (_PAGE_WRITE));
 PTE_BIT_FUNC(mkclean,	&= ~(_PAGE_DIRTY));
@@ -308,6 +315,7 @@ PTE_BIT_FUNC(mkyoung,	|= (_PAGE_ACCESSED));
 PTE_BIT_FUNC(exprotect,	&= ~(_PAGE_EXECUTE));
 PTE_BIT_FUNC(mkexec,	|= (_PAGE_EXECUTE));
 PTE_BIT_FUNC(mkspecial,	|= (_PAGE_SPECIAL));
+PTE_BIT_FUNC(mkhuge,	|= (_PAGE_HW_SZ));
 
 #define __HAVE_ARCH_PTE_SPECIAL
 
@@ -381,6 +389,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
  * remap a physical page `pfn' of size `size' with page protection `prot'
  * into virtual address `from'
  */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#include <asm/hugepage.h>
+#endif
+
 #include <asm-generic/pgtable.h>
 
 /* to cope with aliasing VIPT cache */
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 2c7ce8bb7475..eb1bdc40e24f 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -256,6 +256,18 @@ noinline void local_flush_tlb_all(void)
 		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 	}
 
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+		const int stlb_idx = 0x800;
+
+		/* Blank sTLB entry */
+		write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ);
+
+		for (entry = stlb_idx; entry < stlb_idx + 16; entry++) {
+			write_aux_reg(ARC_REG_TLBINDEX, entry);
+			write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+		}
+	}
+
 	utlb_invalidate();
 
 	local_irq_restore(flags);
@@ -580,6 +592,75 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 	}
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+/*
+ * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP
+ * support.
+ *
+ * Normal and Super pages can co-exist (of course not overlap) in TLB with a
+ * new bit "SZ" in TLB page descriptor to distinguish between them.
+ * Super Page size is configurable in hardware (4K to 16M), but fixed once
+ * RTL builds.
+ *
+ * The exact THP size a Linux configuration will support is a function of:
+ *  - MMU page size (typical 8K, RTL fixed)
+ *  - software page walker address split between PGD:PTE:PFN (typical
+ *    11:8:13, but can be changed with 1 line)
+ * So for above default, THP size supported is 8K * (2^8) = 2M
+ *
+ * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime
+ * reduces to 1 level (as PTE is folded into PGD and canonically referred
+ * to as PMD).
+ * Thus THP PMD accessors are implemented in terms of PTE (just like sparc)
+ */
+
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd)
+{
+	pte_t pte = __pte(pmd_val(*pmd));
+	update_mmu_cache(vma, addr, &pte);
+}
+
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	if (!pmd_huge_pte(mm, pmdp))
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	struct list_head *lh;
+	pgtable_t pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	pgtable = pmd_huge_pte(mm, pmdp);
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		pmd_huge_pte(mm, pmdp) = NULL;
+	else {
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+
+	pte_val(pgtable[0]) = 0;
+	pte_val(pgtable[1]) = 0;
+
+	return pgtable;
+}
+
+#endif
+
 /* Read the Cache Build Confuration Registers, Decode them and save into
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index b8b014c6904d..552594897655 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -205,10 +205,18 @@ ex_saved_reg1:
 #endif
 
 	lsr     r0, r2, PGDIR_SHIFT     ; Bits for indexing into PGD
-	ld.as   r1, [r1, r0]            ; PGD entry corresp to faulting addr
-	and.f   r1, r1, PAGE_MASK       ; Ignoring protection and other flags
-	;   contains Ptr to Page Table
-	bz.d    do_slow_path_pf         ; if no Page Table, do page fault
+	ld.as   r3, [r1, r0]            ; PGD entry corresp to faulting addr
+	tst	r3, r3
+	bz	do_slow_path_pf         ; if no Page Table, do page fault
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	and.f	0, r3, _PAGE_HW_SZ	; Is this Huge PMD (thp)
+	add2.nz	r1, r1, r0
+	bnz.d	2f		; YES: PGD == PMD has THP PTE: stop pgd walk
+	mov.nz	r0, r3
+
+#endif
+	and	r1, r3, PAGE_MASK
 
 	; Get the PTE entry: The idea is
 	; (1) x = addr >> PAGE_SHIFT 	-> masks page-off bits from @fault-addr
@@ -219,6 +227,9 @@ ex_saved_reg1:
 	lsr     r0, r2, (PAGE_SHIFT - 2)
 	and     r0, r0, ( (PTRS_PER_PTE - 1) << 2)
 	ld.aw   r0, [r1, r0]            ; get PTE and PTE ptr for fault addr
+
+2:
+
 #ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
 	and.f 0, r0, _PAGE_PRESENT
 	bz   1f
-- 
cgit v1.2.3


From 6ce187985f31c441f7fc10a4d265182d05bc7ad3 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 12 Mar 2015 19:48:03 +0530
Subject: ARCv2: mm: THP: boot validation/reporting

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/tlb.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch/arc')

diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index eb1bdc40e24f..91905b1c3d72 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -736,7 +736,8 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 
 	if (p_mmu->s_pg_sz_m)
 		scnprintf(super_pg, 64, "%dM Super Page%s, ",
-			  p_mmu->s_pg_sz_m, " (not used)");
+			  p_mmu->s_pg_sz_m,
+			  IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? "" : " (not used)");
 
 	n += scnprintf(buf + n, len - n,
 		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
@@ -771,6 +772,11 @@ void arc_mmu_init(void)
 	if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
 		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
 
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+	    mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE))
+		panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
+		      (unsigned long)TO_MB(HPAGE_PMD_SIZE));
+
 	/* Enable the MMU */
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
 
-- 
cgit v1.2.3


From 722fe8fd365a08bd53e9dd105009ab810107b02d Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 27 Feb 2015 19:36:35 +0530
Subject: ARCv2: mm: THP: Implement flush_pmd_tlb_range() optimization

Implement the TLB flush routine to evict a specific Super TLB entry,
vs. moving to a new ASID on every such flush.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/hugepage.h |  4 ++++
 arch/arc/mm/tlb.c               | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 1d0700c32b82..c5094de86403 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -74,4 +74,8 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 #define __HAVE_ARCH_PGTABLE_WITHDRAW
 extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
 
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+				unsigned long end);
+
 #endif
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 91905b1c3d72..005090e425f4 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -659,6 +659,26 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 	return pgtable;
 }
 
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			 unsigned long end)
+{
+	unsigned int cpu;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) {
+		unsigned int asid = hw_pid(vma->vm_mm, cpu);
+
+		/* No need to loop here: this will always be for 1 Huge Page */
+		tlb_entry_erase(start | _PAGE_HW_SZ | asid);
+	}
+
+	local_irq_restore(flags);
+}
+
 #endif
 
 /* Read the Cache Build Confuration Registers, Decode them and save into
-- 
cgit v1.2.3


From c7119d56d2755fc2770b0e2c1c4385e10f4c9161 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 15 Oct 2015 08:04:45 +0530
Subject: ARCv2: mm: THP: flush_pmd_tlb_range make SMP safe

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/tlbflush.h |  5 +++++
 arch/arc/mm/tlb.c               | 27 +++++++++++++++++++++++++--
 2 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h
index 71c7b2e4b874..1fe9c8c80280 100644
--- a/arch/arc/include/asm/tlbflush.h
+++ b/arch/arc/include/asm/tlbflush.h
@@ -17,6 +17,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
 void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void local_flush_tlb_range(struct vm_area_struct *vma,
 			   unsigned long start, unsigned long end);
+void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			       unsigned long end);
 
 #ifndef CONFIG_SMP
 #define flush_tlb_range(vma, s, e)	local_flush_tlb_range(vma, s, e)
@@ -24,6 +26,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma,
 #define flush_tlb_kernel_range(s, e)	local_flush_tlb_kernel_range(s, e)
 #define flush_tlb_all()			local_flush_tlb_all()
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
+#define flush_pmd_tlb_range(vma, s, e)	local_flush_pmd_tlb_range(vma, s, e)
 #else
 extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 							 unsigned long end);
@@ -31,5 +34,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+
 #endif /* CONFIG_SMP */
 #endif
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 005090e425f4..29b587835974 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -421,6 +421,15 @@ static inline void ipi_flush_tlb_range(void *arg)
 	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void ipi_flush_pmd_tlb_range(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+#endif
+
 static inline void ipi_flush_tlb_kernel_range(void *arg)
 {
 	struct tlb_args *ta = (struct tlb_args *)arg;
@@ -461,6 +470,20 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			 unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1);
+}
+#endif
+
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	struct tlb_args ta = {
@@ -659,8 +682,8 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 	return pgtable;
 }
 
-void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
-			 unsigned long end)
+void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			       unsigned long end)
 {
 	unsigned int cpu;
 	unsigned long flags;
-- 
cgit v1.2.3


From 9dbd3d9bfd56707f9b1ccc301506e2fac0e95795 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Sat, 5 Sep 2015 22:47:30 +0530
Subject: ARC: [arcompact] don't check for hard isr calling local_irq_enable()

Historically this was done by ARC IDE driver, which is long gone.
IRQ core is pretty robust now and already checks if IRQs are enabled
in hard ISRs. Thus there is no point in checking this in arch code on
every call to local_irq_enable().

Further, if some driver does do that, let it bring down the system so we
notice/fix this sooner, rather than covering up for it.

This makes local_irq_enable() - at least for the L1-only case - simple
enough that we can inline it.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/irqflags-compact.h | 14 ++++++-
 arch/arc/kernel/intc-compact.c          | 73 +++------------------------------
 2 files changed, 18 insertions(+), 69 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index aa805575c320..a9490841c801 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -91,7 +91,19 @@ static inline void arch_local_irq_restore(unsigned long flags)
 /*
  * Unconditionally Enable IRQs
  */
-extern void arch_local_irq_enable(void);
+static inline void arch_local_irq_enable(void)
+{
+	unsigned long temp;
+
+	__asm__ __volatile__(
+	"	lr   %0, [status32]	\n"
+	"	or   %0, %0, %1		\n"
+	"	flag %0			\n"
+	: "=&r"(temp)
+	: "n"((STATUS_E1_MASK | STATUS_E2_MASK))
+	: "cc", "memory");
+}
+
 
 /*
  * Unconditionally Disable IRQs
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index 039fac30b5c1..a1669cf2a277 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -148,78 +148,15 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
 
 void arch_local_irq_enable(void)
 {
-
 	unsigned long flags = arch_local_save_flags();
 
-	/* Allow both L1 and L2 at the onset */
-	flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
-
-	/* Called from hard ISR (between irq_enter and irq_exit) */
-	if (in_irq()) {
-
-		/* If in L2 ISR, don't re-enable any further IRQs as this can
-		 * cause IRQ priorities to get upside down. e.g. it could allow
-		 * L1 be taken while in L2 hard ISR which is wrong not only in
-		 * theory, it can also cause the dreaded L1-L2-L1 scenario
-		 */
-		if (flags & STATUS_A2_MASK)
-			flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK);
-
-		/* Even if in L1 ISR, allowe Higher prio L2 IRQs */
-		else if (flags & STATUS_A1_MASK)
-			flags &= ~(STATUS_E1_MASK);
-	}
-
-	/* called from soft IRQ, ideally we want to re-enable all levels */
-
-	else if (in_softirq()) {
-
-		/* However if this is case of L1 interrupted by L2,
-		 * re-enabling both may cause whaco L1-L2-L1 scenario
-		 * because ARC700 allows level 1 to interrupt an active L2 ISR
-		 * Thus we disable both
-		 * However some code, executing in soft ISR wants some IRQs
-		 * to be enabled so we re-enable L2 only
-		 *
-		 * How do we determine L1 intr by L2
-		 *  -A2 is set (means in L2 ISR)
-		 *  -E1 is set in this ISR's pt_regs->status32 which is
-		 *      saved copy of status32_l2 when l2 ISR happened
-		 */
-		struct pt_regs *pt = get_irq_regs();
-
-		if ((flags & STATUS_A2_MASK) && pt &&
-		    (pt->status32 & STATUS_A1_MASK)) {
-			/*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */
-			flags &= ~(STATUS_E1_MASK);
-		}
-	}
+	if (flags & STATUS_A2_MASK)
+		flags |= STATUS_E2_MASK;
+	else if (flags & STATUS_A1_MASK)
+		flags |= STATUS_E1_MASK;
 
 	arch_local_irq_restore(flags);
 }
 
-#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */
-
-/*
- * Simpler version for only 1 level of interrupt
- * Here we only Worry about Level 1 Bits
- */
-void arch_local_irq_enable(void)
-{
-	unsigned long flags;
-
-	/*
-	 * ARC IDE Drivers tries to re-enable interrupts from hard-isr
-	 * context which is simply wrong
-	 */
-	if (in_irq()) {
-		WARN_ONCE(1, "IRQ enabled from hard-isr");
-		return;
-	}
-
-	flags = arch_local_save_flags();
-	flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
-	arch_local_irq_restore(flags);
-}
-#endif
 EXPORT_SYMBOL(arch_local_irq_enable);
+#endif
-- 
cgit v1.2.3


From 55a2ae775ab4fe7aefa736e0fae6b8d4bd8aaab5 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Sat, 5 Sep 2015 23:08:31 +0530
Subject: ARC: [arcompact] entry.S: Improve early return from exception

The requirement is to
 - Reenable Exceptions (AE cleared)
 - Reenable Interrupts (E1/E2 set)

We need to wiggle these bits into ERSTATUS and call RTIE.

Prev version used the pre-exception STATUS32 as starting point for what
goes into ERSTATUS. This required explicit fixups of U/DE/L bits.

Instead, use the current (in-exception) STATUS32 as starting point.
Being in exception handler U/DE/L can be safely assumed to be correct.
Only AE/E1/E2 need to be fixed.

So the new implementation is slightly better
 -Avoids read from memory
 -Is 4 bytes smaller for the typical 1 level of intr configuration
 -Depicts the semantics more clearly

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/entry-compact.h    | 13 ++++++-------
 arch/arc/include/asm/irqflags-compact.h |  2 ++
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 415443c2a8c4..1aff3be91075 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -110,13 +110,12 @@
 
 .macro FAKE_RET_FROM_EXCPN
 
-	ld  r9, [sp, PT_status32]
-	bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK)
-	bset  r9, r9, STATUS_L_BIT
-	sr  r9, [erstatus]
-	mov r9, 55f
-	sr  r9, [eret]
-
+	lr	r9, [status32]
+	bclr	r9, r9, STATUS_AE_BIT
+	or	r9, r9, (STATUS_E1_MASK|STATUS_E2_MASK)
+	sr	r9, [erstatus]
+	mov	r9, 55f
+	sr	r9, [eret]
 	rtie
 55:
 .endm
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index a9490841c801..d8c608174617 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -23,11 +23,13 @@
 #define STATUS_E2_BIT		2	/* Int 2 enable */
 #define STATUS_A1_BIT		3	/* Int 1 active */
 #define STATUS_A2_BIT		4	/* Int 2 active */
+#define STATUS_AE_BIT		5	/* Exception active */
 
 #define STATUS_E1_MASK		(1<<STATUS_E1_BIT)
 #define STATUS_E2_MASK		(1<<STATUS_E2_BIT)
 #define STATUS_A1_MASK		(1<<STATUS_A1_BIT)
 #define STATUS_A2_MASK		(1<<STATUS_A2_BIT)
+#define STATUS_AE_MASK		(1<<STATUS_AE_BIT)
 #define STATUS_IE_MASK		(STATUS_E1_MASK | STATUS_E2_MASK)
 
 /* Other Interrupt Handling related Aux regs */
-- 
cgit v1.2.3


From 5f888087455c5199195c2ba17b91ac7285a33921 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Sun, 6 Sep 2015 19:11:12 +0530
Subject: ARC: [arcompact] entry.S: Document preemption games for L2 intr

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/entry-compact.S | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'arch/arc')

diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
index 15d457b4403a..d9087a1236eb 100644
--- a/arch/arc/kernel/entry-compact.S
+++ b/arch/arc/kernel/entry-compact.S
@@ -175,12 +175,25 @@ ENTRY(handle_interrupt_level2)
 
 	;------------------------------------------------------
 	; if L2 IRQ interrupted a L1 ISR, disable preemption
+	;
+	; This is to avoid a potential L1-L2-L1 scenario
+	;  -L1 IRQ taken
+	;  -L2 interrupts L1 (before L1 ISR could run)
+	;  -preemption off IRQ, user task in syscall picked to run
+	;  -RTIE to userspace
+	;	Returns from L2 context fine
+	;	But both L1 and L2 re-enabled, so another L1 can be taken
+	;	while prev L1 is still unserviced
+	;
 	;------------------------------------------------------
 
+	; L2 interrupting L1 implies both L2 and L1 active
+	; However both A2 and A1 are NOT set in STATUS32, thus
+	; need to check STATUS32_L2 to determine if L1 was active
+
 	ld r9, [sp, PT_status32]        ; get statu32_l2 (saved in pt_regs)
 	bbit0 r9, STATUS_A1_BIT, 1f     ; L1 not active when L2 IRQ, so normal
 
-	; A1 is set in status32_l2
 	; bump thread_info->preempt_count (Disable preemption)
 	GET_CURR_THR_INFO_FROM_SP   r10
 	ld      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
-- 
cgit v1.2.3


From 9fabcc636bf57dcb9c6fc5b1f34861c548944fd4 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 8 Oct 2015 17:52:27 +0530
Subject: ARC: [arcompact] entry.S: Elide extra check/branch in exception ret
 path

This is done by improving the laddering logic !

Before:

   if Exception
      goto excep_or_pure_k_ret

   if !Interrupt(L2)
      goto l1_chk
   else
      INTERRUPT_EPILOGUE 2

 l1_chk:
   if !Interrupt(L1)  (i.e. pure kernel mode)
      goto excep_or_pure_k_ret
   else
      INTERRUPT_EPILOGUE 1

 excep_or_pure_k_ret:
   EXCEPTION_EPILOGUE

Now:

   if !Interrupt(L1 or L2) (i.e. exception or pure kernel mode)
      goto excep_or_pure_k_ret

  ; guaranteed to be an interrupt
   if !Interrupt(L2)
      goto l1_ret
   else
      INTERRUPT_EPILOGUE 2

 ; by virtue of above, no need to chk for L1 active
 l1_ret:
    INTERRUPT_EPILOGUE 1

 excep_or_pure_k_ret:
    EXCEPTION_EPILOGUE

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/entry-compact.S | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
index d9087a1236eb..5221f194602b 100644
--- a/arch/arc/kernel/entry-compact.S
+++ b/arch/arc/kernel/entry-compact.S
@@ -333,11 +333,10 @@ END(call_do_page_fault)
 	; Note that we use realtime STATUS32 (not pt_regs->status32) to
 	; decide that.
 
-	; if Returning from Exception
-	btst   r10, STATUS_AE_BIT
-	bnz    .Lexcep_ret
+	and.f	0, r10, (STATUS_A1_MASK|STATUS_A2_MASK)
+	bz	.Lexcep_or_pure_K_ret
 
-	; Not Exception so maybe Interrupts (Level 1 or 2)
+	; Returning from Interrupts (Level 1 or 2)
 
 #ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
 
@@ -378,8 +377,7 @@ END(call_do_page_fault)
 	st      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
 
 149:
-	;return from level 2
-	INTERRUPT_EPILOGUE 2
+	INTERRUPT_EPILOGUE 2	; return from level 2 interrupt
 debug_marker_l2:
 	rtie
 
@@ -387,15 +385,11 @@ not_level2_interrupt:
 
 #endif
 
-	bbit0  r10, STATUS_A1_BIT, .Lpure_k_mode_ret
-
-	;return from level 1
-	INTERRUPT_EPILOGUE 1
+	INTERRUPT_EPILOGUE 1	; return from level 1 interrupt
 debug_marker_l1:
 	rtie
 
-.Lexcep_ret:
-.Lpure_k_mode_ret:
+.Lexcep_or_pure_K_ret:
 
 	;this case is for syscalls or Exceptions or pure kernel mode
 
-- 
cgit v1.2.3


From 5c35ee642a1d1341b225808b53fc69df2245b87e Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 29 Sep 2015 16:05:48 +0530
Subject: ARC: make write_aux_reg safer against macro substitution

It was generating warnings when called as write_aux_reg(x, paddr >> 32)

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/arcregs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index d8023bc8d1ad..431e82893fc8 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -120,7 +120,7 @@
 
 /* gcc builtin sr needs reg param to be long immediate */
 #define write_aux_reg(reg_immed, val)		\
-		__builtin_arc_sr((unsigned int)val, reg_immed)
+		__builtin_arc_sr((unsigned int)(val), reg_immed)
 
 #else
 
-- 
cgit v1.2.3


From c583ee4fb013bcf3501b9f10c252ea44cf7c657a Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 29 Sep 2015 16:01:13 +0530
Subject: ARC: mm: MMU v1..v3 only selectable for ARCompact ISA based cores

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/arc')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 5912006391ed..f50ff986ed60 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -282,6 +282,8 @@ choice
 	default ARC_MMU_V2 if ARC_CPU_750D
 	default ARC_MMU_V4 if ARC_CPU_HS
 
+if ISA_ARCOMPACT
+
 config ARC_MMU_V1
 	bool "MMU v1"
 	help
@@ -301,6 +303,8 @@ config ARC_MMU_V3
 	  Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
 	  Shared Address Spaces (SASID)
 
+endif
+
 config ARC_MMU_V4
 	bool "MMU v4"
 	depends on ISA_ARCV2
-- 
cgit v1.2.3


From b598e17f6a7a3a9bb6e0953ef586ee3697b59fce Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 2 Oct 2015 12:25:35 +0530
Subject: ARC: mm: compute TLB size as needed from ways * sets

This frees up some bits to hold more high level info such as PAE being
present, w/o increasing the size of already bloated cpuinfo struct

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/arcregs.h | 4 ++--
 arch/arc/mm/tlb.c              | 9 ++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 431e82893fc8..dc9e65a8d722 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -327,8 +327,8 @@ struct bcr_generic {
  */
 
 struct cpuinfo_arc_mmu {
-	unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6;
-	unsigned int num_tlb:16, sets:12, ways:4;
+	unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:12;
+	unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8;
 };
 
 struct cpuinfo_arc_cache {
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 29b587835974..25699db016b3 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -240,9 +240,10 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
 
 noinline void local_flush_tlb_all(void)
 {
+	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
 	unsigned long flags;
 	unsigned int entry;
-	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	int num_tlb = mmu->sets * mmu->ways;
 
 	local_irq_save(flags);
 
@@ -250,7 +251,7 @@ noinline void local_flush_tlb_all(void)
 	write_aux_reg(ARC_REG_TLBPD1, 0);
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 
-	for (entry = 0; entry < mmu->num_tlb; entry++) {
+	for (entry = 0; entry < num_tlb; entry++) {
 		/* write this entry to the TLB */
 		write_aux_reg(ARC_REG_TLBINDEX, entry);
 		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
@@ -767,8 +768,6 @@ void read_decode_mmu_bcr(void)
 		mmu->u_dtlb = mmu4->u_dtlb * 4;
 		mmu->u_itlb = mmu4->u_itlb * 4;
 	}
-
-	mmu->num_tlb = mmu->sets * mmu->ways;
 }
 
 char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
@@ -785,7 +784,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 	n += scnprintf(buf + n, len - n,
 		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
 		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,
-		       p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
+		       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
 		       p_mmu->u_dtlb, p_mmu->u_itlb,
 		       IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
 
-- 
cgit v1.2.3


From 964cf28f9d10f4e5229e4365258c292bc5c856b2 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 2 Oct 2015 19:20:27 +0530
Subject: ARC: boot log: move helper macros to header for reuse

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/setup.h | 7 +++++++
 arch/arc/kernel/mcip.c       | 3 +--
 arch/arc/kernel/setup.c      | 4 ----
 arch/arc/mm/cache.c          | 5 ++---
 arch/arc/mm/tlb.c            | 2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 6e3ef5ba4f74..307846691be6 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -33,4 +33,11 @@ extern int root_mountflags, end_mem;
 void setup_processor(void);
 void __init setup_arch_memory(void);
 
+/* Helpers used in arc_*_mumbojumbo routines */
+#define IS_AVAIL1(v, s)		((v) ? s : "")
+#define IS_DISABLED_RUN(v)	((v) ? "" : "(disabled) ")
+#define IS_USED_RUN(v)		((v) ? "" : "(not used) ")
+#define IS_USED_CFG(cfg)	IS_USED_RUN(IS_ENABLED(cfg))
+#define IS_AVAIL2(v, s, cfg)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
+
 #endif /* __ASMARC_SETUP_H */
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 4ffd1855f1bd..e48a1331c588 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -12,6 +12,7 @@
 #include <linux/irq.h>
 #include <linux/spinlock.h>
 #include <asm/mcip.h>
+#include <asm/setup.h>
 
 static char smp_cpuinfo_buf[128];
 static int idu_detected;
@@ -122,8 +123,6 @@ struct plat_smp_ops plat_smp_ops = {
 
 void mcip_init_early_smp(void)
 {
-#define IS_AVAIL1(var, str)    ((var) ? str : "")
-
 	struct mcip_bcr {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 		unsigned int pad3:8,
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index cabde9dc0696..68d3e181a82f 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -160,10 +160,6 @@ static const struct cpuinfo_data arc_cpu_tbl[] = {
 	{ {0x00, NULL		} }
 };
 
-#define IS_AVAIL1(v, s)		((v) ? s : "")
-#define IS_USED_RUN(v)		((v) ? "" : "(not used) ")
-#define IS_USED_CFG(cfg)	IS_USED_RUN(IS_ENABLED(cfg))
-#define IS_AVAIL2(v, s, cfg)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
 
 static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 0d1a6e96839f..ae3b772ecc4d 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -37,7 +37,6 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 	int n = 0;
 	struct cpuinfo_arc_cache *p;
 
-#define IS_USED_RUN(v)		((v) ? "" : "(disabled) ")
 #define PR_CACHE(p, cfg, str)						\
 	if (!(p)->ver)							\
 		n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");	\
@@ -47,7 +46,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 			(p)->sz_k, (p)->assoc, (p)->line_len,		\
 			(p)->vipt ? "VIPT" : "PIPT",			\
 			(p)->alias ? " aliasing" : "",			\
-			IS_ENABLED(cfg) ? "" : " (not used)");
+			IS_USED_CFG(cfg));
 
 	PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
 	PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
@@ -63,7 +62,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 
 	if (ioc_exists)
 		n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n",
-				IS_USED_RUN(ioc_enable));
+				IS_DISABLED_RUN(ioc_enable));
 
 	return buf;
 }
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 25699db016b3..a69f2078a96d 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -779,7 +779,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 	if (p_mmu->s_pg_sz_m)
 		scnprintf(super_pg, 64, "%dM Super Page%s, ",
 			  p_mmu->s_pg_sz_m,
-			  IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? "" : " (not used)");
+			  IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
 	n += scnprintf(buf + n, len - n,
 		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
-- 
cgit v1.2.3


From d0890ea5b68f63d7b8641455dc5534886fee2fa1 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 2 Oct 2015 19:24:20 +0530
Subject: ARC: boot log: decode more mmu config items

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/arcregs.h |  2 +-
 arch/arc/mm/tlb.c              | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index dc9e65a8d722..7fac7d85ed6a 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -327,7 +327,7 @@ struct bcr_generic {
  */
 
 struct cpuinfo_arc_mmu {
-	unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:12;
+	unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1;
 	unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8;
 };
 
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index a69f2078a96d..2a30c91f7977 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -723,10 +723,10 @@ void read_decode_mmu_bcr(void)
 
 	struct bcr_mmu_3 {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4,
+	unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
 		     u_itlb:4, u_dtlb:4;
 #else
-	unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4,
+	unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
 		     ways:4, ver:8;
 #endif
 	} *mmu3;
@@ -747,7 +747,7 @@ void read_decode_mmu_bcr(void)
 
 	if (mmu->ver <= 2) {
 		mmu2 = (struct bcr_mmu_1_2 *)&tmp;
-		mmu->pg_sz_k = TO_KB(PAGE_SIZE);
+		mmu->pg_sz_k = TO_KB(0x2000);
 		mmu->sets = 1 << mmu2->sets;
 		mmu->ways = 1 << mmu2->ways;
 		mmu->u_dtlb = mmu2->u_dtlb;
@@ -759,6 +759,7 @@ void read_decode_mmu_bcr(void)
 		mmu->ways = 1 << mmu3->ways;
 		mmu->u_dtlb = mmu3->u_dtlb;
 		mmu->u_itlb = mmu3->u_itlb;
+		mmu->sasid = mmu3->sasid;
 	} else {
 		mmu4 = (struct bcr_mmu_4 *)&tmp;
 		mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
@@ -767,6 +768,8 @@ void read_decode_mmu_bcr(void)
 		mmu->ways = mmu4->n_ways * 2;
 		mmu->u_dtlb = mmu4->u_dtlb * 4;
 		mmu->u_itlb = mmu4->u_itlb * 4;
+		mmu->sasid = mmu4->sasid;
+		mmu->pae = mmu4->pae;
 	}
 }
 
@@ -782,11 +785,10 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 			  IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
 	n += scnprintf(buf + n, len - n,
-		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
+		      "MMU [v%x]\t: %dK PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d\n",
 		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,
 		       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
-		       p_mmu->u_dtlb, p_mmu->u_itlb,
-		       IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
+		       p_mmu->u_dtlb, p_mmu->u_itlb);
 
 	return buf;
 }
-- 
cgit v1.2.3


From f33e9c434b8ce833bd3dd39436bd0799c3e1d1c5 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 9 Oct 2015 12:16:02 +0530
Subject: ARC: smp: Move default boot kick/wait code out of MCIP into common
 code

For non halt-on-reset case, all cores start off simultaneously in @stext.
Master core0 proceeds with kernel boot, while others spin-wait on
@wake_flag being set by master once it is ready. So NO hardware assist
is needed for master to "kick" the others.

This patch moves this soft implementation out of mcip.c (as there is no
hardware assist) into common smp.c

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/mcip.c | 18 ------------------
 arch/arc/kernel/smp.c  | 46 +++++++++++++++++++++-------------------------
 2 files changed, 21 insertions(+), 43 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index e48a1331c588..e18d36eb0af6 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -97,26 +97,8 @@ static void mcip_ipi_clear(int irq)
 #endif
 }
 
-volatile int wake_flag;
-
-static void mcip_wakeup_cpu(int cpu, unsigned long pc)
-{
-	BUG_ON(cpu == 0);
-	wake_flag = cpu;
-}
-
-void arc_platform_smp_wait_to_boot(int cpu)
-{
-	while (wake_flag != cpu)
-		;
-
-	wake_flag = 0;
-	__asm__ __volatile__("j @first_lines_of_secondary	\n");
-}
-
 struct plat_smp_ops plat_smp_ops = {
 	.info		= smp_cpuinfo_buf,
-	.cpu_kick	= mcip_wakeup_cpu,
 	.ipi_send	= mcip_ipi_send,
 	.ipi_clear	= mcip_ipi_clear,
 };
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index be13d12420ba..f6175fc5f2bb 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -72,35 +72,29 @@ void __init smp_cpus_done(unsigned int max_cpus)
 }
 
 /*
- * After power-up, a non Master CPU needs to wait for Master to kick start it
- *
- * The default implementation halts
- *
- * This relies on platform specific support allowing Master to directly set
- * this CPU's PC (to be @first_lines_of_secondary() and kick start it.
- *
- * In lack of such h/w assist, platforms can override this function
- *   - make this function busy-spin on a token, eventually set by Master
- *     (from arc_platform_smp_wakeup_cpu())
- *   - Once token is available, jump to @first_lines_of_secondary
- *     (using inline asm).
- *
- * Alert: can NOT use stack here as it has not been determined/setup for CPU.
- *        If it turns out to be elaborate, it's better to code it in assembly
- *
+ * Default smp boot helper for Run-on-reset case where all cores start off
+ * together. Non-masters need to wait for Master to start running.
+ * This is implemented using a flag in memory, which Non-masters spin-wait on.
+ * Master sets it to cpu-id of core to "ungate" it.
  */
-void __weak arc_platform_smp_wait_to_boot(int cpu)
+static volatile int wake_flag;
+
+static void arc_default_smp_cpu_kick(int cpu, unsigned long pc)
 {
-	/*
-	 * As a hack for debugging - since debugger will single-step over the
-	 * FLAG insn - wrap the halt itself it in a self loop
-	 */
-	__asm__ __volatile__(
-	"1:		\n"
-	"	flag 1	\n"
-	"	b 1b	\n");
+	BUG_ON(cpu == 0);
+	wake_flag = cpu;
+}
+
+void arc_platform_smp_wait_to_boot(int cpu)
+{
+	while (wake_flag != cpu)
+		;
+
+	wake_flag = 0;
+	__asm__ __volatile__("j @first_lines_of_secondary	\n");
 }
 
+
 const char *arc_platform_smp_cpuinfo(void)
 {
 	return plat_smp_ops.info ? : "";
@@ -161,6 +155,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	if (plat_smp_ops.cpu_kick)
 		plat_smp_ops.cpu_kick(cpu,
 				(unsigned long)first_lines_of_secondary);
+	else
+		arc_default_smp_cpu_kick(cpu, (unsigned long)NULL);
 
 	/* wait for 1 sec after kicking the secondary */
 	wait_till = jiffies + HZ;
-- 
cgit v1.2.3


From 3971cdc202f638f252e39316d42492ace04cc1b1 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 9 Oct 2015 11:26:12 +0530
Subject: ARC: boot: Support Halt-on-reset and Run-on-reset SMP booting modes

For Run-on-reset, non masters need to spin wait. For Halt-on-reset they
can jump to entry point directly.

Also while at it, made reset vector handler as "the" entry point for
kernel including host debugger based boot (which uses the ELF header
entry point)

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig                | 10 +++++++++
 arch/arc/kernel/entry-arcv2.S   |  2 +-
 arch/arc/kernel/entry-compact.S | 10 +++------
 arch/arc/kernel/head.S          | 47 +++++++++++++++++++++++------------------
 arch/arc/kernel/vmlinux.lds.S   |  2 +-
 5 files changed, 42 insertions(+), 29 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index f50ff986ed60..cc938967282b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -194,6 +194,16 @@ config NR_CPUS
 	range 2 4096
 	default "4"
 
+config ARC_SMP_HALT_ON_RESET
+	bool "Enable Halt-on-reset boot mode"
+	default y if ARC_UBOOT_SUPPORT
+	help
+	  In SMP configuration cores can be configured as Halt-on-reset
+	  or they could all start at same time. For Halt-on-reset, non
+	  masters are parked until Master kicks them so they can start of
+	  at designated entry point. For other case, all jump to common
+	  entry point and spin wait for Master's signal.
+
 endif	#SMP
 
 menuconfig ARC_CACHE
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index 8fa76567e402..445e63a10754 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -24,7 +24,7 @@
 	.align 4
 
 # Initial 16 slots are Exception Vectors
-VECTOR	stext			; Restart Vector (jump to entry point)
+VECTOR	res_service		; Reset Vector
 VECTOR	mem_service		; Mem exception
 VECTOR	instr_service		; Instrn Error
 VECTOR	EV_MachineCheck		; Fatal Machine check
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
index 5221f194602b..59f52035b4ea 100644
--- a/arch/arc/kernel/entry-compact.S
+++ b/arch/arc/kernel/entry-compact.S
@@ -86,7 +86,7 @@
  */
 
 ; ********* Critical System Events **********************
-VECTOR   res_service             ; 0x0, Restart Vector  (0x0)
+VECTOR   res_service             ; 0x0, Reset Vector	(0x0)
 VECTOR   mem_service             ; 0x8, Mem exception   (0x1)
 VECTOR   instr_service           ; 0x10, Instrn Error   (0x2)
 
@@ -155,13 +155,9 @@ int2_saved_reg:
 ; ---------------------------------------------
 	.section .text, "ax",@progbits
 
-res_service:		; processor restart
-	flag    0x1     ; not implemented
-	nop
-	nop
 
-reserved:		; processor restart
-	rtie            ; jump to processor initializations
+reserved:
+	flag 1		; Unexpected event, halt
 
 ;##################### Interrupt Handling ##############################
 
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 812f95e6ae69..e7fa703c8d5e 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -50,28 +50,37 @@
 .endm
 
 	.section .init.text, "ax",@progbits
-	.type stext, @function
-	.globl stext
-stext:
-	;-------------------------------------------------------------------
-	; Don't clobber r0-r2 yet. It might have bootloader provided info
-	;-------------------------------------------------------------------
+
+;----------------------------------------------------------------
+; Default Reset Handler (jumped into from Reset vector)
+; - Don't clobber r0,r1,r2 as they might have u-boot provided args
+; - Platforms can override this weak version if needed
+;----------------------------------------------------------------
+WEAK(res_service)
+	j	stext
+END(res_service)
+
+;----------------------------------------------------------------
+; Kernel Entry point
+;----------------------------------------------------------------
+ENTRY(stext)
 
 	CPU_EARLY_SETUP
 
 #ifdef CONFIG_SMP
-	; Ensure Boot (Master) proceeds. Others wait in platform dependent way
-	;	IDENTITY Reg [ 3  2  1  0 ]
-	;	(cpu-id)             ^^^	=> Zero for UP ARC700
-	;					=> #Core-ID if SMP (Master 0)
-	; Note that non-boot CPUs might not land here if halt-on-reset and
-	; instead breath life from @first_lines_of_secondary, but we still
-	; need to make sure only boot cpu takes this path.
 	GET_CPU_ID  r5
 	cmp	r5, 0
-	mov.ne	r0, r5
-	jne	arc_platform_smp_wait_to_boot
+	mov.nz	r0, r5
+#ifdef CONFIG_ARC_SMP_HALT_ON_RESET
+	; Non-Master can proceed as system would be booted sufficiently
+	jnz	first_lines_of_secondary
+#else
+	; Non-Masters wait for Master to boot enough and bring them up
+	jnz	arc_platform_smp_wait_to_boot
+#endif
+	; Master falls thru
 #endif
+
 	; Clear BSS before updating any globals
 	; XXX: use ZOL here
 	mov	r5, __bss_start
@@ -102,16 +111,14 @@ stext:
 	GET_TSK_STACK_BASE r9, sp	; r9 = tsk, sp = stack base(output)
 
 	j	start_kernel	; "C" entry point
+END(stext)
 
 #ifdef CONFIG_SMP
 ;----------------------------------------------------------------
 ;     First lines of code run by secondary before jumping to 'C'
 ;----------------------------------------------------------------
 	.section .text, "ax",@progbits
-	.type first_lines_of_secondary, @function
-	.globl first_lines_of_secondary
-
-first_lines_of_secondary:
+ENTRY(first_lines_of_secondary)
 
 	CPU_EARLY_SETUP
 
@@ -126,5 +133,5 @@ first_lines_of_secondary:
 	GET_TSK_STACK_BASE r0, sp
 
 	j	start_kernel_secondary
-
+END(first_lines_of_secondary)
 #endif
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index dd35bde39f69..894e696bddaa 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -12,7 +12,7 @@
 #include <asm/thread_info.h>
 
 OUTPUT_ARCH(arc)
-ENTRY(_stext)
+ENTRY(res_service)
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 jiffies = jiffies_64 + 4;
-- 
cgit v1.2.3


From e0868e6f673d0d2db6a3c3798605e6efb756e61e Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 12 Oct 2015 14:58:54 +0530
Subject: ARC: smp: irqchip: handle IPI as percpu irq like timer

The reason this was not done so far was lack of genuine IPI_IRQ for
ARC700, as we don't have a SMP version of core yet (which might change
soon thx to EZChip). Nevertheless, to increase the build coverage, we
need to allow CONFIG_SMP for ARC700 and still be able to run it on a
UP platform (nsim or AXS101) with a UP Device Tree (SMP-on-UP)

The build itself requires some define for IPI_IRQ and even a dummy
value is fine since that code won't run anyways.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/irq.h     |  1 +
 arch/arc/kernel/intc-compact.c | 17 ++++++++---------
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index bc5103637326..4fd7d62a6e30 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -16,6 +16,7 @@
 #ifdef CONFIG_ISA_ARCOMPACT
 #define TIMER0_IRQ      3
 #define TIMER1_IRQ      4
+#define IPI_IRQ		(NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */
 #else
 #define TIMER0_IRQ      16
 #define TIMER1_IRQ      17
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index a1669cf2a277..06bcedf19b62 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -79,17 +79,16 @@ static struct irq_chip onchip_intc = {
 static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
 			       irq_hw_number_t hw)
 {
-	/*
-	 * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core
-	 *      code doesn't own it (like TIMER0). ISS IDU / ezchip define it
-	 *      in platform header which can't be included here as it goes
-	 *      against multi-platform image philisophy
-	 */
-	if (irq == TIMER0_IRQ)
+	switch (irq) {
+	case TIMER0_IRQ:
+#ifdef CONFIG_SMP
+	case IPI_IRQ:
+#endif
 		irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
-	else
+		break;
+	default:
 		irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
-
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 4c82f28617ab9ce938118f0b99156a96c64d3da0 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 13 Oct 2015 08:48:54 +0530
Subject: ARC: remove @init_time, @init_irq platform callbacks

These are not in use for ARC platforms. Moreover DT mechanisms exist to
probe them w/o explicit platform calls.

 - clocksource drivers can use CLOCKSOURCE_OF_DECLARE()
 - intc IRQCHIP_DECLARE() calls + cascading inside DT allows external
   intc to be probed automatically

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/mach_desc.h |  6 ------
 arch/arc/kernel/irq.c            | 10 +++++-----
 arch/arc/kernel/time.c           |  3 ---
 3 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'arch/arc')

diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h
index e8993a2be6c2..7c680910f104 100644
--- a/arch/arc/include/asm/mach_desc.h
+++ b/arch/arc/include/asm/mach_desc.h
@@ -23,11 +23,8 @@
  * @dt_compat:		Array of device tree 'compatible' strings
  * 			(XXX: although only 1st entry is looked at)
  * @init_early:		Very early callback [called from setup_arch()]
- * @init_irq:		setup external IRQ controllers [called from init_IRQ()]
  * @init_smp:		for each CPU (e.g. setup IPI)
  * 			[(M):init_IRQ(), (o):start_kernel_secondary()]
- * @init_time:		platform specific clocksource/clockevent registration
- * 			[called from time_init()]
  * @init_machine:	arch initcall level callback (e.g. populate static
  * 			platform devices or parse Devicetree)
  * @init_late:		Late initcall level callback
@@ -36,13 +33,10 @@
 struct machine_desc {
 	const char		*name;
 	const char		**dt_compat;
-
 	void			(*init_early)(void);
-	void			(*init_irq)(void);
 #ifdef CONFIG_SMP
 	void			(*init_smp)(unsigned int);
 #endif
-	void			(*init_time)(void);
 	void			(*init_machine)(void);
 	void			(*init_late)(void);
 
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 2989a7bcf8a8..156489af75e8 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -19,11 +19,11 @@
  */
 void __init init_IRQ(void)
 {
-	/* Any external intc can be setup here */
-	if (machine_desc->init_irq)
-		machine_desc->init_irq();
-
-	/* process the entire interrupt tree in one go */
+	/*
+	 * process the entire interrupt tree in one go
+	 * Any external intc will be setup provided DT chains them
+	 * properly
+	 */
 	irqchip_init();
 
 #ifdef CONFIG_SMP
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 4294761a2b3e..dfad287f1db1 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -285,7 +285,4 @@ void __init time_init(void)
 
 	/* sets up the periodic event timer */
 	arc_local_timer_setup();
-
-	if (machine_desc->init_time)
-		machine_desc->init_time();
 }
-- 
cgit v1.2.3


From e55af4da026ebdb9ded3cb7708b8a8bd7884ad3a Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 12 Oct 2015 16:28:55 +0530
Subject: ARC: smp: Introduce smp hook @init_early_smp for Master core

This adds a platform agnostic early SMP init hook which is called on