summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/paravirt.h11
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/pgtable-2level.h17
-rw-r--r--arch/x86/include/asm/pgtable-3level.h30
-rw-r--r--arch/x86/include/asm/pgtable.h140
-rw-r--r--arch/x86/include/asm/pgtable_64.h15
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/mm/pgtable.c31
-rw-r--r--include/asm-generic/pgtable.h80
-rw-r--r--include/asm-generic/tlb.h14
-rw-r--r--include/linux/huge_mm.h83
-rw-r--r--include/linux/mm.h30
-rw-r--r--include/linux/mmu_notifier.h14
-rw-r--r--include/linux/pfn_t.h12
-rw-r--r--mm/gup.c7
-rw-r--r--mm/huge_memory.c249
-rw-r--r--mm/memory.c88
-rw-r--r--mm/pagewalk.c20
-rw-r--r--mm/pgtable-generic.c14
21 files changed, 844 insertions, 18 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index f761142976e5..d0012add6b19 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -571,6 +571,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
bool
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ bool
+
config HAVE_ARCH_HUGE_VMAP
bool
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 874c1238dffd..33007aa74111 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -109,6 +109,7 @@ config X86
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
select HAVE_ARCH_VMAP_STACK if X86_64
select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_CC_STACKPROTECTOR
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index f75fbfe550f2..0489884fdc44 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -475,6 +475,17 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
native_pmd_val(pmd));
}
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ if (sizeof(pudval_t) > sizeof(long))
+ /* 5 arg words */
+ pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
+ else
+ PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
+ native_pud_val(pud));
+}
+
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
pmdval_t val = native_pmd_val(pmd);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index bb2de45a60f2..b060f962d581 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -249,6 +249,8 @@ struct pv_mmu_ops {
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmdval);
+ void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pudval);
void (*pte_update)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index fd74a11959de..a8b96e708c2b 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -21,6 +21,10 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
*pmdp = pmd;
}
+static inline void native_set_pud(pud_t *pudp, pud_t pud)
+{
+}
+
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
native_set_pte(ptep, pte);
@@ -31,6 +35,10 @@ static inline void native_pmd_clear(pmd_t *pmdp)
native_set_pmd(pmdp, __pmd(0));
}
+static inline void native_pud_clear(pud_t *pudp)
+{
+}
+
static inline void native_pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *xp)
{
@@ -55,6 +63,15 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif
+#ifdef CONFIG_SMP
+static inline pud_t native_pudp_get_and_clear(pud_t *xp)
+{
+ return __pud(xchg((pudval_t *)xp, 0));
+}
+#else
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
+#endif
+
/* Bit manipulation helper on pte/pgoff entry */
static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
unsigned long mask, unsigned int leftshift)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index cdaa58c9b39e..8f50fb3f04e1 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -121,6 +121,12 @@ static inline void native_pmd_clear(pmd_t *pmd)
*(tmp + 1) = 0;
}
+#ifndef CONFIG_SMP
+static inline void native_pud_clear(pud_t *pudp)
+{
+}
+#endif
+
static inline void pud_clear(pud_t *pudp)
{
set_pud(pudp, __pud(0));
@@ -176,6 +182,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif
+#ifdef CONFIG_SMP
+union split_pud {
+ struct {
+ u32 pud_low;
+ u32 pud_high;
+ };
+ pud_t pud;
+};
+
+static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
+{
+ union split_pud res, *orig = (union split_pud *)pudp;
+
+ /* xchg acts as a barrier before setting of the high bits */
+ res.pud_low = xchg(&orig->pud_low, 0);
+ res.pud_high = orig->pud_high;
+ orig->pud_high = 0;
+
+ return res.pud;
+}
+#else
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
+#endif
+
/* Encode and de-code a swap entry */
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
#define __swp_type(x) (((x).val) & 0x1f)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 437feb436efa..1cfb36b8c024 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -46,6 +46,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd)
+#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud)
#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)
@@ -128,6 +129,16 @@ static inline int pmd_young(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_ACCESSED;
}
+static inline int pud_dirty(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_DIRTY;
+}
+
+static inline int pud_young(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_ACCESSED;
+}
+
static inline int pte_write(pte_t pte)
{
return pte_flags(pte) & _PAGE_RW;
@@ -181,6 +192,13 @@ static inline int pmd_trans_huge(pmd_t pmd)
return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
}
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline int pud_trans_huge(pud_t pud)
+{
+ return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
+}
+#endif
+
#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
@@ -192,6 +210,18 @@ static inline int pmd_devmap(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_DEVMAP);
}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline int pud_devmap(pud_t pud)
+{
+ return !!(pud_val(pud) & _PAGE_DEVMAP);
+}
+#else
+static inline int pud_devmap(pud_t pud)
+{
+ return 0;
+}
+#endif
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -333,6 +363,65 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
}
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return __pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return __pud(v & ~clear);
+}
+
+static inline pud_t pud_mkold(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_ACCESSED);
+}
+
+static inline pud_t pud_mkclean(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_DIRTY);
+}
+
+static inline pud_t pud_wrprotect(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_RW);
+}
+
+static inline pud_t pud_mkdirty(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+}
+
+static inline pud_t pud_mkdevmap(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_DEVMAP);
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_PSE);
+}
+
+static inline pud_t pud_mkyoung(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_ACCESSED);
+}
+
+static inline pud_t pud_mkwrite(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_RW);
+}
+
+static inline pud_t pud_mknotpresent(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE);
+}
+
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
@@ -344,6 +433,11 @@ static inline int pmd_soft_dirty(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
}
+static inline int pud_soft_dirty(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_SOFT_DIRTY;
+}
+
static inline pte_t pte_mksoft_dirty(pte_t pte)
{
return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
@@ -354,6 +448,11 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}
+static inline pud_t pud_mksoft_dirty(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_SOFT_DIRTY);
+}
+
static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
@@ -364,6 +463,11 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
}
+static inline pud_t pud_clear_soft_dirty(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_SOFT_DIRTY);
+}
+
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
/*
@@ -392,6 +496,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
massage_pgprot(pgprot));
}
+static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+{
+ return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
+ massage_pgprot(pgprot));
+}
+
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pteval_t val = pte_val(pte);
@@ -771,6 +881,14 @@ static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
return res;
}
+static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
+{
+ pud_t res = *pudp;
+
+ native_pud_clear(pudp);
+ return res;
+}
+
static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep , pte_t pte)
{
@@ -783,6 +901,12 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
native_set_pmd(pmdp, pmd);
}
+static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ native_set_pud(pudp, pud);
+}
+
#ifndef CONFIG_PARAVIRT
/*
* Rules for using pte_update - it must be called after any PTE update which
@@ -861,10 +985,15 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty);
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp);
+extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp);
#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
@@ -884,6 +1013,13 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long
return native_pmdp_get_and_clear(pmdp);
}
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ return native_pudp_get_and_clear(pudp);
+}
+
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
@@ -932,6 +1068,10 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd)
{
}
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud)
+{
+}
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 62b775926045..73c7ccc38912 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -106,6 +106,21 @@ static inline void native_pud_clear(pud_t *pud)
native_set_pud(pud, native_make_pud(0));
}
+static inline pud_t native_pudp_get_and_clear(pud_t *xp)
+{
+#ifdef CONFIG_SMP
+ return native_make_pud(xchg(&xp->pud, 0));
+#else
+ /* native_local_pudp_get_and_clear,
+ * but duplicated because of cyclic dependency
+ */
+ pud_t ret = *xp;
+
+ native_pud_clear(xp);
+ return ret;
+#endif
+}
+
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
*pgdp = pgd;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a1bfba0f7234..4797e87b0fb6 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -425,6 +425,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
+ .set_pud_at = native_set_pud_at,
.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3feec5af4e67..6cbdff26bb96 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -445,6 +445,26 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
return changed;
}
+
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp, pud_t entry, int dirty)
+{
+ int changed = !pud_same(*pudp, entry);
+
+ VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+
+ if (changed && dirty) {
+ *pudp = entry;
+ /*
+ * We had a write-protection fault here and changed the pud
+ * to to more permissive. No need to flush the TLB for that,
+ * #PF is architecturally guaranteed to do that and in the
+ * worst-case we'll generate a spurious fault.
+ */
+ }
+
+ return changed;
+}
#endif
int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -474,6 +494,17 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp)
+{
+ int ret = 0;
+
+ if (pud_young(*pudp))
+ ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+ (unsigned long *)pudp);
+
+ return ret;
+}
#endif
int ptep_clear_flush_young(struct vm_area_struct *vma,
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 18af2bcefe6a..a0aba0f9c57b 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -36,6 +36,9 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma,
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
@@ -44,6 +47,13 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
BUILD_BUG();
return 0;
}
+static inline int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty)
+{
+ BUILD_BUG();
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
@@ -121,8 +131,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
}
#endif
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address,
pmd_t *pmdp)
@@ -131,20 +141,40 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
pmd_clear(pmdp);
return pmd;
}
+#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address,
+ pud_t *pudp)
+{
+ pud_t pud = *pudp;
+
+ pud_clear(pudp);
+ return pud;
+}
+#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#endif
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp,
int full)
{
return pmdp_huge_get_and_clear(mm, address, pmdp);
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp,
+ int full)
+{
+ return pudp_huge_get_and_clear(mm, address, pudp);
+}
+#endif
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pte_t *ptep,
@@ -181,6 +211,9 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp);
+extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
+ unsigned long address,
+ pud_t *pudp);
#endif
#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -208,6 +241,23 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp)
+{
+ pud_t old_pud = *pudp;
+
+ set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
+}
+#else
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp)
+{
+ BUILD_BUG();
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+#endif
#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -273,12 +323,23 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
return pmd_val(pmd_a) == pmd_val(pmd_b);
}
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+ return pud_val(pud_a) == pud_val(pud_b);
+}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
BUILD_BUG();
return 0;
}
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+ BUILD_BUG();
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
@@ -640,6 +701,15 @@ static inline int pmd_write(pmd_t pmd)
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
+ (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+static inline int pud_trans_huge(pud_t pud)
+{
+ return 0;
+}
+#endif
+
#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
@@ -785,8 +855,10 @@ static inline int pmd_clear_huge(pmd_t *pmd)
* e.g. see arch/arc: flush_pmd_tlb_range
*/
#define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
+#define flush_pud_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
#else
#define flush_pmd_tlb_range(vma, addr, end) BUILD_BUG()
+#define flush_pud_tlb_range(vma, addr, end) BUILD_BUG()
#endif
#endif
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 7eed8cf3130a..4329bc6ef04b 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -232,6 +232,20 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
__tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
} while (0)
+/**
+ * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
+ * invalidation. This is a nop so far, because only x86 needs it.
+ */
+#ifndef __tlb_remove_pud_tlb_entry
+#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
+#endif
+
+#define tlb_remove_pud_tlb_entry(tlb, pudp, address) \
+ do { \
+ __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \
+ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \
+ } while (0)
+
/*
* For things like page tables caches (ie caching addresses "inside" the
* page tables, like x86 does), for legacy reasons, flushing an
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f0029e786205..a3762d49ba39 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,6 +6,18 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
struct vm_area_struct *vma);
extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+ struct vm_area_struct *vma);
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+#else
+static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
+{
+}
+#endif
+
extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
@@ -17,6 +29,9 @@ extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
extern int zap_huge_pmd(struct mmu_gather *tlb,
struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr);
+extern int zap_huge_pud(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ pud_t *pud, unsigned long addr);
extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned char *vec);
@@ -26,8 +41,10 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
int prot_numa);
-int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *,
- pfn_t pfn, bool write);
+int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, pfn_t pfn, bool write);
+int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, pfn_t pfn, bool write);
enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG,
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
@@ -58,13 +75,14 @@ extern struct kobj_attribute shmem_enabled_attr;
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, int flags);
-
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
#define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))
+#define HPAGE_PUD_SHIFT PUD_SHIFT
+#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT)
+#define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1))
+
extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
#define transparent_hugepage_enabled(__vma) \
@@ -118,6 +136,17 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
bool freeze, struct page *page);
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long address);
+
+#define split_huge_pud(__vma, __pud, __address) \
+ do { \
+ pud_t *____pud = (__pud); \
+ if (pud_trans_huge(*____pud) \
+ || pud_devmap(*____pud)) \
+ __split_huge_pud(__vma, __pud, __address); \
+ } while (0)
+
extern int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice);
extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -126,6 +155,8 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
long adjust_next);
extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma);
+extern spinlock_t *__pud_trans_huge_lock(pud_t *pud,
+ struct vm_area_struct *vma);
/* mmap_sem must be held on entry */
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
@@ -136,6 +167,15 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
else
return NULL;
}
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+ struct vm_area_struct *vma)
+{
+ VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
+ if (pud_trans_huge(*pud) || pud_devmap(*pud))
+ return __pud_trans_huge_lock(pud, vma);
+ else
+ return NULL;
+}
static inline int hpage_nr_pages(struct page *page)
{
if (unlikely(PageTransHuge(page)))
@@ -143,6 +183,11 @@ static inline int hpage_nr_pages(struct page *page)
return 1;
}
+struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, int flags);
+struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, int flags);
+
extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page;
@@ -157,6 +202,11 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
return is_huge_zero_page(pmd_page(pmd));
}
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+ return false;
+}
+
struct page *mm_get_huge_zero_page(struct mm_struct *mm);
void mm_put_huge_zero_page(struct mm_struct *mm);
@@ -167,6 +217,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
+
#define hpage_nr_pages(x) 1
#define transparent_hugepage_enabled(__vma) 0
@@ -195,6 +249,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
unsigned long address, bool freeze, struct page *page) {}
+#define split_huge_pud(__vma, __pmd, __address) \
+ do { } while (0)
+
static inline int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{
@@ -212,6 +269,11 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
{
return NULL;
}
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+ struct vm_area_struct *vma)
+{
+ return NULL;
+}
static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
{
@@ -223,6 +285,11 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+ return false;
+}
+
static inline void mm_put_huge_zero_page(struct mm_struct *mm)
{
return;
@@ -233,6 +300,12 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
{
return NULL;
}
+
+static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud, int flags)
+{
+ return NULL;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 035a688e5472..d8b75d7d6a9e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -424,6 +424,10 @@ static inline int pmd_devmap(pmd_t pmd)
{
return 0;
}
+static inline int pud_devmap(pud_t pud)
+{
+ return 0;
+}
#endif
/*
@@ -1199,6 +1203,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
/**
* mm_walk - callbacks for walk_page_range
+ * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ * this handler should only handle pud_trans_huge() puds.
+ * the pmd_entry or pte_entry callbacks will be used for
+ * regular PUDs.
* @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
@@ -1218,6 +1226,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
* (see the comment on walk_page_range() for more details)
*/
struct mm_walk {
+ int (*pud_entry)(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1801,8 +1811,26 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
return ptl;
}
-extern void __init pagecache_init(void);
+/*
+ * No scalability reason to split PUD locks yet, but follow the same pattern
+ * as the PMD locks to make it easier if we decide to. The VM should not be
+ * considered ready to switch to split PUD locks yet; there may be places
+ * which need to be converted from page_table_lock.
+ */
+static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
+{
+ return &mm->page_table_lock;
+}
+
+static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
+{
+ spinlock_t *ptl = pud_lockptr(mm, pud);
+
+ spin_lock(ptl);
+ return ptl;
+}
+extern void __init pagecache_init(void);
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index a1a210d59961..51891fb0d3ce 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -381,6 +381,19 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
___pmd; \
})
+#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud) \
+({ \
+ unsigned long ___haddr = __haddr & HPAGE_PUD_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pud_t ___pud; \
+ \
+ ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud); \
+ mmu_notifier_invalidate_range(___mm, ___haddr, \
+ ___haddr + HPAGE_PUD_SIZE); \
+ \
+ ___pud; \
+})
+
#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)