summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig7
-rw-r--r--mm/backing-dev.c5
-rw-r--r--mm/compaction.c2
-rw-r--r--mm/debug.c28
-rw-r--r--mm/early_ioremap.c2
-rw-r--r--mm/fadvise.c10
-rw-r--r--mm/filemap.c1
-rw-r--r--mm/frame_vector.c6
-rw-r--r--mm/gup.c48
-rw-r--r--mm/hmm.c12
-rw-r--r--mm/huge_memory.c88
-rw-r--r--mm/hugetlb.c377
-rw-r--r--mm/hwpoison-inject.c2
-rw-r--r--mm/interval_tree.c2
-rw-r--r--mm/khugepaged.c15
-rw-r--r--mm/kmemcheck.c1
-rw-r--r--mm/kmemleak.c5
-rw-r--r--mm/ksm.c9
-rw-r--r--mm/madvise.c2
-rw-r--r--mm/memcontrol.c287
-rw-r--r--mm/memory-failure.c48
-rw-r--r--mm/memory.c101
-rw-r--r--mm/memory_hotplug.c9
-rw-r--r--mm/mempolicy.c39
-rw-r--r--mm/migrate.c3
-rw-r--r--mm/mlock.c2
-rw-r--r--mm/mmap.c10
-rw-r--r--mm/mmu_notifier.c31
-rw-r--r--mm/mprotect.c11
-rw-r--r--mm/nommu.c7
-rw-r--r--mm/oom_kill.c25
-rw-r--r--mm/page_alloc.c187
-rw-r--r--mm/page_ext.c2
-rw-r--r--mm/page_io.c4
-rw-r--r--mm/page_owner.c21
-rw-r--r--mm/page_vma_mapped.c66
-rw-r--r--mm/percpu.c4
-rw-r--r--mm/pgtable-generic.c6
-rw-r--r--mm/shmem.c59
-rw-r--r--mm/slab.c27
-rw-r--r--mm/slab.h3
-rw-r--r--mm/slab_common.c56
-rw-r--r--mm/slub.c12
-rw-r--r--mm/sparse.c8
-rw-r--r--mm/swap.c27
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/truncate.c23
-rw-r--r--mm/util.c36
-rw-r--r--mm/vmscan.c89
-rw-r--r--mm/zpool.c25
-rw-r--r--mm/zsmalloc.c32
-rw-r--r--mm/zswap.c91
52 files changed, 1057 insertions, 918 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 03ff7703d322..c782e8fb7235 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -639,15 +639,10 @@ config MAX_STACK_SIZE_MB
A sane initial value is 80 MB.
-# For architectures that support deferred memory initialisation
-config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
- bool
-
config DEFERRED_STRUCT_PAGE_INIT
bool "Defer initialisation of struct pages to kthreads"
default n
- depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
- depends on NO_BOOTMEM && MEMORY_HOTPLUG
+ depends on NO_BOOTMEM
depends on !FLATMEM
help
Ordinarily all struct pages are initialised during early boot in a
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 84b2dc76f140..b5f940ce0143 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
if (IS_ERR(dev))
return PTR_ERR(dev);
- if (bdi_debug_register(bdi, dev_name(dev))) {
- device_destroy(bdi_class, dev->devt);
- return -ENOMEM;
- }
cgwb_bdi_register(bdi);
bdi->dev = dev;
+ bdi_debug_register(bdi, dev_name(dev));
set_bit(WB_registered, &bdi->wb.state);
spin_lock_bh(&bdi_lock);
diff --git a/mm/compaction.c b/mm/compaction.c
index 10cd757f1006..2c8999d027ab 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1738,7 +1738,7 @@ int sysctl_extfrag_threshold = 500;
* @order: The order of the current allocation
* @alloc_flags: The allocation flags of the current allocation
* @ac: The context of current allocation
- * @mode: The migration mode for async, sync light, or sync migration
+ * @prio: Determines how hard direct compaction should try to succeed
*
* This is the main entry point for direct page compaction.
*/
diff --git a/mm/debug.c b/mm/debug.c
index d947f3e03b0d..56e2d9125ea5 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -50,7 +50,7 @@ void __dump_page(struct page *page, const char *reason)
*/
int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
- pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
+ pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
page, page_ref_count(page), mapcount,
page->mapping, page_to_pgoff(page));
if (PageCompound(page))
@@ -69,7 +69,7 @@ void __dump_page(struct page *page, const char *reason)
#ifdef CONFIG_MEMCG
if (page->mem_cgroup)
- pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup);
+ pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);
#endif
}
@@ -84,10 +84,10 @@ EXPORT_SYMBOL(dump_page);
void dump_vma(const struct vm_area_struct *vma)
{
- pr_emerg("vma %p start %p end %p\n"
- "next %p prev %p mm %p\n"
- "prot %lx anon_vma %p vm_ops %p\n"
- "pgoff %lx file %p private_data %p\n"
+ pr_emerg("vma %px start %px end %px\n"
+ "next %px prev %px mm %px\n"
+ "prot %lx anon_vma %px vm_ops %px\n"
+ "pgoff %lx file %px private_data %px\n"
"flags: %#lx(%pGv)\n",
vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
vma->vm_prev, vma->vm_mm,
@@ -100,27 +100,27 @@ EXPORT_SYMBOL(dump_vma);
void dump_mm(const struct mm_struct *mm)
{
- pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n"
+ pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n"
#ifdef CONFIG_MMU
- "get_unmapped_area %p\n"
+ "get_unmapped_area %px\n"
#endif
"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
- "pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
+ "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
"start_brk %lx brk %lx start_stack %lx\n"
"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
- "binfmt %p flags %lx core_state %p\n"
+ "binfmt %px flags %lx core_state %px\n"
#ifdef CONFIG_AIO
- "ioctx_table %p\n"
+ "ioctx_table %px\n"
#endif
#ifdef CONFIG_MEMCG
- "owner %p "
+ "owner %px "
#endif
- "exe_file %p\n"
+ "exe_file %px\n"
#ifdef CONFIG_MMU_NOTIFIER
- "mmu_notifier_mm %p\n"
+ "mmu_notifier_mm %px\n"
#endif
#ifdef CONFIG_NUMA_BALANCING
"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index d04ac1ec0559..1826f191e72c 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -111,7 +111,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
enum fixed_addresses idx;
int i, slot;
- WARN_ON(system_state != SYSTEM_BOOTING);
+ WARN_ON(system_state >= SYSTEM_RUNNING);
slot = -1;
for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
diff --git a/mm/fadvise.c b/mm/fadvise.c
index ec70d6e4b86d..767887f5f3bf 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -127,7 +127,15 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
*/
start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
end_index = (endbyte >> PAGE_SHIFT);
- if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
+ /*
+ * The page at end_index will be inclusively discarded according
+ * by invalidate_mapping_pages(), so subtracting 1 from
+ * end_index means we will skip the last page. But if endbyte
+ * is page aligned or is at the end of file, we should not skip
+ * that page - discarding the last page is safe enough.
+ */
+ if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
+ endbyte != inode->i_size - 1) {
/* First page is tricky as 0 - 1 = -1, but pgoff_t
* is unsigned, so the end_index >= start_index
* check below would be true and we'll discard the whole
diff --git a/mm/filemap.c b/mm/filemap.c
index ee83baaf855d..693f62212a59 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -31,7 +31,6 @@
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/cpuset.h>
-#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 297c7238f7d4..c64dca6e27c2 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -62,8 +62,10 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
* get_user_pages_longterm() and disallow it for filesystem-dax
* mappings.
*/
- if (vma_is_fsdax(vma))
- return -EOPNOTSUPP;
+ if (vma_is_fsdax(vma)) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
vec->got_ref = true;
diff --git a/mm/gup.c b/mm/gup.c
index d3fb60e5bfac..9e17d8db2d6b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
*/
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
- return pte_access_permitted(pte, WRITE) ||
+ return pte_write(pte) ||
((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}
@@ -848,7 +848,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
unsigned long nr_pages,
struct page **pages,
struct vm_area_struct **vmas,
- int *locked, bool notify_drop,
+ int *locked,
unsigned int flags)
{
long ret, pages_done;
@@ -922,7 +922,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
pages++;
start += PAGE_SIZE;
}
- if (notify_drop && lock_dropped && *locked) {
+ if (lock_dropped && *locked) {
/*
* We must let the caller know we temporarily dropped the lock
* and so the critical section protected by it was lost.
@@ -959,36 +959,12 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
int *locked)
{
return __get_user_pages_locked(current, current->mm, start, nr_pages,
- pages, NULL, locked, true,
+ pages, NULL, locked,
gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_locked);
/*
- * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows for
- * tsk, mm to be specified.
- *
- * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
- * caller if required (just like with __get_user_pages). "FOLL_GET"
- * is set implicitly if "pages" is non-NULL.
- */
-static __always_inline long __get_user_pages_unlocked(struct task_struct *tsk,
- struct mm_struct *mm, unsigned long start,
- unsigned long nr_pages, struct page **pages,
- unsigned int gup_flags)
-{
- long ret;
- int locked = 1;
-
- down_read(&mm->mmap_sem);
- ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL,
- &locked, false, gup_flags);
- if (locked)
- up_read(&mm->mmap_sem);
- return ret;
-}
-
-/*
* get_user_pages_unlocked() is suitable to replace the form:
*
* down_read(&mm->mmap_sem);
@@ -1006,8 +982,16 @@ static __always_inline long __get_user_pages_unlocked(struct task_struct *tsk,
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags)
{
- return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
- pages, gup_flags | FOLL_TOUCH);
+ struct mm_struct *mm = current->mm;
+ int locked = 1;
+ long ret;
+
+ down_read(&mm->mmap_sem);
+ ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
+ &locked, gup_flags | FOLL_TOUCH);
+ if (locked)
+ up_read(&mm->mmap_sem);
+ return ret;
}
EXPORT_SYMBOL(get_user_pages_unlocked);
@@ -1073,7 +1057,7 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
struct vm_area_struct **vmas, int *locked)
{
return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
- locked, true,
+ locked,
gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
@@ -1090,7 +1074,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
struct vm_area_struct **vmas)
{
return __get_user_pages_locked(current, current->mm, start, nr_pages,
- pages, vmas, NULL, false,
+ pages, vmas, NULL,
gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
diff --git a/mm/hmm.c b/mm/hmm.c
index 3a5c172af560..979211c7ccc8 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -391,11 +391,11 @@ again:
if (pmd_protnone(pmd))
return hmm_vma_walk_clear(start, end, walk);
- if (!pmd_access_permitted(pmd, write_fault))
+ if (write_fault && !pmd_write(pmd))
return hmm_vma_walk_clear(start, end, walk);
pfn = pmd_pfn(pmd) + pte_index(addr);
- flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0;
+ flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
return 0;
@@ -418,7 +418,7 @@ again:
}
if (!pte_present(pte)) {
- swp_entry_t entry;
+ swp_entry_t entry = pte_to_swp_entry(pte);
if (!non_swap_entry(entry)) {
if (hmm_vma_walk->fault)
@@ -426,8 +426,6 @@ again:
continue;
}
- entry = pte_to_swp_entry(pte);
-
/*
* This is a special swap entry, ignore migration, use
* device and report anything else as error.
@@ -456,11 +454,11 @@ again:
continue;
}
- if (!pte_access_permitted(pte, write_fault))
+ if (write_fault && !pte_write(pte))
goto fault;
pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
- pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0;
+ pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
continue;
fault:
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2f2f5e774902..87ab9b8f56b5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
*/
WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
- if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE))
+ if (flags & FOLL_WRITE && !pmd_write(*pmd))
return NULL;
if (pmd_present(*pmd) && pmd_devmap(*pmd))
@@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
assert_spin_locked(pud_lockptr(mm, pud));
- if (!pud_access_permitted(*pud, flags & FOLL_WRITE))
+ if (flags & FOLL_WRITE && !pud_write(*pud))
return NULL;
if (pud_present(*pud) && pud_devmap(*pud))
@@ -1386,7 +1386,7 @@ out_unlock:
*/
static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
{
- return pmd_access_permitted(pmd, WRITE) ||
+ return pmd_write(pmd) ||
((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
}
@@ -1910,17 +1910,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
* pmdp_invalidate() is required to make sure we don't miss
* dirty/young flags set by hardware.
*/
- entry = *pmd;
- pmdp_invalidate(vma, addr, pmd);
-
- /*
- * Recover dirty/young flags. It relies on pmdp_invalidate to not
- * corrupt them.
- */
- if (pmd_dirty(*pmd))
- entry = pmd_mkdirty(entry);
- if (pmd_young(*pmd))
- entry = pmd_mkyoung(entry);
+ entry = pmdp_invalidate(vma, addr, pmd);
entry = pmd_modify(entry, newprot);
if (preserve_write)
@@ -2073,8 +2063,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
struct mm_struct *mm = vma->vm_mm;
struct page *page;
pgtable_t pgtable;
- pmd_t _pmd;
- bool young, write, dirty, soft_dirty, pmd_migration = false;
+ pmd_t old_pmd, _pmd;
+ bool young, write, soft_dirty, pmd_migration = false;
unsigned long addr;
int i;
@@ -2116,24 +2106,50 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
return __split_huge_zero_page_pmd(vma, haddr, pmd);
}
+ /*
+ * Up to this point the pmd is present and huge and userland has the
+ * whole access to the hugepage during the split (which happens in
+ * place). If we overwrite the pmd with the not-huge version pointing
+ * to the pte here (which of course we could if all CPUs were bug
+ * free), userland could trigger a small page size TLB miss on the
+ * small sized TLB while the hugepage TLB entry is still established in
+ * the huge TLB. Some CPU doesn't like that.
+ * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum
+ * 383 on page 93. Intel should be safe but is also warns that it's
+ * only safe if the permission and cache attributes of the two entries
+ * loaded in the two TLB is identical (which should be the case here).
+ * But it is generally safer to never allow small and huge TLB entries
+ * for the same virtual address to be loaded simultaneously. So instead
+ * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the
+ * current pmd notpresent (atomically because here the pmd_trans_huge
+ * must remain set at all times on the pmd until the split is complete
+ * for this pmd), then we flush the SMP TLB and finally we write the
+ * non-huge version of the pmd entry with pmd_populate.
+ */
+ old_pmd = pmdp_invalidate(vma, haddr, pmd);
+
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
- pmd_migration = is_pmd_migration_entry(*pmd);
+ pmd_migration = is_pmd_migration_entry(old_pmd);
if (pmd_migration) {
swp_entry_t entry;
- entry = pmd_to_swp_entry(*pmd);
+ entry = pmd_to_swp_entry(old_pmd);
page = pfn_to_page(swp_offset(entry));
} else
#endif
- page = pmd_page(*pmd);
+ page = pmd_page(old_pmd);
VM_BUG_ON_PAGE(!page_count(page), page);
page_ref_add(page, HPAGE_PMD_NR - 1);
- write = pmd_write(*pmd);
- young = pmd_young(*pmd);
- dirty = pmd_dirty(*pmd);
- soft_dirty = pmd_soft_dirty(*pmd);
+ if (pmd_dirty(old_pmd))
+ SetPageDirty(page);
+ write = pmd_write(old_pmd);
+ young = pmd_young(old_pmd);
+ soft_dirty = pmd_soft_dirty(old_pmd);
- pmdp_huge_split_prepare(vma, haddr, pmd);
+ /*
+ * Withdraw the table only after we mark the pmd entry invalid.
+ * This's critical for some architectures (Power).
+ */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable);
@@ -2160,8 +2176,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (soft_dirty)
entry = pte_mksoft_dirty(entry);
}
- if (dirty)
- SetPageDirty(page + i);
pte = pte_offset_map(&_pmd, addr);
BUG_ON(!pte_none(*pte));
set_pte_at(mm, addr, pte, entry);
@@ -2189,28 +2203,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
}
smp_wmb(); /* make pte visible before pmd */
- /*
- * Up to this point the pmd is present and huge and userland has the
- * whole access to the hugepage during the split (which happens in
- * place). If we overwrite the pmd with the not-huge version pointing
- * to the pte here (which of course we could if all CPUs were bug
- * free), userland could trigger a small page size TLB miss on the
- * small sized TLB while the hugepage TLB entry is still established in
- * the huge TLB. Some CPU doesn't like that.
- * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum
- * 383 on page 93. Intel should be safe but is also warns that it's
- * only safe if the permission and cache attributes of the two entries
- * loaded in the two TLB is identical (which should be the case here).
- * But it is generally safer to never allow small and huge TLB entries
- * for the same virtual address to be loaded simultaneously. So instead
- * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the
- * current pmd notpresent (atomically because here the pmd_trans_huge
- * and pmd_trans_splitting must remain set at all times on the pmd
- * until the split is complete for this pmd), then we flush the SMP TLB
- * and finally we write the non-huge version of the pmd entry with
- * pmd_populate.
- */
- pmdp_invalidate(vma, haddr, pmd);
pmd_populate(mm, pmd, pgtable);
if (freeze) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9a334f5fb730..7c204e3d132b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -34,10 +34,9 @@
#include <linux/hugetlb_cgroup.h>
#include <linux/node.h>
#include <linux/userfaultfd_k.h>
+#include <linux/page_owner.h>
#include "internal.h"
-int hugepages_treat_as_movable;
-
int hugetlb_max_hstate __read_mostly;
unsigned int default_hstate_idx;
struct hstate hstates[HUGE_MAX_HSTATE];
@@ -926,7 +925,7 @@ retry_cpuset:
/* Movability of hugepages depends on migration support. */
static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
- if (hugepages_treat_as_movable || hugepage_migration_supported(h))
+ if (hugepage_migration_supported(h))
return GFP_HIGHUSER_MOVABLE;
else
return GFP_HIGHUSER;
@@ -1108,7 +1107,8 @@ static bool zone_spans_last_pfn(const struct zone *zone,
return zone_spans_pfn(zone, last_pfn);
}
-static struct page *alloc_gigantic_page(int nid, struct hstate *h)
+static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+ int nid, nodemask_t *nodemask)
{
unsigned int order = huge_page_order(h);
unsigned long nr_pages = 1 << order;
@@ -1116,11 +1116,9 @@ static struct page *alloc_gigantic_page(int nid, struct hstate *h)
struct zonelist *zonelist;
struct zone *zone;
struct zoneref *z;
- gfp_t gfp_mask;
- gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
zonelist = node_zonelist(nid, gfp_mask);
- for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), NULL) {
+ for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nodemask) {
spin_lock_irqsave(&zone->lock, flags);
pfn = ALIGN(zone->zone_start_pfn, nr_pages);
@@ -1151,41 +1149,13 @@ static struct page *alloc_gigantic_page(int nid, struct hstate *h)
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
static void prep_compound_gigantic_page(struct page *page, unsigned int order);
-static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
-{
- struct page *page;
-
- page = alloc_gigantic_page(nid, h);
- if (page) {
- prep_compound_gigantic_page(page, huge_page_order(h));
- prep_new_huge_page(h, page, nid);
- }
-
- return page;
-}
-
-static int alloc_fresh_gigantic_page(struct hstate *h,
- nodemask_t *nodes_allowed)
-{
- struct page *page = NULL;
- int nr_nodes, node;
-
- for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
- page = alloc_fresh_gigantic_page_node(h, node);
- if (page)
- return 1;
- }
-
- return 0;
-}
-
#else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
static inline bool gigantic_page_supported(void) { return false; }
+static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+ int nid, nodemask_t *nodemask) { return NULL; }
static inline void free_gigantic_page(struct page *page, unsigned int order) { }
static inline void destroy_compound_gigantic_page(struct page *page,
unsigned int order) { }
-static inline int alloc_fresh_gigantic_page(struct hstate *h,
- nodemask_t *nodes_allowed) { return 0; }
#endif
static void update_and_free_page(struct hstate *h, struct page *page)
@@ -1250,6 +1220,28 @@ static void clear_page_huge_active(struct page *page)
ClearPagePrivate(&page[1]);
}
+/*
+ * Internal hugetlb specific page flag. Do not use outside of the hugetlb
+ * code