summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c23
-rw-r--r--mm/bootmem.c5
-rw-r--r--mm/bounce.c4
-rw-r--r--mm/cleancache.c98
-rw-r--r--mm/compaction.c101
-rw-r--r--mm/filemap.c38
-rw-r--r--mm/filemap_xip.c7
-rw-r--r--mm/huge_memory.c133
-rw-r--r--mm/hugetlb.c211
-rw-r--r--mm/hwpoison-inject.c4
-rw-r--r--mm/kmemleak.c3
-rw-r--r--mm/ksm.c57
-rw-r--r--mm/madvise.c10
-rw-r--r--mm/memblock.c6
-rw-r--r--mm/memcontrol.c680
-rw-r--r--mm/memory-failure.c98
-rw-r--r--mm/memory.c198
-rw-r--r--mm/mempolicy.c65
-rw-r--r--mm/migrate.c40
-rw-r--r--mm/mincore.c2
-rw-r--r--mm/mlock.c3
-rw-r--r--mm/mmap.c90
-rw-r--r--mm/mmu_context.c2
-rw-r--r--mm/mprotect.c7
-rw-r--r--mm/mremap.c2
-rw-r--r--mm/nommu.c9
-rw-r--r--mm/oom_kill.c166
-rw-r--r--mm/page-writeback.c1
-rw-r--r--mm/page_alloc.c59
-rw-r--r--mm/page_cgroup.c4
-rw-r--r--mm/pagewalk.c2
-rw-r--r--mm/percpu-vm.c3
-rw-r--r--mm/pgtable-generic.c5
-rw-r--r--mm/process_vm_access.c23
-rw-r--r--mm/rmap.c70
-rw-r--r--mm/shmem.c106
-rw-r--r--mm/slab.c13
-rw-r--r--mm/slub.c40
-rw-r--r--mm/sparse.c30
-rw-r--r--mm/swap.c14
-rw-r--r--mm/swap_state.c34
-rw-r--r--mm/swapfile.c92
-rw-r--r--mm/truncate.c12
-rw-r--r--mm/util.c41
-rw-r--r--mm/vmalloc.c8
-rw-r--r--mm/vmscan.c152
46 files changed, 1598 insertions, 1173 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7ba8feae11b8..dd8e2aafb07e 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -318,7 +318,7 @@ static void wakeup_timer_fn(unsigned long data)
if (bdi->wb.task) {
trace_writeback_wake_thread(bdi);
wake_up_process(bdi->wb.task);
- } else {
+ } else if (bdi->dev) {
/*
* When bdi tasks are inactive for long time, they are killed.
* In this case we have to wake-up the forker thread which
@@ -584,6 +584,8 @@ EXPORT_SYMBOL(bdi_register_dev);
*/
static void bdi_wb_shutdown(struct backing_dev_info *bdi)
{
+ struct task_struct *task;
+
if (!bdi_cap_writeback_dirty(bdi))
return;
@@ -602,8 +604,13 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
* Finally, kill the kernel thread. We don't need to be RCU
* safe anymore, since the bdi is gone from visibility.
*/
- if (bdi->wb.task)
- kthread_stop(bdi->wb.task);
+ spin_lock_bh(&bdi->wb_lock);
+ task = bdi->wb.task;
+ bdi->wb.task = NULL;
+ spin_unlock_bh(&bdi->wb_lock);
+
+ if (task)
+ kthread_stop(task);
}
/*
@@ -623,7 +630,9 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
void bdi_unregister(struct backing_dev_info *bdi)
{
- if (bdi->dev) {
+ struct device *dev = bdi->dev;
+
+ if (dev) {
bdi_set_min_ratio(bdi, 0);
trace_writeback_bdi_unregister(bdi);
bdi_prune_sb(bdi);
@@ -632,8 +641,12 @@ void bdi_unregister(struct backing_dev_info *bdi)
if (!bdi_cap_flush_forker(bdi))
bdi_wb_shutdown(bdi);
bdi_debug_unregister(bdi);
- device_unregister(bdi->dev);
+
+ spin_lock_bh(&bdi->wb_lock);
bdi->dev = NULL;
+ spin_unlock_bh(&bdi->wb_lock);
+
+ device_unregister(dev);
}
}
EXPORT_SYMBOL(bdi_unregister);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 668e94df8cf2..0131170c9d54 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -766,14 +766,13 @@ void * __init alloc_bootmem_section(unsigned long size,
unsigned long section_nr)
{
bootmem_data_t *bdata;
- unsigned long pfn, goal, limit;
+ unsigned long pfn, goal;
pfn = section_nr_to_pfn(section_nr);
goal = pfn << PAGE_SHIFT;
- limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
- return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
+ return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, 0);
}
#endif
diff --git a/mm/bounce.c b/mm/bounce.c
index 4e9ae722af83..d1be02ca1889 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -50,9 +50,9 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
unsigned char *vto;
local_irq_save(flags);
- vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
+ vto = kmap_atomic(to->bv_page);
memcpy(vto + to->bv_offset, vfrom, to->bv_len);
- kunmap_atomic(vto, KM_BOUNCE_READ);
+ kunmap_atomic(vto);
local_irq_restore(flags);
}
diff --git a/mm/cleancache.c b/mm/cleancache.c
index bcaae4c2a770..5646c740f613 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -15,29 +15,34 @@
#include <linux/fs.h>
#include <linux/exportfs.h>
#include <linux/mm.h>
+#include <linux/debugfs.h>
#include <linux/cleancache.h>
/*
* This global enablement flag may be read thousands of times per second
- * by cleancache_get/put/flush even on systems where cleancache_ops
+ * by cleancache_get/put/invalidate even on systems where cleancache_ops
* is not claimed (e.g. cleancache is config'ed on but remains
* disabled), so is preferred to the slower alternative: a function
* call that checks a non-global.
*/
-int cleancache_enabled;
+int cleancache_enabled __read_mostly;
EXPORT_SYMBOL(cleancache_enabled);
/*
* cleancache_ops is set by cleancache_ops_register to contain the pointers
* to the cleancache "backend" implementation functions.
*/
-static struct cleancache_ops cleancache_ops;
+static struct cleancache_ops cleancache_ops __read_mostly;
-/* useful stats available in /sys/kernel/mm/cleancache */
-static unsigned long cleancache_succ_gets;
-static unsigned long cleancache_failed_gets;
-static unsigned long cleancache_puts;
-static unsigned long cleancache_flushes;
+/*
+ * Counters available via /sys/kernel/debug/frontswap (if debugfs is
+ * properly configured. These are for information only so are not protected
+ * against increment races.
+ */
+static u64 cleancache_succ_gets;
+static u64 cleancache_failed_gets;
+static u64 cleancache_puts;
+static u64 cleancache_invalidates;
/*
* register operations for cleancache, returning previous thus allowing
@@ -148,10 +153,11 @@ void __cleancache_put_page(struct page *page)
EXPORT_SYMBOL(__cleancache_put_page);
/*
- * Flush any data from cleancache associated with the poolid and the
+ * Invalidate any data from cleancache associated with the poolid and the
* page's inode and page index so that a subsequent "get" will fail.
*/
-void __cleancache_flush_page(struct address_space *mapping, struct page *page)
+void __cleancache_invalidate_page(struct address_space *mapping,
+ struct page *page)
{
/* careful... page->mapping is NULL sometimes when this is called */
int pool_id = mapping->host->i_sb->cleancache_poolid;
@@ -160,85 +166,57 @@ void __cleancache_flush_page(struct address_space *mapping, struct page *page)
if (pool_id >= 0) {
VM_BUG_ON(!PageLocked(page));
if (cleancache_get_key(mapping->host, &key) >= 0) {
- (*cleancache_ops.flush_page)(pool_id, key, page->index);
- cleancache_flushes++;
+ (*cleancache_ops.invalidate_page)(pool_id,
+ key, page->index);
+ cleancache_invalidates++;
}
}
}
-EXPORT_SYMBOL(__cleancache_flush_page);
+EXPORT_SYMBOL(__cleancache_invalidate_page);
/*
- * Flush all data from cleancache associated with the poolid and the
+ * Invalidate all data from cleancache associated with the poolid and the
* mappings's inode so that all subsequent gets to this poolid/inode
* will fail.
*/
-void __cleancache_flush_inode(struct address_space *mapping)
+void __cleancache_invalidate_inode(struct address_space *mapping)
{
int pool_id = mapping->host->i_sb->cleancache_poolid;
struct cleancache_filekey key = { .u.key = { 0 } };
if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
- (*cleancache_ops.flush_inode)(pool_id, key);
+ (*cleancache_ops.invalidate_inode)(pool_id, key);
}
-EXPORT_SYMBOL(__cleancache_flush_inode);
+EXPORT_SYMBOL(__cleancache_invalidate_inode);
/*
* Called by any cleancache-enabled filesystem at time of unmount;
* note that pool_id is surrendered and may be reutrned by a subsequent
* cleancache_init_fs or cleancache_init_shared_fs
*/
-void __cleancache_flush_fs(struct super_block *sb)
+void __cleancache_invalidate_fs(struct super_block *sb)
{
if (sb->cleancache_poolid >= 0) {
int old_poolid = sb->cleancache_poolid;
sb->cleancache_poolid = -1;
- (*cleancache_ops.flush_fs)(old_poolid);
+ (*cleancache_ops.invalidate_fs)(old_poolid);
}
}
-EXPORT_SYMBOL(__cleancache_flush_fs);
-
-#ifdef CONFIG_SYSFS
-
-/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */
-
-#define CLEANCACHE_SYSFS_RO(_name) \
- static ssize_t cleancache_##_name##_show(struct kobject *kobj, \
- struct kobj_attribute *attr, char *buf) \
- { \
- return sprintf(buf, "%lu\n", cleancache_##_name); \
- } \
- static struct kobj_attribute cleancache_##_name##_attr = { \
- .attr = { .name = __stringify(_name), .mode = 0444 }, \
- .show = cleancache_##_name##_show, \
- }
-
-CLEANCACHE_SYSFS_RO(succ_gets);
-CLEANCACHE_SYSFS_RO(failed_gets);
-CLEANCACHE_SYSFS_RO(puts);
-CLEANCACHE_SYSFS_RO(flushes);
-
-static struct attribute *cleancache_attrs[] = {
- &cleancache_succ_gets_attr.attr,
- &cleancache_failed_gets_attr.attr,
- &cleancache_puts_attr.attr,
- &cleancache_flushes_attr.attr,
- NULL,
-};
-
-static struct attribute_group cleancache_attr_group = {
- .attrs = cleancache_attrs,
- .name = "cleancache",
-};
-
-#endif /* CONFIG_SYSFS */
+EXPORT_SYMBOL(__cleancache_invalidate_fs);
static int __init init_cleancache(void)
{
-#ifdef CONFIG_SYSFS
- int err;
-
- err = sysfs_create_group(mm_kobj, &cleancache_attr_group);
-#endif /* CONFIG_SYSFS */
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *root = debugfs_create_dir("cleancache", NULL);
+ if (root == NULL)
+ return -ENXIO;
+ debugfs_create_u64("succ_gets", S_IRUGO, root, &cleancache_succ_gets);
+ debugfs_create_u64("failed_gets", S_IRUGO,
+ root, &cleancache_failed_gets);
+ debugfs_create_u64("puts", S_IRUGO, root, &cleancache_puts);
+ debugfs_create_u64("invalidates", S_IRUGO,
+ root, &cleancache_invalidates);
+#endif
return 0;
}
module_init(init_cleancache)
diff --git a/mm/compaction.c b/mm/compaction.c
index 71a58f67f481..74a8c825ff28 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -35,7 +35,7 @@ struct compact_control {
unsigned long migrate_pfn; /* isolate_migratepages search base */
bool sync; /* Synchronous migration */
- unsigned int order; /* order a direct compactor needs */
+ int order; /* order a direct compactor needs */
int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone;
};
@@ -313,12 +313,34 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
} else if (!locked)
spin_lock_irq(&zone->lru_lock);
+ /*
+ * migrate_pfn does not necessarily start aligned to a
+ * pageblock. Ensure that pfn_valid is called when moving
+ * into a new MAX_ORDER_NR_PAGES range in case of large
+ * memory holes within the zone
+ */
+ if ((low_pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
+ if (!pfn_valid(low_pfn)) {
+ low_pfn += MAX_ORDER_NR_PAGES - 1;
+ continue;
+ }
+ }
+
if (!pfn_valid_within(low_pfn))
continue;
nr_scanned++;
- /* Get the page and skip if free */
+ /*
+ * Get the page and ensure the page is within the same zone.
+ * See the comment in isolate_freepages about overlapping
+ * nodes. It is deliberate that the new zone lock is not taken
+ * as memory compaction should not move pages between nodes.
+ */
page = pfn_to_page(low_pfn);
+ if (page_zone(page) != zone)
+ continue;
+
+ /* Skip if free */
if (PageBuddy(page))
continue;
@@ -653,49 +675,71 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
/* Compact all zones within a node */
-static int compact_node(int nid)
+static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
{
int zoneid;
- pg_data_t *pgdat;
struct zone *zone;
- if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
- return -EINVAL;
- pgdat = NODE_DATA(nid);
-
- /* Flush pending updates to the LRU lists */
- lru_add_drain_all();
-
for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
- struct compact_control cc = {
- .nr_freepages = 0,
- .nr_migratepages = 0,
- .order = -1,
- .sync = true,
- };
zone = &pgdat->node_zones[zoneid];
if (!populated_zone(zone))
continue;
- cc.zone = zone;
- INIT_LIST_HEAD(&cc.freepages);
- INIT_LIST_HEAD(&cc.migratepages);
-
- compact_zone(zone, &cc);
+ cc->nr_freepages = 0;
+ cc->nr_migratepages = 0;
+ cc->zone = zone;
+ INIT_LIST_HEAD(&cc->freepages);
+ INIT_LIST_HEAD(&cc->migratepages);
+
+ if (cc->order == -1 || !compaction_deferred(zone, cc->order))
+ compact_zone(zone, cc);
+
+ if (cc->order > 0) {
+ int ok = zone_watermark_ok(zone, cc->order,
+ low_wmark_pages(zone), 0, 0);
+ if (ok && cc->order > zone->compact_order_failed)
+ zone->compact_order_failed = cc->order + 1;
+ /* Currently async compaction is never deferred. */
+ else if (!ok && cc->sync)
+ defer_compaction(zone, cc->order);
+ }
- VM_BUG_ON(!list_empty(&cc.freepages));
- VM_BUG_ON(!list_empty(&cc.migratepages));
+ VM_BUG_ON(!list_empty(&cc->freepages));
+ VM_BUG_ON(!list_empty(&cc->migratepages));
}
return 0;
}
+int compact_pgdat(pg_data_t *pgdat, int order)
+{
+ struct compact_control cc = {
+ .order = order,
+ .sync = false,
+ };
+
+ return __compact_pgdat(pgdat, &cc);
+}
+
+static int compact_node(int nid)
+{
+ struct compact_control cc = {
+ .order = -1,
+ .sync = true,
+ };
+
+ return __compact_pgdat(NODE_DATA(nid), &cc);
+}
+
/* Compact all nodes in the system */
static int compact_nodes(void)
{
int nid;
+ /* Flush pending updates to the LRU lists */
+ lru_add_drain_all();
+
for_each_online_node(nid)
compact_node(nid);
@@ -728,7 +772,14 @@ ssize_t sysfs_compact_node(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- compact_node(dev->id);
+ int nid = dev->id;
+
+ if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
+ /* Flush pending updates to the LRU lists */
+ lru_add_drain_all();
+
+ compact_node(nid);
+ }
return count;
}
diff --git a/mm/filemap.c b/mm/filemap.c
index 97f49ed35bd2..c3811bc6b9e3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -101,9 +101,8 @@
* ->inode->i_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->__set_page_dirty_buffers)
*
- * (code doesn't rely on that order, so you could switch it around)
- * ->tasklist_lock (memory_failure, collect_procs_ao)
- * ->i_mmap_mutex
+ * ->i_mmap_mutex
+ * ->tasklist_lock (memory_failure, collect_procs_ao)
*/
/*
@@ -123,7 +122,7 @@ void __delete_from_page_cache(struct page *page)
if (PageUptodate(page) && PageMappedToDisk(page))
cleancache_put_page(page);
else
- cleancache_flush_page(mapping, page);
+ cleancache_invalidate_page(mapping, page);
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
@@ -500,10 +499,13 @@ struct page *__page_cache_alloc(gfp_t gfp)
struct page *page;
if (cpuset_do_page_mem_spread()) {
- get_mems_allowed();
- n = cpuset_mem_spread_node();
- page = alloc_pages_exact_node(n, gfp, 0);
- put_mems_allowed();
+ unsigned int cpuset_mems_cookie;
+ do {
+ cpuset_mems_cookie = get_mems_allowed();
+ n = cpuset_mem_spread_node();
+ page = alloc_pages_exact_node(n, gfp, 0);
+ } while (!put_mems_allowed(cpuset_mems_cookie) && !page);
+
return page;
}
return alloc_pages(gfp, 0);
@@ -1318,10 +1320,10 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
* taking the kmap.
*/
if (!fault_in_pages_writeable(desc->arg.buf, size)) {
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
left = __copy_to_user_inatomic(desc->arg.buf,
kaddr + offset, size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
if (left == 0)
goto success;
}
@@ -1400,15 +1402,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long seg = 0;
size_t count;
loff_t *ppos = &iocb->ki_pos;
- struct blk_plug plug;
count = 0;
retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
if (retval)
return retval;
- blk_start_plug(&plug);
-
/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
if (filp->f_flags & O_DIRECT) {
loff_t size;
@@ -1424,8 +1423,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
retval = filemap_write_and_wait_range(mapping, pos,
pos + iov_length(iov, nr_segs) - 1);
if (!retval) {
+ struct blk_plug plug;
+
+ blk_start_plug(&plug);
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
+ blk_finish_plug(&plug);
}
if (retval > 0) {
*ppos = pos + retval;
@@ -1481,7 +1484,6 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
break;
}
out:
- blk_finish_plug(&plug);
return retval;
}
EXPORT_SYMBOL(generic_file_aio_read);
@@ -2045,7 +2047,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
size_t copied;
BUG_ON(!in_atomic());
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
if (likely(i->nr_segs == 1)) {
int left;
char __user *buf = i->iov->iov_base + i->iov_offset;
@@ -2055,7 +2057,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
copied = __iovec_copy_from_user_inatomic(kaddr + offset,
i->iov, i->iov_offset, bytes);
}
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
return copied;
}
@@ -2341,7 +2343,9 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
struct page *page;
gfp_t gfp_notmask = 0;
- gfp_mask = mapping_gfp_mask(mapping) | __GFP_WRITE;
+ gfp_mask = mapping_gfp_mask(mapping);
+ if (mapping_cap_account_dirty(mapping))
+ gfp_mask |= __GFP_WRITE;
if (flags & AOP_FLAG_NOFS)
gfp_notmask = __GFP_FS;
repeat:
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index f91b2f687343..a4eb31132229 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -263,7 +263,12 @@ found:
xip_pfn);
if (err == -ENOMEM)
return VM_FAULT_OOM;
- BUG_ON(err);
+ /*
+ * err == -EBUSY is fine, we've raced against another thread
+ * that faulted-in the same page
+ */
+ if (err != -EBUSY)
+ BUG_ON(err);
return VM_FAULT_NOPAGE;
} else {
int err, ret = VM_FAULT_OOM;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b3ffc21ce801..f0e5306eeb55 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -671,6 +671,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
set_pmd_at(mm, haddr, pmd, entry);
prepare_pmd_huge_pte(pgtable, mm);
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
+ mm->nr_ptes++;
spin_unlock(&mm->page_table_lock);
}
@@ -789,6 +790,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd = pmd_mkold(pmd_wrprotect(pmd));
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
prepare_pmd_huge_pte(pgtable, dst_mm);
+ dst_mm->nr_ptes++;
ret = 0;
out_unlock:
@@ -887,7 +889,6 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
}
kfree(pages);
- mm->nr_ptes++;
smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable);
page_remove_rmap(page);
@@ -1030,31 +1031,23 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
{
int ret = 0;
- spin_lock(&tlb->mm->page_table_lock);
- if (likely(pmd_trans_huge(*pmd))) {
- if (unlikely(pmd_trans_splitting(*pmd))) {
- spin_unlock(&tlb->mm->page_table_lock);
- wait_split_huge_page(vma->anon_vma,
- pmd);
- } else {
- struct page *page;
- pgtable_t pgtable;
- pgtable = get_pmd_huge_pte(tlb->mm);
- page = pmd_page(*pmd);
- pmd_clear(pmd);
- tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
- page_remove_rmap(page);
- VM_BUG_ON(page_mapcount(page) < 0);
- add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
- VM_BUG_ON(!PageHead(page));
- spin_unlock(&tlb->mm->page_table_lock);
- tlb_remove_page(tlb, page);
- pte_free(tlb->mm, pgtable);
- ret = 1;
- }
- } else
+ if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+ struct page *page;
+ pgtable_t pgtable;
+ pgtable = get_pmd_huge_pte(tlb->mm);
+ page = pmd_page(*pmd);
+ pmd_clear(pmd);
+ tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+ page_remove_rmap(page);
+ VM_BUG_ON(page_mapcount(page) < 0);
+ add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+ VM_BUG_ON(!PageHead(page));
+ tlb->mm->nr_ptes--;
spin_unlock(&tlb->mm->page_table_lock);
-
+ tlb_remove_page(tlb, page);
+ pte_free(tlb->mm, pgtable);
+ ret = 1;
+ }
return ret;
}
@@ -1064,21 +1057,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
{
int ret = 0;
- spin_lock(&vma->vm_mm->page_table_lock);
- if (likely(pmd_trans_huge(*pmd))) {
- ret = !pmd_trans_splitting(*pmd);
- spin_unlock(&vma->vm_mm->page_table_lock);
- if (unlikely(!ret))
- wait_split_huge_page(vma->anon_vma, pmd);
- else {
- /*
- * All logical pages in the range are present
- * if backed by a huge page.
- */
- memset(vec, 1, (end - addr) >> PAGE_SHIFT);
- }
- } else
+ if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+ /*
+ * All logical pages in the range are present
+ * if backed by a huge page.
+ */
spin_unlock(&vma->vm_mm->page_table_lock);
+ memset(vec, 1, (end - addr) >> PAGE_SHIFT);
+ ret = 1;
+ }
return ret;
}
@@ -1108,20 +1095,11 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
goto out;
}
- spin_lock(&mm->page_table_lock);
- if (likely(pmd_trans_huge(*old_pmd))) {
- if (pmd_trans_splitting(*old_pmd)) {
- spin_unlock(&mm->page_table_lock);
- wait_split_huge_page(vma->anon_vma, old_pmd);
- ret = -1;
- } else {
- pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
- VM_BUG_ON(!pmd_none(*new_pmd));
- set_pmd_at(mm, new_addr, new_pmd, pmd);
- spin_unlock(&mm->page_table_lock);
- ret = 1;
- }
- } else {
+ ret = __pmd_trans_huge_lock(old_pmd, vma);
+ if (ret == 1) {
+ pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
+ VM_BUG_ON(!pmd_none(*new_pmd));
+ set_pmd_at(mm, new_addr, new_pmd, pmd);
spin_unlock(&mm->page_table_lock);
}
out:
@@ -1134,24 +1112,41 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
struct mm_struct *mm = vma->vm_mm;
int ret = 0;
- spin_lock(&mm->page_table_lock);
+ if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+ pmd_t entry;
+ entry = pmdp_get_and_clear(mm, addr, pmd);
+ entry = pmd_modify(entry, newprot);
+ set_pmd_at(mm, addr, pmd, entry);
+ spin_unlock(&vma->vm_mm->page_table_lock);
+ ret = 1;
+ }
+
+ return ret;
+}
+
+/*
+ * Returns 1 if a given pmd maps a stable (not under splitting) thp.
+ * Returns -1 if it maps a thp under splitting. Returns 0 otherwise.
+ *
+ * Note that if it returns 1, this routine returns without unlocking page
+ * table locks.