summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/Kconfig.debug 25
-rw-r--r-- mm/bootmem.c 8
-rw-r--r-- mm/compaction.c 65
-rw-r--r-- mm/filemap.c 127
-rw-r--r-- mm/huge_memory.c 20
-rw-r--r-- mm/hugetlb.c 6
-rw-r--r-- mm/ksm.c 23
-rw-r--r-- mm/memblock.c 241
-rw-r--r-- mm/memcontrol.c 661
-rw-r--r-- mm/memory-failure.c 2
-rw-r--r-- mm/memory.c 7
-rw-r--r-- mm/mempolicy.c 3
-rw-r--r-- mm/migrate.c 56
-rw-r--r-- mm/nobootmem.c 8
-rw-r--r-- mm/oom_kill.c 63
-rw-r--r-- mm/page-writeback.c 15
-rw-r--r-- mm/page_alloc.c 89
-rw-r--r-- mm/page_cgroup.c 138
-rw-r--r-- mm/pagewalk.c 24
-rw-r--r-- mm/rmap.c 80
-rw-r--r-- mm/shmem.c 4
-rw-r--r-- mm/slab.c 57
-rw-r--r-- mm/slob.c 6
-rw-r--r-- mm/slub.c 372
-rw-r--r-- mm/swap.c 189
-rw-r--r-- mm/swapfile.c 374
-rw-r--r-- mm/truncate.c 22
-rw-r--r-- mm/vmalloc.c 158
-rw-r--r-- mm/vmscan.c 36
-rw-r--r-- mm/vmstat.c 9
30 files changed, 1836 insertions, 1052 deletions
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index af7cfb43d2f0..8b1a477162dc 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -1,27 +1,24 @@
config DEBUG_PAGEALLOC
bool "Debug page memory allocations"
- depends on DEBUG_KERNEL && ARCH_SUPPORTS_DEBUG_PAGEALLOC
- depends on !HIBERNATION || !PPC && !SPARC
+ depends on DEBUG_KERNEL
+ depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC
depends on !KMEMCHECK
+ select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
---help---
Unmap pages from the kernel linear mapping after free_pages().
This results in a large slowdown, but helps to find certain types
of memory corruption.
+ For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC,
+ fill the pages with poison patterns after free_pages() and verify
+ the patterns before alloc_pages(). Additionally,
+ this option cannot be enabled in combination with hibernation as
+ that would result in incorrect warnings of memory corruption after
+ a resume because free pages are not saved to the suspend image.
+
config WANT_PAGE_DEBUG_FLAGS
bool
config PAGE_POISONING
- bool "Debug page memory allocations"
- depends on DEBUG_KERNEL && !ARCH_SUPPORTS_DEBUG_PAGEALLOC
- depends on !HIBERNATION
- select DEBUG_PAGEALLOC
+ bool
select WANT_PAGE_DEBUG_FLAGS
- ---help---
- Fill the pages with poison patterns after free_pages() and verify
- the patterns before alloc_pages(). This results in a large slowdown,
- but helps to find certain types of memory corruption.
-
- This option cannot be enabled in combination with hibernation as
- that would result in incorrect warnings of memory corruption after
- a resume because free pages are not saved to the suspend image.
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 07aeb89e396e..01d5a4b3dd0c 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -34,14 +34,6 @@ unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
-#ifdef CONFIG_CRASH_DUMP
-/*
- * If we have booted due to a crash, max_pfn will be a very low value. We need
- * to know the amount of memory that the previous kernel used.
- */
-unsigned long saved_max_pfn;
-#endif
-
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
diff --git a/mm/compaction.c b/mm/compaction.c
index 8be430b812de..021a2960ef9e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -42,8 +42,6 @@ struct compact_control {
unsigned int order; /* order a direct compactor needs */
int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone;
-
- int compact_mode;
};
static unsigned long release_freepages(struct list_head *freelist)
@@ -155,7 +153,6 @@ static void isolate_freepages(struct zone *zone,
* pages on cc->migratepages. We stop searching if the migrate
* and free page scanners meet or enough free pages are isolated.
*/
- spin_lock_irqsave(&zone->lock, flags);
for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
pfn -= pageblock_nr_pages) {
unsigned long isolated;
@@ -178,9 +175,19 @@ static void isolate_freepages(struct zone *zone,
if (!suitable_migration_target(page))
continue;
- /* Found a block suitable for isolating free pages from */
- isolated = isolate_freepages_block(zone, pfn, freelist);
- nr_freepages += isolated;
+ /*
+ * Found a block suitable for isolating free pages from. Only
+ * now do we disable interrupts, double check things are ok and
+ * isolate the pages. This is to minimise the time IRQs
+ * are disabled.
+ */
+ isolated = 0;
+ spin_lock_irqsave(&zone->lock, flags);
+ if (suitable_migration_target(page)) {
+ isolated = isolate_freepages_block(zone, pfn, freelist);
+ nr_freepages += isolated;
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
/*
* Record the highest PFN we isolated pages from. When next
@@ -190,7 +197,6 @@ static void isolate_freepages(struct zone *zone,
if (isolated)
high_pfn = max(high_pfn, pfn);
}
- spin_unlock_irqrestore(&zone->lock, flags);
/* split_free_page does not map the pages */
list_for_each_entry(page, freelist, lru) {
@@ -271,9 +277,27 @@ static unsigned long isolate_migratepages(struct zone *zone,
}
/* Time to isolate some pages for migration */
+ cond_resched();
spin_lock_irq(&zone->lru_lock);
for (; low_pfn < end_pfn; low_pfn++) {
struct page *page;
+ bool locked = true;
+
+ /* give a chance to irqs before checking need_resched() */
+ if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
+ spin_unlock_irq(&zone->lru_lock);
+ locked = false;
+ }
+ if (need_resched() || spin_is_contended(&zone->lru_lock)) {
+ if (locked)
+ spin_unlock_irq(&zone->lru_lock);
+ cond_resched();
+ spin_lock_irq(&zone->lru_lock);
+ if (fatal_signal_pending(current))
+ break;
+ } else if (!locked)
+ spin_lock_irq(&zone->lru_lock);
+
if (!pfn_valid_within(low_pfn))
continue;
nr_scanned++;
@@ -397,10 +421,7 @@ static int compact_finished(struct zone *zone,
return COMPACT_COMPLETE;
/* Compaction run is not finished if the watermark is not met */
- if (cc->compact_mode != COMPACT_MODE_KSWAPD)
- watermark = low_wmark_pages(zone);
- else
- watermark = high_wmark_pages(zone);
+ watermark = low_wmark_pages(zone);
watermark += (1 << cc->order);
if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
@@ -413,15 +434,6 @@ static int compact_finished(struct zone *zone,
if (cc->order == -1)
return COMPACT_CONTINUE;
- /*
- * Generating only one page of the right order is not enough
- * for kswapd, we must continue until we're above the high
- * watermark as a pool for high order GFP_ATOMIC allocations
- * too.
- */
- if (cc->compact_mode == COMPACT_MODE_KSWAPD)
- return COMPACT_CONTINUE;
-
/* Direct compactor: Is a suitable page free? */
for (order = cc->order; order < MAX_ORDER; order++) {
/* Job done if page is free of the right migratetype */
@@ -508,12 +520,13 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
unsigned long nr_migrate, nr_remaining;
+ int err;
if (!isolate_migratepages(zone, cc))
continue;
nr_migrate = cc->nr_migratepages;
- migrate_pages(&cc->migratepages, compaction_alloc,
+ err = migrate_pages(&cc->migratepages, compaction_alloc,
(unsigned long)cc, false,
cc->sync);
update_nr_listpages(cc);
@@ -527,7 +540,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
nr_remaining);
/* Release LRU pages not migrated */
- if (!list_empty(&cc->migratepages)) {
+ if (err) {
putback_lru_pages(&cc->migratepages);
cc->nr_migratepages = 0;
}
@@ -543,8 +556,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
unsigned long compact_zone_order(struct zone *zone,
int order, gfp_t gfp_mask,
- bool sync,
- int compact_mode)
+ bool sync)
{
struct compact_control cc = {
.nr_freepages = 0,
@@ -553,7 +565,6 @@ unsigned long compact_zone_order(struct zone *zone,
.migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone,
.sync = sync,
- .compact_mode = compact_mode,
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
@@ -599,8 +610,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
nodemask) {
int status;
- status = compact_zone_order(zone, order, gfp_mask, sync,
- COMPACT_MODE_DIRECT_RECLAIM);
+ status = compact_zone_order(zone, order, gfp_mask, sync);
rc = max(status, rc);
/* If a normal allocation would succeed, stop compacting */
@@ -631,7 +641,6 @@ static int compact_node(int nid)
.nr_freepages = 0,
.nr_migratepages = 0,
.order = -1,
- .compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
};
zone = &pgdat->node_zones[zoneid];
diff --git a/mm/filemap.c b/mm/filemap.c
index 83a45d35468b..f807afda86f2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -108,11 +108,11 @@
*/
/*
- * Remove a page from the page cache and free it. Caller has to make
+ * Delete a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
* is safe. The caller must hold the mapping's tree_lock.
*/
-void __remove_from_page_cache(struct page *page)
+void __delete_from_page_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -137,7 +137,15 @@ void __remove_from_page_cache(struct page *page)
}
}
-void remove_from_page_cache(struct page *page)
+/**
+ * delete_from_page_cache - delete page from page cache
+ * @page: the page which the kernel is trying to remove from page cache
+ *
+ * This must be called only on pages that have been verified to be in the page
+ * cache and locked. It will never put the page into the free list, the caller
+ * has a reference on the page.
+ */
+void delete_from_page_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
void (*freepage)(struct page *);
@@ -146,14 +154,15 @@ void remove_from_page_cache(struct page *page)
freepage = mapping->a_ops->freepage;
spin_lock_irq(&mapping->tree_lock);
- __remove_from_page_cache(page);
+ __delete_from_page_cache(page);
spin_unlock_irq(&mapping->tree_lock);
mem_cgroup_uncharge_cache_page(page);
if (freepage)
freepage(page);
+ page_cache_release(page);
}
-EXPORT_SYMBOL(remove_from_page_cache);
+EXPORT_SYMBOL(delete_from_page_cache);
static int sync_page(void *word)
{
@@ -387,6 +396,76 @@ int filemap_write_and_wait_range(struct address_space *mapping,
EXPORT_SYMBOL(filemap_write_and_wait_range);
/**
+ * replace_page_cache_page - replace a pagecache page with a new one
+ * @old: page to be replaced
+ * @new: page to replace with
+ * @gfp_mask: allocation mode
+ *
+ * This function replaces a page in the pagecache with a new one. On
+ * success it acquires the pagecache reference for the new page and
+ * drops it for the old page. Both the old and new pages must be
+ * locked. This function does not add the new page to the LRU, the
+ * caller must do that.
+ *
+ * The remove + add is atomic. The only way this function can fail is
+ * memory allocation failure.
+ */
+int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
+{
+ int error;
+ struct mem_cgroup *memcg = NULL;
+
+ VM_BUG_ON(!PageLocked(old));
+ VM_BUG_ON(!PageLocked(new));
+ VM_BUG_ON(new->mapping);
+
+ /*
+ * This is not page migration, but prepare_migration and
+ * end_migration do enough work for charge replacement.
+ *
+ * In the longer term we probably want a specialized function
+ * for moving the charge from old to new in a more efficient
+ * manner.
+ */
+ error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
+ if (error)
+ return error;
+
+ error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+ if (!error) {
+ struct address_space *mapping = old->mapping;
+ void (*freepage)(struct page *);
+
+ pgoff_t offset = old->index;
+ freepage = mapping->a_ops->freepage;
+
+ page_cache_get(new);
+ new->mapping = mapping;
+ new->index = offset;
+
+ spin_lock_irq(&mapping->tree_lock);
+ __delete_from_page_cache(old);
+ error = radix_tree_insert(&mapping->page_tree, offset, new);
+ BUG_ON(error);
+ mapping->nrpages++;
+ __inc_zone_page_state(new, NR_FILE_PAGES);
+ if (PageSwapBacked(new))
+ __inc_zone_page_state(new, NR_SHMEM);
+ spin_unlock_irq(&mapping->tree_lock);
+ radix_tree_preload_end();
+ if (freepage)
+ freepage(old);
+ page_cache_release(old);
+ mem_cgroup_end_migration(memcg, old, new, true);
+ } else {
+ mem_cgroup_end_migration(memcg, old, new, false);
+ }
+
+ return error;
+}
+EXPORT_SYMBOL_GPL(replace_page_cache_page);
+
+/**
* add_to_page_cache_locked - add a locked page to the pagecache
* @page: page to add
* @mapping: the page's address_space
@@ -621,8 +700,10 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
__lock_page(page);
return 1;
} else {
- up_read(&mm->mmap_sem);
- wait_on_page_locked(page);
+ if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ up_read(&mm->mmap_sem);
+ wait_on_page_locked(page);
+ }
return 0;
}
}
@@ -782,9 +863,13 @@ repeat:
page = radix_tree_deref_slot((void **)pages[i]);
if (unlikely(!page))
continue;
+
+ /*
+ * This can only trigger when the entry at index 0 moves out
+ * of or back to the root: none yet gotten, safe to restart.
+ */
if (radix_tree_deref_retry(page)) {
- if (ret)
- start = pages[ret-1]->index;
+ WARN_ON(start | i);
goto restart;
}
@@ -800,6 +885,13 @@ repeat:
pages[ret] = page;
ret++;
}
+
+ /*
+ * If all entries were removed before we could secure them,
+ * try again, because callers stop trying once 0 is returned.
+ */
+ if (unlikely(!ret && nr_found))
+ goto restart;
rcu_read_unlock();
return ret;
}
@@ -834,6 +926,11 @@ repeat:
page = radix_tree_deref_slot((void **)pages[i]);
if (unlikely(!page))
continue;
+
+ /*
+ * This can only trigger when the entry at index 0 moves out
+ * of or back to the root: none yet gotten, safe to restart.
+ */
if (radix_tree_deref_retry(page))
goto restart;
@@ -894,6 +991,11 @@ repeat:
page = radix_tree_deref_slot((void **)pages[i]);
if (unlikely(!page))
continue;
+
+ /*
+ * This can only trigger when the entry at index 0 moves out
+ * of or back to the root: none yet gotten, safe to restart.
+ */
if (radix_tree_deref_retry(page))
goto restart;
@@ -909,6 +1011,13 @@ repeat:
pages[ret] = page;
ret++;
}
+
+ /*
+ * If all entries were removed before we could secure them,
+ * try again, because callers stop trying once 0 is returned.
+ */
+ if (unlikely(!ret && nr_found))
+ goto restart;
rcu_read_unlock();
if (ret)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 113e35c47502..0a619e0e2e0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -643,23 +643,24 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
return ret;
}
-static inline gfp_t alloc_hugepage_gfpmask(int defrag)
+static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
{
- return GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT);
+ return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp;
}
static inline struct page *alloc_hugepage_vma(int defrag,
struct vm_area_struct *vma,
- unsigned long haddr, int nd)
+ unsigned long haddr, int nd,
+ gfp_t extra_gfp)
{
- return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
+ return alloc_pages_vma(alloc_hugepage_gfpmask(defrag, extra_gfp),
HPAGE_PMD_ORDER, vma, haddr, nd);
}
#ifndef CONFIG_NUMA
static inline struct page *alloc_hugepage(int defrag)
{
- return alloc_pages(alloc_hugepage_gfpmask(defrag),
+ return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
HPAGE_PMD_ORDER);
}
#endif
@@ -678,7 +679,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(khugepaged_enter(vma)))
return VM_FAULT_OOM;
page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
- vma, haddr, numa_node_id());
+ vma, haddr, numa_node_id(), 0);
if (unlikely(!page))
goto out;
if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -799,7 +800,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
}
for (i = 0; i < HPAGE_PMD_NR; i++) {
- pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE,
+ pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
+ __GFP_OTHER_NODE,
vma, address, page_to_nid(page));
if (unlikely(!pages[i] ||
mem_cgroup_newpage_charge(pages[i], mm,
@@ -902,7 +904,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow())
new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
- vma, haddr, numa_node_id());
+ vma, haddr, numa_node_id(), 0);
else
new_page = NULL;
@@ -1779,7 +1781,7 @@ static void collapse_huge_page(struct mm_struct *mm,
* scalability.
*/
new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
- node);
+ node, __GFP_OTHER_NODE);
if (unlikely(!new_page)) {
up_read(&mm->mmap_sem);
*hpage = ERR_PTR(-ENOMEM);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bb0b7c128015..06de5aa4d644 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1872,8 +1872,7 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
unsigned long tmp;
int ret;
- if (!write)
- tmp = h->max_huge_pages;
+ tmp = h->max_huge_pages;
if (write && h->order >= MAX_ORDER)
return -EINVAL;
@@ -1938,8 +1937,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
unsigned long tmp;
int ret;
- if (!write)
- tmp = h->nr_overcommit_huge_pages;
+ tmp = h->nr_overcommit_huge_pages;
if (write && h->order >= MAX_ORDER)
return -EINVAL;
diff --git a/mm/ksm.c b/mm/ksm.c
index c2b2a94f9d67..1bbe785aa559 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -301,20 +301,6 @@ static inline int in_stable_tree(struct rmap_item *rmap_item)
return rmap_item->address & STABLE_FLAG;
}
-static void hold_anon_vma(struct rmap_item *rmap_item,
- struct anon_vma *anon_vma)
-{
- rmap_item->anon_vma = anon_vma;
- get_anon_vma(anon_vma);
-}
-
-static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
-{
- struct anon_vma *anon_vma = rmap_item->anon_vma;
-
- drop_anon_vma(anon_vma);
-}
-
/*
* ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
* page tables after it has passed through ksm_exit() - which, if necessary,
@@ -397,7 +383,7 @@ static void break_cow(struct rmap_item *rmap_item)
* It is not an accident that whenever we want to break COW
* to undo, we also need to drop a reference to the anon_vma.
*/
- ksm_drop_anon_vma(rmap_item);
+ put_anon_vma(rmap_item->anon_vma);
down_read(&mm->mmap_sem);
if (ksm_test_exit(mm))
@@ -466,7 +452,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
ksm_pages_sharing--;
else
ksm_pages_shared--;
- ksm_drop_anon_vma(rmap_item);
+ put_anon_vma(rmap_item->anon_vma);
rmap_item->address &= PAGE_MASK;
cond_resched();
}
@@ -554,7 +540,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
else
ksm_pages_shared--;
- ksm_drop_anon_vma(rmap_item);
+ put_anon_vma(rmap_item->anon_vma);
rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & UNSTABLE_FLAG) {
@@ -949,7 +935,8 @@ static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
goto out;
/* Must get reference to anon_vma while still holding mmap_sem */
- hold_anon_vma(rmap_item, vma->anon_vma);
+ rmap_item->anon_vma = vma->anon_vma;
+ get_anon_vma(vma->anon_vma);
out:
up_read(&mm->mmap_sem);
return err;
diff --git a/mm/memblock.c b/mm/memblock.c
index 4618fda975a0..a0562d1a6ad4 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -58,28 +58,6 @@ static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, p
return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}
-static long __init_memblock memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1,
- phys_addr_t base2, phys_addr_t size2)
-{
- if (base2 == base1 + size1)
- return 1;
- else if (base1 == base2 + size2)
- return -1;
-
- return 0;
-}
-
-static long __init_memblock memblock_regions_adjacent(struct memblock_type *type,
- unsigned long r1, unsigned long r2)
-{
- phys_addr_t base1 = type->regions[r1].base;
- phys_addr_t size1 = type->regions[r1].size;
- phys_addr_t base2 = type->regions[r2].base;
- phys_addr_t size2 = type->regions[r2].size;
-
- return memblock_addrs_adjacent(base1, size1, base2, size2);
-}
-
long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
unsigned long i;
@@ -206,14 +184,13 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
type->regions[i].size = type->regions[i + 1].size;
}
type->cnt--;
-}
-/* Assumption: base addr of region 1 < base addr of region 2 */
-static void __init_memblock memblock_coalesce_regions(struct memblock_type *type,
- unsigned long r1, unsigned long r2)
-{
- type->regions[r1].size += type->regions[r2].size;
- memblock_remove_region(type, r2);
+ /* Special case for empty arrays */
+ if (type->cnt == 0) {
+ type->cnt = 1;
+ type->regions[0].base = 0;
+ type->regions[0].size = 0;
+ }
}
/* Defined below but needed now */
@@ -276,7 +253,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
return 0;
/* Add the new reserved region now. Should not fail ! */
- BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size) < 0);
+ BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size));
/* If the array wasn't our static init one, then free it. We only do
* that before SLAB is available as later on, we don't know whether
@@ -296,58 +273,99 @@ extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1
return 1;
}
-static long __init_memblock memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
+static long __init_memblock memblock_add_region(struct memblock_type *type,
+ phys_addr_t base, phys_addr_t size)
{
- unsigned long coalesced = 0;
- long adjacent, i;
-
- if ((type->cnt == 1) && (type->regions[0].size == 0)) {
- type->regions[0].base = base;
- type->regions[0].size = size;
- return 0;
- }
+ phys_addr_t end = base + size;
+ int i, slot = -1;
- /* First try and coalesce this MEMBLOCK with another. */
+ /* First try and coalesce this MEMBLOCK with others */
for (i = 0; i < type->cnt; i++) {
- phys_addr_t rgnbase = type->regions[i].base;
- phys_addr_t rgnsize = type->regions[i].size;
+ struct memblock_region *rgn = &type->regions[i];
+ phys_addr_t rend = rgn->base + rgn->size;
+
+ /* Exit if there are no possible hits */
+ if (rgn->base > end || rgn->size == 0)
+ break;
- if ((rgnbase == base) && (rgnsize == size))
- /* Already have this region, so we're done */
+ /* Check if we are fully enclosed within an existing
+ * block
+ */
+ if (rgn->base <= base && rend >= end)
return 0;
- adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
- /* Check if arch allows coalescing */
- if (adjacent != 0 && type == &memblock.memory &&
- !memblock_memory_can_coalesce(base, size, rgnbase, rgnsize))
- break;
- if (adjacent > 0) {
- type->regions[i].base -= size;
- type->regions[i].size += size;
- coalesced++;
- break;
- } else if (adjacent < 0) {
- type->regions[i].size += size;
- coalesced++;
- break;
+ /* Check if we overlap or are adjacent with the bottom
+ * of a block.
+ */
+ if (base < rgn->base && end >= rgn->base) {
+ /* If we can't coalesce, create a new block */
+ if (!memblock_memory_can_coalesce(base, size,
+ rgn->base,
+ rgn->size)) {
+ /* Overlap & can't coalesce are mutually
+ * exclusive, if you do that, be prepared
+ * for trouble
+ */
+ WARN_ON(end != rgn->base);
+ goto new_block;
+ }
+ /* We extend the bottom of the block down to our
+ * base
+ */
+ rgn->base = base;
+ rgn->size = rend - base;
+
+ /* Return if we have nothing else to allocate
+ * (fully coalesced)
+ */
+ if (rend >= end)
+ return 0;
+
+ /* We continue processing from the end of the
+ * coalesced block.
+ */
+ base = rend;
+ size = end - base;
+ }
+
+ /* Now check if we overlap or are adjacent with the
+ * top of a block
+ */
+ if (base <= rend && end >= rend) {
+ /* If we can't coalesce, create a new block */
+ if (!memblock_memory_can_coalesce(rgn->base,
+ rgn->size,
+ base, size)) {
+ /* Overlap & can't coalesce are mutually
+ * exclusive, if you do that, be prepared
+ * for trouble
+ */
+ WARN_ON(rend != base);
+ goto new_block;
+ }
+ /* We adjust our base down to enclose the
+ * original block and destroy it. It will be
+ * part of our new allocation. Since we've
+ * freed an entry, we know we won't fail
+ * to allocate one later, so we won't risk
+ * losing the original block allocation.
+ */
+ size += (base - rgn->base);
+ base = rgn->base;
+ memblock_remove_region(type, i--);
}
}
- /* If we plugged a hole, we may want to also coalesce with the
- * next region
+ /* If the array is empty, special case, replace the fake
+ * filler region and return
*/
- if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1) &&
- ((type != &memblock.memory || memblock_memory_can_coalesce(type->regions[i].base,
- type->regions[i].size,
- type->regions[i+1].base,
- type->regions[i+1].size)))) {
- memblock_coalesce_regions(type, i, i+1);
- coalesced++;
+ if ((type->cnt == 1) && (type->regions[0].size == 0)) {
+ type->regions[0].base = base;
+ type->regions[0].size = size;
+ return 0;
}
- if (coalesced)
- return coalesced;
-
+ new_block:
/* If we are out of space, we fail. It's too late to resize the array
* but then this shouldn't have happened in the first place.
*/
@@ -362,13 +380,14 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, phys
} else {
type->regions[i+1].base = base;
type->regions[i+1].size = size;
+ slot = i + 1;
break;
}
}
-
if (base < type->regions[0].base) {
type->regions[0].base = base;
type->regions[0].size = size;
+ slot = 0;
}
type->cnt++;
@@ -376,7 +395,8 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, phys
* our allocation and return an error
*/
if (type->cnt == type->max && memblock_double_array(type)) {
- type->cnt--;
+ BUG_ON(slot < 0);
+ memblock_remove_region(type, slot);
return -1;
}
@@ -389,52 +409,55 @@ long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
}
-static long __init_memblock __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
+static long __init_memblock __memblock_remove(struct memblock_type *type,
+ phys_addr_t base, phys_addr_t size)
{
- phys_addr_t rgnbegin, rgnend;
phys_addr_t end = base + size;
int i;
- rgnbegin = rgnend = 0; /* supress gcc warnings */
-
- /* Find the region where (base, size) belongs to */
- for (i=0; i < type->cnt; i++) {
- rgnbegin = type->regions[i].base;
- rgnend = rgnbegin + type->regions[i].size;
+ /* Walk through the array for collisions */
+ for (i = 0; i < type->cnt; i++) {
+ struct memblock_region *rgn = &type->regions[i];
+ phys_addr_t rend = rgn->base + rgn->size;
- if ((rgnbegin <= base) && (end <= rgnend))
+ /* Nothing more to do, exit */
+ if (rgn->base >