From 010fc29a45a2e8dbc08bf45ef80b8622619aaae0 Mon Sep 17 00:00:00 2001
From: Minchan Kim
Date: Thu, 20 Dec 2012 15:05:06 -0800
Subject: compaction: fix build error in CMA && !COMPACTION

isolate_freepages_block() and isolate_migratepages_range() are used for
CMA as well as for compaction, so the build breaks for CONFIG_CMA &&
!CONFIG_COMPACTION.  This patch fixes it.

[akpm@linux-foundation.org: add "do { } while (0)", per Mel]
Signed-off-by: Minchan Kim
Cc: Mel Gorman
Cc: Marek Szyprowski
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/compaction.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'mm')

diff --git a/mm/compaction.c b/mm/compaction.c
index 5ad7f4f4d6f7..6b807e466497 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -17,6 +17,21 @@
 #include 
 #include "internal.h"
 
+#ifdef CONFIG_COMPACTION
+static inline void count_compact_event(enum vm_event_item item)
+{
+	count_vm_event(item);
+}
+
+static inline void count_compact_events(enum vm_event_item item, long delta)
+{
+	count_vm_events(item, delta);
+}
+#else
+#define count_compact_event(item) do { } while (0)
+#define count_compact_events(item, delta) do { } while (0)
+#endif
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 #define CREATE_TRACE_POINTS
@@ -303,10 +318,9 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 	if (blockpfn == end_pfn)
 		update_pageblock_skip(cc, valid_page, total_isolated, false);
 
-	count_vm_events(COMPACTFREE_SCANNED, nr_scanned);
+	count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
 	if (total_isolated)
-		count_vm_events(COMPACTISOLATED, total_isolated);
-
+		count_compact_events(COMPACTISOLATED, total_isolated);
 	return total_isolated;
 }
 
@@ -613,9 +627,9 @@ next_pageblock:
 
 	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
 
-	count_vm_events(COMPACTMIGRATE_SCANNED, nr_scanned);
+	count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
 	if (nr_isolated)
-		count_vm_events(COMPACTISOLATED, nr_isolated);
+		count_compact_events(COMPACTISOLATED, nr_isolated);
 
 	return low_pfn;
 }
@@ -1110,7 +1124,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	if (!order || !may_enter_fs || !may_perform_io)
 		return rc;
 
-	count_vm_event(COMPACTSTALL);
+	count_compact_event(COMPACTSTALL);
 
 #ifdef CONFIG_CMA
 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-- 
cgit v1.2.3
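An aside on the idiom the patch above relies on: when the feature is configured
in, the counters are thin static inline wrappers; when it is configured out,
they become empty do { } while (0) macros, so every call site still compiles
and still behaves as a single statement inside an unbraced if/else.  The
sketch below is standalone userspace code, not kernel code; MY_FEATURE,
count_event(), count_events() and the stats array are made-up stand-ins.

    /* Sketch of config-dependent no-op wrappers; illustrative names only. */
    #include <stdio.h>

    #define MY_FEATURE 1   /* stand-in for CONFIG_COMPACTION; try 0 as well */

    static long stats[4];

    #if MY_FEATURE
    static inline void count_event(int item)          { stats[item]++; }
    static inline void count_events(int item, long n) { stats[item] += n; }
    #else
    /* do { } while (0) keeps the no-op usable as one statement */
    #define count_event(item)     do { } while (0)
    #define count_events(item, n) do { } while (0)
    #endif

    int main(void)
    {
            count_event(0);
            count_events(1, 42);
            printf("stats[0]=%ld stats[1]=%ld\n", stats[0], stats[1]);
            return 0;
    }

Either setting of MY_FEATURE compiles the same call sites, which is exactly
what the CONFIG_CMA && !CONFIG_COMPACTION build needed.
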
From c8b74c2f6604923de91f8aa6539f8bb934736754 Mon Sep 17 00:00:00 2001
From: Sonny Rao
Date: Thu, 20 Dec 2012 15:05:07 -0800
Subject: mm: fix calculation of dirtyable memory

The system uses global_dirtyable_memory() to calculate the number of
dirtyable pages, i.e. pages that can be allocated to the page cache.  A
bug causes an underflow, making the page count look like a huge unsigned
number.  This in turn confuses dirty writeback throttling into
aggressively writing back pages as they become dirty (usually one page at
a time).  This generally only affects systems with highmem, because the
underflowed count gets subtracted from the global count of dirtyable
memory.

The problem was introduced with v3.2-4896-gab8fabd

The fix is to ensure we don't get an underflowed total of either highmem
or global dirtyable memory.

Signed-off-by: Sonny Rao
Signed-off-by: Puneet Kumar
Acked-by: Johannes Weiner
Tested-by: Damien Wyart
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page-writeback.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

(limited to 'mm')

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6f4271224493..0713bfbf0954 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -200,6 +200,18 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 		x += zone_page_state(z, NR_FREE_PAGES) +
 		     zone_reclaimable_pages(z) - z->dirty_balance_reserve;
 	}
+	/*
+	 * Unreclaimable memory (kernel memory or anonymous memory
+	 * without swap) can bring down the dirtyable pages below
+	 * the zone's dirty balance reserve and the above calculation
+	 * will underflow.  However we still want to add in nodes
+	 * which are below threshold (negative values) to get a more
+	 * accurate calculation but make sure that the total never
+	 * underflows.
+	 */
+	if ((long)x < 0)
+		x = 0;
+
 	/*
 	 * Make sure that the number of highmem pages is never larger
 	 * than the number of the total dirtyable memory.  This can only
@@ -222,8 +234,8 @@ static unsigned long global_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() -
-	    dirty_balance_reserve;
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+	x -= min(x, dirty_balance_reserve);
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
@@ -290,9 +302,12 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
 	 * highmem zone can hold its share of dirty pages, so we don't
 	 * care about vm_highmem_is_dirtyable here.
 	 */
-	return zone_page_state(zone, NR_FREE_PAGES) +
-	       zone_reclaimable_pages(zone) -
-	       zone->dirty_balance_reserve;
+	unsigned long nr_pages = zone_page_state(zone, NR_FREE_PAGES) +
+		zone_reclaimable_pages(zone);
+
+	/* don't allow this to underflow */
+	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
+	return nr_pages;
 }
 
 /**
-- 
cgit v1.2.3
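The underflow fixed above is ordinary unsigned wrap-around: when the reserve
exceeds the sum of free and reclaimable pages, "free + reclaimable - reserve"
wraps to a value near ULONG_MAX, which the throttling code then treats as an
enormous amount of dirtyable memory.  A small userspace sketch with made-up
page counts shows the naive expression next to the clamped form the fix uses
(MIN here stands in for the kernel's min()):

    /* Sketch of unsigned underflow vs. the clamped subtraction. */
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
            unsigned long free_pages = 100, reclaimable = 50, reserve = 200;

            /* naive form: 150 - 200 wraps to a huge unsigned value */
            unsigned long naive = free_pages + reclaimable - reserve;

            /* clamped form: subtract at most what we actually have */
            unsigned long clamped = free_pages + reclaimable;
            clamped -= MIN(clamped, reserve);

            printf("naive   = %lu\n", naive);   /* ~1.8e19 on 64-bit */
            printf("clamped = %lu\n", clamped); /* 0 */
            return 0;
    }
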
From bcc2b02f4c1b36bc67272df7119b75bac78525ab Mon Sep 17 00:00:00 2001
From: Marek Szyprowski
Date: Thu, 20 Dec 2012 15:05:18 -0800
Subject: mm: cma: WARN if freed memory is still in use

Memory returned to free_contig_range() must have no other references.
Let the kernel complain loudly if the page reference count is not equal
to 1.

[rientjes@google.com: support sparsemem]
Signed-off-by: Marek Szyprowski
Reviewed-by: Kyungmin Park
Acked-by: Michal Nazarewicz
Signed-off-by: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'mm')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2ad2ad168efe..4ba5e37127fc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5978,8 +5978,15 @@ done:
 
 void free_contig_range(unsigned long pfn, unsigned nr_pages)
 {
-	for (; nr_pages--; ++pfn)
-		__free_page(pfn_to_page(pfn));
+	unsigned int count = 0;
+
+	for (; nr_pages--; pfn++) {
+		struct page *page = pfn_to_page(pfn);
+
+		count += page_count(page) != 1;
+		__free_page(page);
+	}
+	WARN(count != 0, "%d pages are still in use!\n", count);
 }
 #endif
 
-- 
cgit v1.2.3
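The free_contig_range() change above also shows a reporting pattern worth
noting: anomalies are tallied inside the loop by adding the boolean
"page_count(page) != 1" to a counter, and a single WARN() fires afterwards
instead of one message per bad page.  A userspace sketch of the same shape
follows; the refcount array and fake_warn() are illustrative stand-ins, not
kernel interfaces.

    /* Sketch: tally anomalies in the loop, warn once at the end. */
    #include <stdio.h>

    static void fake_warn(int cond, const char *fmt, int arg)
    {
            if (cond)
                    fprintf(stderr, fmt, arg);   /* stands in for WARN() */
    }

    int main(void)
    {
            int refcount[5] = { 1, 1, 3, 1, 2 }; /* two "pages" still referenced */
            unsigned int i, count = 0;

            for (i = 0; i < 5; i++)
                    count += refcount[i] != 1;   /* booleans add up to a tally */

            fake_warn(count != 0, "%d pages are still in use!\n", count);
            return 0;
    }
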
From 2c79737af83e0d586899f48c6010148ea2064369 Mon Sep 17 00:00:00 2001
From: Jeremy Eder
Date: Thu, 20 Dec 2012 15:05:32 -0800
Subject: mm: clean up transparent hugepage sysfs error messages

Clarify error messages and correct a few typos in the transparent
hugepage sysfs init code.

Signed-off-by: Jeremy Eder
Acked-by: Rafael Aquini
Acked-by: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/huge_memory.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'mm')

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 32754eece63e..9e894edc7811 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -574,19 +574,19 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
 
 	*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
 	if (unlikely(!*hugepage_kobj)) {
-		printk(KERN_ERR "hugepage: failed kobject create\n");
+		printk(KERN_ERR "hugepage: failed to create transparent hugepage kobject\n");
 		return -ENOMEM;
 	}
 
 	err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group);
 	if (err) {
-		printk(KERN_ERR "hugepage: failed register hugeage group\n");
+		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
 		goto delete_obj;
 	}
 
 	err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group);
 	if (err) {
-		printk(KERN_ERR "hugepage: failed register hugeage group\n");
+		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
 		goto remove_hp_group;
 	}
 
-- 
cgit v1.2.3


From 154b454edaf6d94a69016db6c342c57fa935bbe9 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 20 Dec 2012 15:05:40 -0800
Subject: memcg: don't register hotcpu notifier from ->css_alloc()

Commit 648bb56d076b ("cgroup: lock cgroup_mutex in cgroup_init_subsys()")
made cgroup_init_subsys() grab cgroup_mutex before invoking ->css_alloc()
for the root css.  Because memcg registers its hotcpu notifier from
->css_alloc() for the root css, this introduced a circular locking
dependency between cgroup_mutex and cpu hotplug.

Fix it by moving hotcpu notifier registration to a subsys initcall.

  ======================================================
  [ INFO: possible circular locking dependency detected ]
  3.7.0-rc4-work+ #42 Not tainted
  -------------------------------------------------------
  bash/645 is trying to acquire lock:
   (cgroup_mutex){+.+.+.}, at: [] cgroup_lock+0x17/0x20

  but task is already holding lock:
   (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2f/0x60

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #1 (cpu_hotplug.lock){+.+.+.}:
         lock_acquire+0x97/0x1e0
         mutex_lock_nested+0x61/0x3b0
         get_online_cpus+0x3c/0x60
         rebuild_sched_domains_locked+0x1b/0x70
         cpuset_write_resmask+0x298/0x2c0
         cgroup_file_write+0x1ef/0x300
         vfs_write+0xa8/0x160
         sys_write+0x52/0xa0
         system_call_fastpath+0x16/0x1b

  -> #0 (cgroup_mutex){+.+.+.}:
         __lock_acquire+0x14ce/0x1d20
         lock_acquire+0x97/0x1e0
         mutex_lock_nested+0x61/0x3b0
         cgroup_lock+0x17/0x20
         cpuset_handle_hotplug+0x1b/0x560
         cpuset_update_active_cpus+0xe/0x10
         cpuset_cpu_inactive+0x47/0x50
         notifier_call_chain+0x66/0x150
         __raw_notifier_call_chain+0xe/0x10
         __cpu_notify+0x20/0x40
         _cpu_down+0x7e/0x2f0
         cpu_down+0x36/0x50
         store_online+0x5d/0xe0
         dev_attr_store+0x18/0x30
         sysfs_write_file+0xe0/0x150
         vfs_write+0xa8/0x160
         sys_write+0x52/0xa0
         system_call_fastpath+0x16/0x1b

  other info that might help us debug this:

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(cpu_hotplug.lock);
                                 lock(cgroup_mutex);
                                 lock(cpu_hotplug.lock);
    lock(cgroup_mutex);

   *** DEADLOCK ***

  5 locks held by bash/645:
   #0:  (&buffer->mutex){+.+.+.}, at: [] sysfs_write_file+0x48/0x150
   #1:  (s_active#42){.+.+.+}, at: [] sysfs_write_file+0xc8/0x150
   #2:  (x86_cpu_hotplug_driver_mutex){+.+...}, at: [] cpu_hotplug_driver_lock+0x17/0x20
   #3:  (cpu_add_remove_lock){+.+.+.}, at: [] cpu_maps_update_begin+0x17/0x20
   #4:  (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x2f/0x60

  stack backtrace:
  Pid: 645, comm: bash Not tainted 3.7.0-rc4-work+ #42
  Call Trace:
   print_circular_bug+0x28e/0x29f
   __lock_acquire+0x14ce/0x1d20
   lock_acquire+0x97/0x1e0
   mutex_lock_nested+0x61/0x3b0
   cgroup_lock+0x17/0x20
   cpuset_handle_hotplug+0x1b/0x560
   cpuset_update_active_cpus+0xe/0x10
   cpuset_cpu_inactive+0x47/0x50
   notifier_call_chain+0x66/0x150
   __raw_notifier_call_chain+0xe/0x10
   __cpu_notify+0x20/0x40
   _cpu_down+0x7e/0x2f0
   cpu_down+0x36/0x50
   store_online+0x5d/0xe0
   dev_attr_store+0x18/0x30
   sysfs_write_file+0xe0/0x150
   vfs_write+0xa8/0x160
   sys_write+0x52/0xa0
   system_call_fastpath+0x16/0x1b

Signed-off-by: Tejun Heo
Reported-by: Fengguang Wu
Acked-by: Michal Hocko
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memcontrol.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f3009b4bae51..09255ec8159c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6090,7 +6090,6 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 						&per_cpu(memcg_stock, cpu);
 			INIT_WORK(&stock->work, drain_local_stock);
 		}
-		hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
 	} else {
 		parent = mem_cgroup_from_cont(cont->parent);
 		memcg->use_hierarchy = parent->use_hierarchy;
@@ -6756,6 +6755,19 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.use_id = 1,
 };
 
+/*
+ * The rest of init is performed during ->css_alloc() for root css which
+ * happens before initcalls.  hotcpu_notifier() can't be done together as
+ * it would introduce circular locking by adding cgroup_lock -> cpu hotplug
+ * dependency.  Do it from a subsys_initcall().
+ */
+static int __init mem_cgroup_init(void)
+{
+	hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
+	return 0;
+}
+subsys_initcall(mem_cgroup_init);
+
 #ifdef CONFIG_MEMCG_SWAP
 static int __init enable_swap_account(char *s)
 {
-- 
cgit v1.2.3
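Reduced to its essentials, the fix above moves the work that needs the second
lock out of the path that already runs under the first lock and into a later
init stage, so the two locks are never acquired in opposite orders.  The
sketch below is standalone userspace code, not kernel code; the mutex names
and the css_alloc_root()/late_init() functions are hypothetical stand-ins for
->css_alloc() under cgroup_mutex and for the new subsys_initcall.

    /* Sketch: break a lock-order cycle by deferring registration. */
    #include <stdio.h>
    #include <pthread.h>

    static pthread_mutex_t cgroup_lock  = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;

    static void css_alloc_root(void)
    {
            pthread_mutex_lock(&cgroup_lock);
            /* allocate and initialise state; take no other locks here */
            puts("root css allocated; hotplug_lock not touched");
            pthread_mutex_unlock(&cgroup_lock);
    }

    static void late_init(void)   /* runs later, like a subsys_initcall */
    {
            pthread_mutex_lock(&hotplug_lock);
            puts("hotcpu notifier registered from a separate init stage");
            pthread_mutex_unlock(&hotplug_lock);
    }

    int main(void)
    {
            css_alloc_root();   /* early, under cgroup_lock only */
            late_init();        /* later, takes hotplug_lock on its own */
            return 0;
    }

Because neither function holds one lock while taking the other, the
cgroup_mutex -> cpu_hotplug.lock ordering reported by lockdep above can no
longer form.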