summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-09-19 11:27:32 +0200
committerIngo Molnar <mingo@elte.hu>2009-09-19 11:28:41 +0200
commit929bf0d0156562ce631728b6fa53d68004d456d2 (patch)
tree739063990a8077b29ef97e69d73bce94573daae4 /kernel
parentdef0a9b2573e00ab0b486cb5382625203ab4c4a6 (diff)
parent202c4675c55ddf6b443c7e057d2dff6b42ef71aa (diff)
Merge branch 'linus' into perfcounters/core
Merge reason: Bring in tracing changes we depend on. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/cpu.c15
-rw-r--r--kernel/cred.c3
-rw-r--r--kernel/dma-coherent.c176
-rw-r--r--kernel/gcov/Kconfig2
-rw-r--r--kernel/hrtimer.c2
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/perf_counter.c7
-rw-r--r--kernel/power/Kconfig14
-rw-r--r--kernel/power/hibernate.c21
-rw-r--r--kernel/power/main.c17
-rw-r--r--kernel/power/power.h2
-rw-r--r--kernel/power/snapshot.c412
-rw-r--r--kernel/printk.c6
-rw-r--r--kernel/sched.c448
-rw-r--r--kernel/sched_debug.c1
-rw-r--r--kernel/sched_fair.c414
-rw-r--r--kernel/sched_features.h122
-rw-r--r--kernel/sched_idletask.c4
-rw-r--r--kernel/sched_rt.c7
-rw-r--r--kernel/smp.c40
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/sysctl.c14
-rw-r--r--kernel/taskstats.c10
-rw-r--r--kernel/trace/Kconfig28
-rw-r--r--kernel/trace/ftrace.c158
-rw-r--r--kernel/trace/ring_buffer.c17
-rw-r--r--kernel/trace/trace.c127
-rw-r--r--kernel/trace/trace.h276
-rw-r--r--kernel/trace/trace_boot.c8
-rw-r--r--kernel/trace/trace_clock.c24
-rw-r--r--kernel/trace/trace_entries.h383
-rw-r--r--kernel/trace/trace_event_profile.c5
-rw-r--r--kernel/trace/trace_event_types.h178
-rw-r--r--kernel/trace/trace_events.c85
-rw-r--r--kernel/trace/trace_events_filter.c41
-rw-r--r--kernel/trace/trace_export.c284
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_functions_graph.c66
-rw-r--r--kernel/trace/trace_irqsoff.c16
-rw-r--r--kernel/trace/trace_mmiotrace.c10
-rw-r--r--kernel/trace/trace_output.c42
-rw-r--r--kernel/trace/trace_output.h2
-rw-r--r--kernel/trace/trace_sched_wakeup.c52
44 files changed, 2009 insertions, 1543 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index b833bd5cc127..3d9c7e27e3f9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -90,7 +90,6 @@ obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
-obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
@@ -117,7 +116,7 @@ $(obj)/config_data.gz: .config FORCE
$(call if_changed,gzip)
quiet_cmd_ikconfiggz = IKCFG $@
- cmd_ikconfiggz = (echo "static const char kernel_config_data[] = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@
+ cmd_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
$(call if_changed,ikconfiggz)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8ce10043e4ac..6ba0f1ecb212 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -401,6 +401,7 @@ int disable_nonboot_cpus(void)
break;
}
}
+
if (!error) {
BUG_ON(num_online_cpus() > 1);
/* Make sure the CPUs won't be enabled by someone else */
@@ -413,6 +414,14 @@ int disable_nonboot_cpus(void)
return error;
}
+void __weak arch_enable_nonboot_cpus_begin(void)
+{
+}
+
+void __weak arch_enable_nonboot_cpus_end(void)
+{
+}
+
void __ref enable_nonboot_cpus(void)
{
int cpu, error;
@@ -424,6 +433,9 @@ void __ref enable_nonboot_cpus(void)
goto out;
printk("Enabling non-boot CPUs ...\n");
+
+ arch_enable_nonboot_cpus_begin();
+
for_each_cpu(cpu, frozen_cpus) {
error = _cpu_up(cpu, 1);
if (!error) {
@@ -432,6 +444,9 @@ void __ref enable_nonboot_cpus(void)
}
printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
}
+
+ arch_enable_nonboot_cpus_end();
+
cpumask_clear(frozen_cpus);
out:
cpu_maps_update_done();
diff --git a/kernel/cred.c b/kernel/cred.c
index 006fcab009d5..d7f7a01082eb 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -147,7 +147,8 @@ static void put_cred_rcu(struct rcu_head *rcu)
key_put(cred->thread_keyring);
key_put(cred->request_key_auth);
release_tgcred(cred);
- put_group_info(cred->group_info);
+ if (cred->group_info)
+ put_group_info(cred->group_info);
free_uid(cred->user);
kmem_cache_free(cred_jar, cred);
}
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
deleted file mode 100644
index 962a3b574f21..000000000000
--- a/kernel/dma-coherent.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Coherent per-device memory handling.
- * Borrowed from i386
- */
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-
-struct dma_coherent_mem {
- void *virt_base;
- u32 device_base;
- int size;
- int flags;
- unsigned long *bitmap;
-};
-
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
- dma_addr_t device_addr, size_t size, int flags)
-{
- void __iomem *mem_base = NULL;
- int pages = size >> PAGE_SHIFT;
- int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
- goto out;
- if (!size)
- goto out;
- if (dev->dma_mem)
- goto out;
-
- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
- mem_base = ioremap(bus_addr, size);
- if (!mem_base)
- goto out;
-
- dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
- if (!dev->dma_mem)
- goto out;
- dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!dev->dma_mem->bitmap)
- goto free1_out;
-
- dev->dma_mem->virt_base = mem_base;
- dev->dma_mem->device_base = device_addr;
- dev->dma_mem->size = pages;
- dev->dma_mem->flags = flags;
-
- if (flags & DMA_MEMORY_MAP)
- return DMA_MEMORY_MAP;
-
- return DMA_MEMORY_IO;
-
- free1_out:
- kfree(dev->dma_mem);
- out:
- if (mem_base)
- iounmap(mem_base);
- return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
-
- if (!mem)
- return;
- dev->dma_mem = NULL;
- iounmap(mem->virt_base);
- kfree(mem->bitmap);
- kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
- int pos, err;
-
- size += device_addr & ~PAGE_MASK;
-
- if (!mem)
- return ERR_PTR(-EINVAL);
-
- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
- if (err != 0)
- return ERR_PTR(err);
- return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
-/**
- * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area
- *
- * @dev: device from which we allocate memory
- * @size: size of requested memory area
- * @dma_handle: This will be filled with the correct dma handle
- * @ret: This pointer will be filled with the virtual address
- * to allocated area.
- *
- * This function should be only called from per-arch dma_alloc_coherent()
- * to support allocation from per-device coherent memory pools.
- *
- * Returns 0 if dma_alloc_coherent should continue with allocating from
- * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
- */
-int dma_alloc_from_coherent(struct device *dev, ssize_t size,
- dma_addr_t *dma_handle, void **ret)
-{
- struct dma_coherent_mem *mem;
- int order = get_order(size);
- int pageno;
-
- if (!dev)
- return 0;
- mem = dev->dma_mem;
- if (!mem)
- return 0;
-
- *ret = NULL;
-
- if (unlikely(size > (mem->size << PAGE_SHIFT)))
- goto err;
-
- pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
- if (unlikely(pageno < 0))
- goto err;
-
- /*
- * Memory was found in the per-device area.
- */
- *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
- *ret = mem->virt_base + (pageno << PAGE_SHIFT);
- memset(*ret, 0, size);
-
- return 1;
-
-err:
- /*
- * In the case where the allocation can not be satisfied from the
- * per-device area, try to fall back to generic memory if the
- * constraints allow it.
- */
- return mem->flags & DMA_MEMORY_EXCLUSIVE;
-}
-EXPORT_SYMBOL(dma_alloc_from_coherent);
-
-/**
- * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool
- * @dev: device from which the memory was allocated
- * @order: the order of pages allocated
- * @vaddr: virtual address of allocated pages
- *
- * This checks whether the memory was allocated from the per-device
- * coherent memory pool and if so, releases that memory.
- *
- * Returns 1 if we correctly released the memory, or 0 if
- * dma_release_coherent() should proceed with releasing memory from
- * generic pools.
- */
-int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-
- if (mem && vaddr >= mem->virt_base && vaddr <
- (mem->virt_base + (mem->size << PAGE_SHIFT))) {
- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
- bitmap_release_region(mem->bitmap, page, order);
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(dma_release_from_coherent);
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 22e9dcfaa3d3..654efd09f6a9 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -34,7 +34,7 @@ config GCOV_KERNEL
config GCOV_PROFILE_ALL
bool "Profile entire Kernel"
depends on GCOV_KERNEL
- depends on S390 || X86
+ depends on S390 || X86 || (PPC && EXPERIMENTAL)
default n
---help---
This options activates profiling for the entire kernel.
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 49da79ab8486..05071bf6a37b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -485,6 +485,7 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
debug_object_init_on_stack(timer, &hrtimer_debug_descr);
__hrtimer_init(timer, clock_id, mode);
}
+EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
void destroy_hrtimer_on_stack(struct hrtimer *timer)
{
@@ -1477,6 +1478,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
sl->timer.function = hrtimer_wakeup;
sl->task = task;
}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
{
diff --git a/kernel/module.c b/kernel/module.c
index 46580edff0cb..05ce49ced8f6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -369,7 +369,7 @@ EXPORT_SYMBOL_GPL(find_module);
#ifdef CONFIG_SMP
-#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
static void *percpu_modalloc(unsigned long size, unsigned long align,
const char *name)
@@ -394,7 +394,7 @@ static void percpu_modfree(void *freeme)
free_percpu(freeme);
}
-#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
/* Number of blocks used and allocated. */
static unsigned int pcpu_num_used, pcpu_num_allocated;
@@ -540,7 +540,7 @@ static int percpu_modinit(void)
}
__initcall(percpu_modinit);
-#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 06d233a06da5..d013f4e89e9c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -106,16 +106,16 @@ hw_perf_group_sched_in(struct perf_counter *group_leader,
void __weak perf_counter_print_debug(void) { }
-static DEFINE_PER_CPU(int, disable_count);
+static DEFINE_PER_CPU(int, perf_disable_count);
void __perf_disable(void)
{
- __get_cpu_var(disable_count)++;
+ __get_cpu_var(perf_disable_count)++;
}
bool __perf_enable(void)
{
- return !--__get_cpu_var(disable_count);
+ return !--__get_cpu_var(perf_disable_count);
}
void perf_disable(void)
@@ -4246,6 +4246,7 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr,
if (val)
goto err_size;
}
+ size = sizeof(*attr);
}
ret = copy_from_user(attr, uattr, size);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 72067cbdb37f..91e09d3b2eb2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -208,3 +208,17 @@ config APM_EMULATION
random kernel OOPSes or reboots that don't seem to be related to
anything, try disabling/enabling this option (or disabling/enabling
APM in your BIOS).
+
+config PM_RUNTIME
+ bool "Run-time PM core functionality"
+ depends on PM
+ ---help---
+ Enable functionality allowing I/O devices to be put into energy-saving
+ (low power) states at run time (or autosuspended) after a specified
+ period of inactivity and woken up in response to a hardware-generated
+ wake-up event or a driver's request.
+
+ Hardware support is generally required for this functionality to work
+ and the bus type drivers of the buses the devices are on are
+ responsible for the actual handling of the autosuspend requests and
+ wake-up events.
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 81d2e7464893..04b3a83d686f 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -298,8 +298,8 @@ int hibernation_snapshot(int platform_mode)
if (error)
return error;
- /* Free memory before shutting down devices. */
- error = swsusp_shrink_memory();
+ /* Preallocate image memory before shutting down devices. */
+ error = hibernate_preallocate_memory();
if (error)
goto Close;
@@ -315,6 +315,10 @@ int hibernation_snapshot(int platform_mode)
/* Control returns here after successful restore */
Resume_devices:
+ /* We may need to release the preallocated image pages here. */
+ if (error || !in_suspend)
+ swsusp_free();
+
dpm_resume_end(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
resume_console();
@@ -460,11 +464,11 @@ int hibernation_platform_enter(void)
error = hibernation_ops->prepare();
if (error)
- goto Platofrm_finish;
+ goto Platform_finish;
error = disable_nonboot_cpus();
if (error)
- goto Platofrm_finish;
+ goto Platform_finish;
local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
@@ -476,7 +480,7 @@ int hibernation_platform_enter(void)
* We don't need to reenable the nonboot CPUs or resume consoles, since
* the system is going to be halted anyway.
*/
- Platofrm_finish:
+ Platform_finish:
hibernation_ops->finish();
dpm_suspend_noirq(PMSG_RESTORE);
@@ -578,7 +582,10 @@ int hibernate(void)
goto Thaw;
error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
- if (in_suspend && !error) {
+ if (error)
+ goto Thaw;
+
+ if (in_suspend) {
unsigned int flags = 0;
if (hibernation_mode == HIBERNATION_PLATFORM)
@@ -590,8 +597,8 @@ int hibernate(void)
power_down();
} else {
pr_debug("PM: Image restored successfully.\n");
- swsusp_free();
}
+
Thaw:
thaw_processes();
Finish:
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f710e36930cc..347d2cc88cd0 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -11,6 +11,7 @@
#include <linux/kobject.h>
#include <linux/string.h>
#include <linux/resume-trace.h>
+#include <linux/workqueue.h>
#include "power.h"
@@ -217,8 +218,24 @@ static struct attribute_group attr_group = {
.attrs = g,
};
+#ifdef CONFIG_PM_RUNTIME
+struct workqueue_struct *pm_wq;
+
+static int __init pm_start_workqueue(void)
+{
+ pm_wq = create_freezeable_workqueue("pm");
+
+ return pm_wq ? 0 : -ENOMEM;
+}
+#else
+static inline int pm_start_workqueue(void) { return 0; }
+#endif
+
static int __init pm_init(void)
{
+ int error = pm_start_workqueue();
+ if (error)
+ return error;
power_kobj = kobject_create_and_add("power", NULL);
if (!power_kobj)
return -ENOMEM;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 26d5a26f82e3..46c5a26630a3 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void);
extern int create_basic_memory_bitmaps(void);
extern void free_basic_memory_bitmaps(void);
-extern int swsusp_shrink_memory(void);
+extern int hibernate_preallocate_memory(void);
/**
* Auxiliary structure used for reading the snapshot image data and
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 523a451b45d3..97955b0e44f4 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -233,7 +233,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
#define BM_END_OF_MAP (~0UL)
-#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
+#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE)
struct bm_block {
struct list_head hook; /* hook into a list of bitmap blocks */
@@ -275,7 +275,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
/**
* create_bm_block_list - create a list of block bitmap objects
- * @nr_blocks - number of blocks to allocate
+ * @pages - number of pages to track
* @list - list to put the allocated blocks into
* @ca - chain allocator to be used for allocating memory
*/
@@ -853,7 +853,7 @@ static unsigned int count_highmem_pages(void)
struct zone *zone;
unsigned int n = 0;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
unsigned long pfn, max_zone_pfn;
if (!is_highmem(zone))
@@ -916,7 +916,7 @@ static unsigned int count_data_pages(void)
unsigned long pfn, max_zone_pfn;
unsigned int n = 0;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
if (is_highmem(zone))
continue;
@@ -1010,7 +1010,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
struct zone *zone;
unsigned long pfn;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
unsigned long max_zone_pfn;
mark_free_pages(zone);
@@ -1033,6 +1033,25 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
static unsigned int nr_copy_pages;
/* Number of pages needed for saving the original pfns of the image pages */
static unsigned int nr_meta_pages;
+/*
+ * Numbers of normal and highmem page frames allocated for hibernation image
+ * before suspending devices.
+ */
+unsigned int alloc_normal, alloc_highmem;
+/*
+ * Memory bitmap used for marking saveable pages (during hibernation) or
+ * hibernation image pages (during restore)
+ */
+static struct memory_bitmap orig_bm;
+/*
+ * Memory bitmap used during hibernation for marking allocated page frames that
+ * will contain copies of saveable pages. During restore it is initially used
+ * for marking hibernation image pages, but then the set bits from it are
+ * duplicated in @orig_bm and it is released. On highmem systems it is next
+ * used for marking "safe" highmem pages, but it has to be reinitialized for
+ * this purpose.
+ */
+static struct memory_bitmap copy_bm;
/**
* swsusp_free - free pages allocated for the suspend.
@@ -1046,7 +1065,7 @@ void swsusp_free(void)
struct zone *zone;
unsigned long pfn, max_zone_pfn;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn)) {
@@ -1064,74 +1083,286 @@ void swsusp_free(void)
nr_meta_pages = 0;
restore_pblist = NULL;
buffer = NULL;
+ alloc_normal = 0;
+ alloc_highmem = 0;
}
+/* Helper functions used for the shrinking of memory. */
+
+#define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN)
+
/**
- * swsusp_shrink_memory - Try to free as much memory as needed
- *
- * ... but do not OOM-kill anyone
+ * preallocate_image_pages - Allocate a number of pages for hibernation image
+ * @nr_pages: Number of page frames to allocate.
+ * @mask: GFP flags to use for the allocation.
*
- * Notice: all userland should be stopped before it is called, or
- * livelock is possible.
+ * Return value: Number of page frames actually allocated
+ */
+static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
+{
+ unsigned long nr_alloc = 0;
+
+ while (nr_pages > 0) {
+ struct page *page;
+
+ page = alloc_image_page(mask);
+ if (!page)
+ break;
+ memory_bm_set_bit(&copy_bm, page_to_pfn(page));
+ if (PageHighMem(page))
+ alloc_highmem++;
+ else
+ alloc_normal++;
+ nr_pages--;
+ nr_alloc++;
+ }
+
+ return nr_alloc;
+}
+
+static unsigned long preallocate_image_memory(unsigned long nr_pages)
+{
+ return preallocate_image_pages(nr_pages, GFP_IMAGE);
+}
+
+#ifdef CONFIG_HIGHMEM
+static unsigned long preallocate_image_highmem(unsigned long nr_pages)
+{
+ return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
+}
+
+/**
+ * __fraction - Compute (an approximation of) x * (multiplier / base)
*/
+static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
+{
+ x *= multiplier;
+ do_div(x, base);
+ return (unsigned long)x;
+}
+
+static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
+ unsigned long highmem,
+ unsigned long total)
+{
+ unsigned long alloc = __fraction(nr_pages, highmem, total);
-#define SHRINK_BITE 10000
-static inline unsigned long __shrink_memory(long tmp)
+ return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
+}
+#else /* CONFIG_HIGHMEM */
+static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
{
- if (tmp > SHRINK_BITE)
- tmp = SHRINK_BITE;
- return shrink_all_memory(tmp);
+ return 0;
}
-int swsusp_shrink_memory(void)
+static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
+ unsigned long highmem,
+ unsigned long total)
+{
+ return 0;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * free_unnecessary_pages - Release preallocated pages not needed for the image
+ */
+static void free_unnecessary_pages(void)
+{
+ unsigned long save_highmem, to_free_normal, to_free_highmem;
+
+ to_free_normal = alloc_normal - count_data_pages();
+ save_highmem = count_highmem_pages();
+ if (alloc_highmem > save_highmem) {
+ to_free_highmem = alloc_highmem - save_highmem;
+ } else {
+ to_free_highmem = 0;
+ to_free_normal -= save_highmem - alloc_highmem;
+ }
+
+ memory_bm_position_reset(&copy_bm);
+
+ while (to_free_normal > 0 && to_free_highmem > 0) {
+ unsigned long pfn = memory_bm_next_pfn(&copy_bm);
+ struct page *page = pfn_to_page(pfn);
+
+ if (PageHighMem(page)) {
+ if (!to_free_highmem)
+ continue;
+ to_free_highmem--;
+ alloc_highmem--;
+ } else {
+ if (!to_free_normal)
+ continue;
+ to_free_normal--;
+ alloc_normal--;
+ }
+ memory_bm_clear_bit(&copy_bm, pfn);
+ swsusp_unset_page_forbidden(page);
+ swsusp_unset_page_free(page);
+ __free_page(page);
+ }
+}
+
+/**
+ * minimum_image_size - Estimate the minimum acceptable size of an image
+ * @saveable: Number of saveable pages in the system.
+ *
+ * We want to avoid attempting to free too much memory too hard, so estimate the
+ * minimum acceptable size of a hibernation image to use as the lower limit for
+ * preallocating memory.
+ *
+ * We assume that the minimum image size should be proportional to
+ *
+ * [number of saveable pages] - [number of pages that can be freed in theory]
+ *
+ * where the second term is the sum of (1) reclaimable slab pages, (2) active
+ * and (3) inactive anonymouns pages, (4) active and (5) inactive file pages,
+ * minus mapped file pages.
+ */
+static unsigned long minimum_image_size(unsigned long saveable)
+{
+ unsigned long size;
+
+ size = global_page_state(NR_SLAB_RECLAIMABLE)
+ + global_page_state(NR_ACTIVE_ANON)
+ + global_page_state(NR_INACTIVE_ANON)
+ + global_page_state(NR_ACTIVE_FILE)
+ + global_page_state(NR_INACTIVE_FILE)
+ - global_page_state(NR_FILE_MAPPED);
+
+ return saveable <= size ? 0 : saveable - size;
+}
+
+/**
+ * hibernate_preallocate_memory - Preallocate memory for hibernation image
+ *
+ * To create a hibernation image it is necessary to make a copy of every page
+ * frame in use. We also need a number of page frames to be free during
+ * hibernation for allocations made while saving the image and for device
+ * drivers, in case they need to allocate memory from their hibernation
+ * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES,
+ * respectively, both of which are rough estimates). To make this happen, we
+ * compute the total number of available page frames and allocate at least
+ *
+ * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES
+ *
+ * of them, which corresponds to the maximum size of a hibernation image.
+ *
+ * If image_size is set below the number following from the above formula,
+ * the preallocation of memory is continued until the total number