/*
* linux/mm/oom_kill.c
*
* Copyright (C) 1998,2000 Rik van Riel
* Thanks go out to Claus Fischer for some serious inspiration and
* for goading me into coding this file...
* Copyright (C) 2010 Google, Inc.
* Rewritten by David Rientjes
*
* The routines in this file are used to kill a process when
* we're seriously out of memory. This gets called from __alloc_pages()
* in mm/page_alloc.c when we really run out of memory.
*
* Since we won't call these routines often (on a well-configured
* machine) this file will double as a 'coding guide' and a signpost
* for newbie kernel hackers. It features several pointers to major
* kernel subsystems and hints as to where to find out what things do.
*/
#include <linux/oom.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/swap.h>
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/cpuset.h>
#include <linux/export.h>
#include <linux/notifier.h>
#include <linux/memcontrol.h>
#include <linux/mempolicy.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/freezer.h>
#include <linux/ftrace.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/init.h>
#include <linux/mmu_notifier.h>
#include <asm/tlb.h>
#include "internal.h"
#include "slab.h"
#define CREATE_TRACE_POINTS
#include <trace/events/oom.h>
/*
 * File-scope knobs for the oom killer.
 * NOTE(review): the sysctl_ prefix suggests these are wired up to sysctl
 * entries; the registration is not visible in this part of the file.
 */
int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
/* Starts out as 1, unlike the two zero-initialized knobs above. */
int sysctl_oom_dump_tasks = 1;
/* NOTE(review): presumably serializes oom killer invocations - confirm at the users. */
DEFINE_MUTEX(oom_lock);
#ifdef CONFIG_NUMA
/**
 * has_intersects_mems_allowed() - check task eligibility for kill
 * @start: task whose threads are examined
 * @mask: nodemask passed to page allocator for mempolicy ooms, or NULL
 *
 * Every thread of @start is visited under rcu_read_lock() and the walk stops
 * at the first thread that qualifies.  With a non-NULL @mask a thread
 * qualifies when mempolicy_nodemask_intersects() accepts it for @mask; with a
 * NULL @mask it qualifies when cpuset_mems_allowed_intersects() reports an
 * overlap between current and that thread.
 *
 * Return: true if at least one thread of @start qualifies, false otherwise.
 */
static bool has_intersects_mems_allowed(struct task_struct *start,
					const nodemask_t *mask)
{
	struct task_struct *thread;
	bool eligible = false;

	rcu_read_lock();
	for_each_thread(start, thread) {
		/*
		 * Mempolicy constrained oom (mask set): the thread's cpuset
		 * is irrelevant, only its mempolicy is compared against the
		 * mask so that it is not killed needlessly.  Otherwise only
		 * the mems of the thread's cpuset are checked.
		 */
		eligible = mask ? mempolicy_nodemask_intersects(thread, mask)
				: cpuset_mems_allowed_intersects(current, thread);
		if (eligible)
			break;
	}
	rcu_read_unlock();

	return eligible;
}
#else
/*
 * Variant built when CONFIG_NUMA is not set: both arguments are ignored and
 * every task is reported as eligible.
 */
static bool has_intersects_mems_allowed(struct task_struct *tsk,
					const nodemask_t *mask)
{
	return true;
}
#endif /* CONFIG_NUMA */
/*
 * The process p may have dropped its own ->mm while exiting or through
 * use_mm(), yet one or more of its subthreads may still point at a valid mm.
 *
 * Walk the threads of p under rcu_read_lock(), taking task_lock() on each in
 * turn.  The first thread found with a non-NULL ->mm is returned with its
 * task_lock() still held.  If no thread has an ->mm, NULL is returned and no
 * task lock is left held.
 */
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
	struct task_struct *thread;
	struct task_struct *locked = NULL;

	rcu_read_lock();
	for_each_thread(p, thread) {
		task_lock(thread);
		if (likely(thread->mm)) {
			/* Keep the lock: it is handed over to the caller. */
			locked = thread;
			break;
		}
		task_unlock(thread);
	}
	rcu_read_unlock();

	return locked;
}
/*
 * An oom kill requested through sysrq is flagged by an order of -1; for any
 * other value the order is only used for display purposes.
 */
static inline bool is_sysrq_oom(struct oom_control *oc)
{
	const bool sysrq_requested = (oc->order == -1);

	return sysrq_requested;
}
/* True when the oom control carries a memcg, i.e. oc->memcg is non-NULL. */
static inline bool is_memcg_oom(struct oom_control *oc)
{
	return !!oc->memcg;
}
/*
 * Return true if the task is not adequate as a candidate victim task.
 *
 * @p is rejected when any of the following holds, checked in this order:
 *  - is_global_init() is true for it,
 *  - it has PF_KTHREAD set in ->flags,
 *  - @memcg is given and task_in_mem_cgroup() says @p is not in it,
 *  - has_intersects_mems_allowed() finds no overlap for @nodemask.
 */
static bool oom_unkillable_task(struct task_struct *p,
		struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
	if (is_global_init(p) || (p->flags & PF_KTHREAD))
		return true;

	/* When mem_cgroup_out_of_memory() and p is not member of the group */
	if (memcg && !task_in_mem_cgroup(p, memcg))
		return true;

	/* p may not have freeable memory in nodemask */
	return !has_intersects_mems_allowed(p, nodemask);
}
/*
* Print out unreclaimable slab info when the amount of unreclaimable slab
* memory is greater than all user memory (LRU pages).
*/
static bool is_dump_unreclaim_slabs(void)
{
unsigned long nr_lru;
nr_lru = global_node_page_state(NR_ACTIVE_ANON) +
global_node_page_state(NR_INACTIVE_ANON) +
global_node_page_state(NR_ACTIVE_FILE) +
global_node_page_state(NR_INACTIVE_FILE) +
global_node_page_state(NR_ISOLATED_ANON) +
global_node_page_state(NR_ISOLATED_FILE) +
global_node_page_state(NR_UNEVICTABLE