summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/sched.h | 13
-rw-r--r-- kernel/sched/core.c | 1
-rw-r--r-- kernel/sched/fair.c | 16
3 files changed, 27 insertions, 3 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b1fc75e7187b..a463bc3ad437 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1343,7 +1343,20 @@ struct task_struct {
u64 node_stamp; /* migration stamp */
struct callback_head numa_work;
+ /*
+ * Exponentially decaying average of faults on a per-node basis.
+ * Scheduling placement decisions are made based on these counts.
+ * The values remain static for the duration of a PTE scan.
+ */
unsigned long *numa_faults;
+
+ /*
+ * numa_faults_buffer records faults per node during the current
+ * scan window. When the scan completes, each count in numa_faults
+ * is halved, the buffered value is added, and the buffer is zeroed.
+ */
+ unsigned long *numa_faults_buffer;
+
int numa_preferred_nid;
#endif /* CONFIG_NUMA_BALANCING */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d15cd70f85b5..064a0af44540 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1636,6 +1636,7 @@ static void __sched_fork(struct task_struct *p)
p->numa_preferred_nid = -1;
p->numa_work.next = &p->numa_work;
p->numa_faults = NULL;
+ p->numa_faults_buffer = NULL;
#endif /* CONFIG_NUMA_BALANCING */
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9efd34f63e81..3abc651bc38a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -892,8 +892,14 @@ static void task_numa_placement(struct task_struct *p)
/* Find the node with the highest number of faults */
for_each_online_node(nid) {
- unsigned long faults = p->numa_faults[nid];
+ unsigned long faults;
+
+ /* Decay existing window and add faults recorded since last scan */
p->numa_faults[nid] >>= 1;
+ p->numa_faults[nid] += p->numa_faults_buffer[nid];
+ p->numa_faults_buffer[nid] = 0;
+
+ faults = p->numa_faults[nid];
if (faults > max_faults) {
max_faults = faults;
max_nid = nid;
@@ -919,9 +925,13 @@ void task_numa_fault(int node, int pages, bool migrated)
if (unlikely(!p->numa_faults)) {
int size = sizeof(*p->numa_faults) * nr_node_ids;
- p->numa_faults = kzalloc(size, GFP_KERNEL|__GFP_NOWARN);
+ /* numa_faults and numa_faults_buffer share the allocation */
+ p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN);
if (!p->numa_faults)
return;
+
+ BUG_ON(p->numa_faults_buffer);
+ p->numa_faults_buffer = p->numa_faults + nr_node_ids;
}
/*
@@ -939,7 +949,7 @@ void task_numa_fault(int node, int pages, bool migrated)
task_numa_placement(p);
- p->numa_faults[node] += pages;
+ p->numa_faults_buffer[node] += pages;
}
static void reset_ptenuma_scan(struct task_struct *p)