summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2017-04-04 11:31:12 +0200
committerThomas Gleixner <tglx@linutronix.de>2017-04-04 11:31:12 +0200
commit38bffdac071b720db627bfd2b125a2802a04d419 (patch)
treec2e9cf66fa6ff1cc3092079e3eedeb4e6c402227 /kernel
parent57dd924e541a98219bf3a508623db2e0c07e75a7 (diff)
parenta481db34b9beb7a9647c23f2320dd38a2b1d681f (diff)
Merge branch 'sched/core' into locking/core
Required for the rtmutex/sched_deadline patches which depend on both branches
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/hashtab.c119
-rw-r--r--kernel/bpf/lpm_trie.c6
-rw-r--r--kernel/cgroup/cgroup-v1.c2
-rw-r--r--kernel/cgroup/pids.c2
-rw-r--r--kernel/sched/core.c201
-rw-r--r--kernel/sched/deadline.c63
-rw-r--r--kernel/sched/fair.c385
-rw-r--r--kernel/sched/features.h7
-rw-r--r--kernel/sched/loadavg.c20
-rw-r--r--kernel/sched/rt.c81
-rw-r--r--kernel/sched/sched.h65
-rw-r--r--kernel/workqueue.c1
12 files changed, 648 insertions, 304 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3ea87fb19a94..afe5bab376c9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -13,11 +13,12 @@
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
+#include <linux/rculist_nulls.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
struct bucket {
- struct hlist_head head;
+ struct hlist_nulls_head head;
raw_spinlock_t lock;
};
@@ -44,9 +45,14 @@ enum extra_elem_state {
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
union {
- struct hlist_node hash_node;
- struct bpf_htab *htab;
- struct pcpu_freelist_node fnode;
+ struct hlist_nulls_node hash_node;
+ struct {
+ void *padding;
+ union {
+ struct bpf_htab *htab;
+ struct pcpu_freelist_node fnode;
+ };
+ };
};
union {
struct rcu_head rcu;
@@ -162,7 +168,8 @@ skip_percpu_elems:
offsetof(struct htab_elem, lru_node),
htab->elem_size, htab->map.max_entries);
else
- pcpu_freelist_populate(&htab->freelist, htab->elems,
+ pcpu_freelist_populate(&htab->freelist,
+ htab->elems + offsetof(struct htab_elem, fnode),
htab->elem_size, htab->map.max_entries);
return 0;
@@ -217,6 +224,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
int err, i;
u64 cost;
+ BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
+ offsetof(struct htab_elem, hash_node.pprev));
+ BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
+ offsetof(struct htab_elem, hash_node.pprev));
+
if (lru && !capable(CAP_SYS_ADMIN))
/* LRU implementation is much complicated than other
* maps. Hence, limit to CAP_SYS_ADMIN for now.
@@ -326,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
goto free_htab;
for (i = 0; i < htab->n_buckets; i++) {
- INIT_HLIST_HEAD(&htab->buckets[i].head);
+ INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
raw_spin_lock_init(&htab->buckets[i].lock);
}
@@ -366,20 +378,44 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
return &htab->buckets[hash & (htab->n_buckets - 1)];
}
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
return &__select_bucket(htab, hash)->head;
}
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+/* this lookup function can only be called with bucket lock taken */
+static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
void *key, u32 key_size)
{
+ struct hlist_nulls_node *n;
+ struct htab_elem *l;
+
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+ if (l->hash == hash && !memcmp(&l->key, key, key_size))
+ return l;
+
+ return NULL;
+}
+
+/* can be called without bucket lock. it will repeat the loop in
+ * the unlikely event when elements moved from one bucket into another
+ * while link list is being walked
+ */
+static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
+ u32 hash, void *key,
+ u32 key_size, u32 n_buckets)
+{
+ struct hlist_nulls_node *n;
struct htab_elem *l;
- hlist_for_each_entry_rcu(l, head, hash_node)
+again:
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l->hash == hash && !memcmp(&l->key, key, key_size))
return l;
+ if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
+ goto again;
+
return NULL;
}
@@ -387,7 +423,7 @@ static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct htab_elem *l;
u32 hash, key_size;
@@ -400,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
head = select_bucket(htab, hash);
- l = lookup_elem_raw(head, hash, key, key_size);
+ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
return l;
}
@@ -433,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
struct bpf_htab *htab = (struct bpf_htab *)arg;
- struct htab_elem *l, *tgt_l;
- struct hlist_head *head;
+ struct htab_elem *l = NULL, *tgt_l;
+ struct hlist_nulls_head *head;
+ struct hlist_nulls_node *n;
unsigned long flags;
struct bucket *b;
@@ -444,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
raw_spin_lock_irqsave(&b->lock, flags);
- hlist_for_each_entry_rcu(l, head, hash_node)
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l == tgt_l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
break;
}
@@ -459,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct htab_elem *l, *next_l;
u32 hash, key_size;
int i;
@@ -473,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
head = select_bucket(htab, hash);
/* lookup the key */
- l = lookup_elem_raw(head, hash, key, key_size);
+ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
if (!l) {
i = 0;
@@ -481,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
}
/* key was found, get next key in the same bucket */
- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
struct htab_elem, hash_node);
if (next_l) {
@@ -500,7 +537,7 @@ find_first_elem:
head = select_bucket(htab, i);
/* pick first element in the bucket */
- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
struct htab_elem, hash_node);
if (next_l) {
/* if it's not empty, just return it */
@@ -582,9 +619,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
int err = 0;
if (prealloc) {
- l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
- if (!l_new)
+ struct pcpu_freelist_node *l;
+
+ l = pcpu_freelist_pop(&htab->freelist);
+ if (!l)
err = -E2BIG;
+ else
+ l_new = container_of(l, struct htab_elem, fnode);
} else {
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
atomic_dec(&htab->count);
@@ -661,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -700,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
- hlist_del_rcu(&l_old->hash_node);
+ hlist_nulls_del_rcu(&l_old->hash_node);
free_htab_elem(htab, l_old);
}
ret = 0;
@@ -716,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new, *l_old = NULL;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -757,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
bpf_lru_node_set_ref(&l_new->lru_node);
- hlist_del_rcu(&l_old->hash_node);
+ hlist_nulls_del_rcu(&l_old->hash_node);
}
ret = 0;
@@ -781,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -820,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = PTR_ERR(l_new);
goto err;
}
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
}
ret = 0;
err:
@@ -834,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -882,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
} else {
pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
value, onallcpus);
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;
}
ret = 0;
@@ -910,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct bucket *b;
struct htab_elem *l;
unsigned long flags;
@@ -930,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
free_htab_elem(htab, l);
ret = 0;
}
@@ -942,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct bucket *b;
struct htab_elem *l;
unsigned long flags;
@@ -962,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
ret = 0;
}
@@ -977,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab)
int i;
for (i = 0; i < htab->n_buckets; i++) {
- struct hlist_head *head = select_bucket(htab, i);
- struct hlist_node *n;
+ struct hlist_nulls_head *head = select_bucket(htab, i);
+ struct hlist_nulls_node *n;
struct htab_elem *l;
- hlist_for_each_entry_safe(l, n, head, hash_node) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+ hlist_nulls_del_rcu(&l->hash_node);
if (l->state != HTAB_EXTRA_ELEM_USED)
htab_elem_free(htab, l);
}
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 8bfe0afaee10..b37bd9ab7f57 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -500,9 +500,15 @@ unlock:
raw_spin_unlock(&trie->lock);
}
+static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ return -ENOTSUPP;
+}
+
static const struct bpf_map_ops trie_ops = {
.map_alloc = trie_alloc,
.map_free = trie_free,
+ .map_get_next_key = trie_get_next_key,
.map_lookup_elem = trie_lookup_elem,
.map_update_elem = trie_update_elem,
.map_delete_elem = trie_delete_elem,
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 56eba9caa632..1dc22f6b49f5 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1329,7 +1329,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
struct task_struct *task;
int count = 0;
- seq_printf(seq, "css_set %p\n", cset);
+ seq_printf(seq, "css_set %pK\n", cset);
list_for_each_entry(task, &cset->tasks, cg_list) {
if (count++ > MAX_TASKS_SHOWN_PER_CSS)
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
index e756dae49300..2237201d66d5 100644
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task)
/* Only log the first time events_limit is incremented. */
if (atomic64_inc_return(&pids->events_limit) == 1) {
pr_info("cgroup: fork rejected by pids controller in ");
- pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id));
+ pr_cont_cgroup_path(css->cgroup);
pr_cont("\n");
}
cgroup_file_notify(&pids->events_file);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3b31fc05a0f1..ab9f6ac099a7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -86,21 +86,6 @@ int sysctl_sched_rt_runtime = 950000;
cpumask_var_t cpu_isolated_map;
/*
- * this_rq_lock - lock this runqueue and disable interrupts.
- */
-static struct rq *this_rq_lock(void)
- __acquires(rq->lock)
-{
- struct rq *rq;
-
- local_irq_disable();
- rq = this_rq();
- raw_spin_lock(&rq->lock);
-
- return rq;
-}
-
-/*
* __task_rq_lock - lock the rq @p resides on.
*/
struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
@@ -233,8 +218,11 @@ void update_rq_clock(struct rq *rq)
return;
#ifdef CONFIG_SCHED_DEBUG
+ if (sched_feat(WARN_DOUBLE_CLOCK))
+ SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED);
rq->clock_update_flags |= RQCF_UPDATED;
#endif
+
delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
if (delta < 0)
return;
@@ -261,13 +249,14 @@ static void hrtick_clear(struct rq *rq)
static enum hrtimer_restart hrtick(struct hrtimer *timer)
{
struct rq *rq = container_of(timer, struct rq, hrtick_timer);
+ struct rq_flags rf;
WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
- raw_spin_lock(&rq->lock);
+ rq_lock(rq, &rf);
update_rq_clock(rq);
rq->curr->sched_class->task_tick(rq, rq->curr, 1);
- raw_spin_unlock(&rq->lock);
+ rq_unlock(rq, &rf);
return HRTIMER_NORESTART;
}
@@ -287,11 +276,12 @@ static void __hrtick_restart(struct rq *rq)
static void __hrtick_start(void *arg)
{
struct rq *rq = arg;
+ struct rq_flags rf;
- raw_spin_lock(&rq->lock);
+ rq_lock(rq, &rf);
__hrtick_restart(rq);
rq->hrtick_csd_pending = 0;
- raw_spin_unlock(&rq->lock);
+ rq_unlock(rq, &rf);
}
/*
@@ -762,17 +752,23 @@ static void set_load_weight(struct task_struct *p)
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
- update_rq_clock(rq);
+ if (!(flags & ENQUEUE_NOCLOCK))
+ update_rq_clock(rq);
+
if (!(flags & ENQUEUE_RESTORE))
sched_info_queued(rq, p);
+
p->sched_class->enqueue_task(rq, p, flags);
}
static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
- update_rq_clock(rq);
+ if (!(flags & DEQUEUE_NOCLOCK))
+ update_rq_clock(rq);
+
if (!(flags & DEQUEUE_SAVE))
sched_info_dequeued(rq, p);
+
p->sched_class->dequeue_task(rq, p, flags);
}
@@ -946,18 +942,19 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
*
* Returns (locked) new rq. Old rq's lock is released.
*/
-static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new_cpu)
+static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
+ struct task_struct *p, int new_cpu)
{
lockdep_assert_held(&rq->lock);
p->on_rq = TASK_ON_RQ_MIGRATING;
- dequeue_task(rq, p, 0);
+ dequeue_task(rq, p, DEQUEUE_NOCLOCK);
set_task_cpu(p, new_cpu);
- raw_spin_unlock(&rq->lock);
+ rq_unlock(rq, rf);
rq = cpu_rq(new_cpu);
- raw_spin_lock(&rq->lock);
+ rq_lock(rq, rf);
BUG_ON(task_cpu(p) != new_cpu);
enqueue_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
@@ -980,7 +977,8 @@ struct migration_arg {
* So we race with normal scheduler movements, but that's OK, as long
* as the task is no longer on this CPU.
*/
-static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_cpu)
+static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
+ struct task_struct *p, int dest_cpu)
{
if (unlikely(!cpu_active(dest_cpu)))
return rq;
@@ -989,7 +987,8 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_
if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
return rq;
- rq = move_queued_task(rq, p, dest_cpu);
+ update_rq_clock(rq);
+ rq = move_queued_task(rq, rf, p, dest_cpu);
return rq;
}
@@ -1004,6 +1003,7 @@ static int migration_cpu_stop(void *data)
struct migration_arg *arg = data;
struct task_struct *p = arg->task;
struct rq *rq = this_rq();
+ struct rq_flags rf;
/*
* The original target CPU might have gone down and we might
@@ -1018,7 +1018,7 @@ static int migration_cpu_stop(void *data)
sched_ttwu_pending();
raw_spin_lock(&p->pi_lock);
- raw_spin_lock(&rq->lock);
+ rq_lock(rq, &rf);
/*
* If task_rq(p) != rq, it cannot be migrated here, because we're
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -1026,11 +1026,11 @@ static int migration_cpu_stop(void *data)
*/
if (task_rq(p) == rq) {
if (task_on_rq_queued(p))
- rq = __migrate_task(rq, p, arg->dest_cpu);
+ rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
else
p->wake_cpu = arg->dest_cpu;
}
- raw_spin_unlock(&rq->lock);
+ rq_unlock(rq, &rf);
raw_spin_unlock(&p->pi_lock);
local_irq_enable();
@@ -1063,7 +1063,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
* holding rq->lock.
*/
lockdep_assert_held(&rq->lock);
- dequeue_task(rq, p, DEQUEUE_SAVE);
+ dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
}
if (running)
put_prev_task(rq, p);
@@ -1071,7 +1071,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
p->sched_class->set_cpus_allowed(p, new_mask);
if (queued)
- enqueue_task(rq, p, ENQUEUE_RESTORE);
+ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
if (running)
set_curr_task(rq, p);
}
@@ -1150,9 +1150,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
* OK, since we're going to drop the lock immediately
* afterwards anyway.
*/
- rq_unpin_lock(rq, &rf);
- rq = move_queued_task(rq, p, dest_cpu);
- rq_repin_lock(rq, &rf);
+ rq = move_queued_task(rq, &rf, p, dest_cpu);
}
out:
task_rq_unlock(rq, p, &rf);
@@ -1217,16 +1215,24 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
{
if (task_on_rq_queued(p)) {
struct rq *src_rq, *dst_rq;
+ struct rq_flags srf, drf;
src_rq = task_rq(p);
dst_rq = cpu_rq(cpu);
+ rq_pin_lock(src_rq, &srf);
+ rq_pin_lock(dst_rq, &drf);
+
p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(src_rq, p, 0);
set_task_cpu(p, cpu);
activate_task(dst_rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(dst_rq, p, 0);
+
+ rq_unpin_lock(dst_rq, &drf);
+ rq_unpin_lock(src_rq, &srf);
+
} else {
/*
* Task isn't running anymore; make it appear like we migrated
@@ -1680,7 +1686,7 @@ static void
ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
struct rq_flags *rf)
{
- int en_flags = ENQUEUE_WAKEUP;
+ int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
lockdep_assert_held(&rq->lock);
@@ -1726,14 +1732,13 @@ void sched_ttwu_pending(void)
struct rq *rq = this_rq();
struct llist_node *llist = llist_del_all(&rq->wake_list);
struct task_struct *p;
- unsigned long flags;
struct rq_flags rf;
if (!llist)
return;
- raw_spin_lock_irqsave(&rq->lock, flags);
- rq_pin_lock(rq, &rf);
+ rq_lock_irqsave(rq, &rf);
+ update_rq_clock(rq);
while (llist) {
int wake_flags = 0;
@@ -1747,8 +1752,7 @@ void sched_ttwu_pending(void)
ttwu_do_activate(rq, p, wake_flags, &rf);
}
- rq_unpin_lock(rq, &rf);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
+ rq_unlock_irqrestore(rq, &rf);
}
void scheduler_ipi(void)
@@ -1806,7 +1810,7 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
void wake_up_if_idle(int cpu)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long flags;
+ struct rq_flags rf;
rcu_read_lock();
@@ -1816,11 +1820,11 @@ void wake_up_if_idle(int cpu)
if (set_nr_if_polling(rq->idle)) {
trace_sched_wake_idle_without_ipi(cpu);
} else {
- raw_spin_lock_irqsave(&rq->lock, flags);
+ rq_lock_irqsave(rq, &rf);
if (is_idle_task(rq->curr))
smp_send_reschedule(cpu);
/* Else CPU is not idle, do nothing here: */
- raw_spin_unlock_irqrestore(&rq->lock, flags);
+ rq_unlock_irqrestore(rq, &rf);
}
out:
@@ -1846,11 +1850,10 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
}
#endif
- raw_spin_lock(&rq->lock);
- rq_pin_lock(rq, &rf);
+ rq_lock(rq, &rf);
+ update_rq_clock(rq);
ttwu_do_activate(rq, p, wake_flags, &rf);
- rq_unpin_lock(rq, &rf);
- raw_spin_unlock(&rq->lock);
+ rq_unlock(rq, &rf);
}
/*
@@ -2097,11 +2100,9 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
* disabled avoiding further scheduler activity on it and we've
* not yet picked a replacement task.
*/
- rq_unpin_lock(rq, rf);
- raw_spin_unlock(&rq->lock);
+ rq_unlock(rq, rf);
raw_spin_lock(&p->pi_lock);
- raw_spin_lock(&rq->lock);
- rq_repin_lock(rq, rf);
+ rq_relock(rq, rf);
}
if (!(p->state & TASK_NORMAL))
@@ -2114,7 +2115,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
delayacct_blkio_end();
atomic_dec(&rq->nr_iowait);
}
- ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+ ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);
}
ttwu_do_wakeup(rq, p, 0, rf);
@@ -2555,7 +2556,7 @@ void wake_up_new_task(struct task_struct *p)
update_rq_clock(rq);
post_init_entity_util_avg(&p->se);
- activate_task(rq, p, 0);
+ activate_task(rq, p, ENQUEUE_NOCLOCK);
p->on_rq = TASK_ON_RQ_QUEUED;
trace_sched_wakeup_new(p);
check_preempt_curr(rq, p, WF_FORK);
@@ -3093,15 +3094,18 @@ void scheduler_tick(void)
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
+ struct rq_flags rf;
sched_clock_tick();
- raw_spin_lock(&rq->lock);
+ rq_lock(rq, &rf);
+
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
cpu_load_update_active(rq);
calc_global_load_tick(rq);
- raw_spin_unlock(&rq->lock);
+
+ rq_unlock(rq, &rf);
perf_event_task_tick();
@@ -3386,18 +3390,18 @@ static void __sched notrace __schedule(bool preempt)
* done by the caller to avoid the race with signal_wake_up().
*/
smp_mb__before_spinlock();
- raw_spin_lock(&rq->lock);
- rq_pin_lock(rq, &rf);
+ rq_lock(rq, &rf);
/* Promote REQ to ACT */
rq->clock_update_flags <<= 1;
+ update_rq_clock(rq);
switch_count = &prev->nivcsw;
if (!preempt && prev->state) {
if (unlikely(signal_pending_state(prev->state, prev))) {
prev->state = TASK_RUNNING;
} else {
- deactivate_task(rq, prev, DEQUEUE_SLEEP);
+ deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
prev->on_rq = 0;
if (prev->in_iowait) {
@@ -3421,9 +3425,6 @@ static void __sched notrace __schedule(bool preempt)
switch_count = &prev->nvcsw;
}
- if (task_on_rq_queued(prev))
- update_rq_clock(rq);
-
next = pick_next_task(rq, prev, &rf);
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
@@ -3439,8 +3440,7 @@ static void __sched notrace __schedule(bool preempt)
rq = context_switch(rq, prev, next, &rf);
} else {
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
- rq_unpin_lock(rq, &rf);
- raw_spin_unlock_irq(&rq->lock);
+ rq_unlock_irq(rq, &rf);
}
balance_callback(rq);
@@ -3684,7 +3684,8 @@ EXPORT_SYMBOL(default_wake_function);
*/
void rt_mutex_setprio(struct task_struct *p, int prio)
{
- int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
+ int oldprio, queued, running, queue_flag =
+ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
const struct sched_class *prev_class;
struct rq_flags rf;
struct rq *rq;
@@ -3805,7 +3806,7 @@ void set_user_nice(struct task_struct *p, long nice)
queued = task_on_rq_queued(p);
running = task_current(rq, p);
if (queued)
- dequeue_task(rq, p, DEQUEUE_SAVE);
+ dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
if (running)
put_prev_task(rq, p);
@@ -3816,7 +3817,7 @@ void set_user_nice(struct task_struct *p, long nice)
delta = p->prio - old_prio;
if (queued) {
- enqueue_task(rq, p, ENQUEUE_RESTORE);
+ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
/*
* If the task increased its priority or is running and
* lowered its priority, then reschedule its CPU:
@@ -4126,7 +4127,7 @@ static int __sched_setscheduler(struct task_struct *p,
const struct sched_class *prev_class;
struct rq_flags rf;
int reset_on_fork;
- int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
+ int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
struct rq *rq;
/* May grab non-irq protected spin_locks: */
@@ -4923,7 +4924,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
*/
SYSCALL_DEFINE0(sched_yield)
{
- struct rq *rq = this_rq_lock();
+ struct rq_flags rf;
+ struct rq *rq;
+
+ local_irq_disable();
+ rq = this_rq();
+ rq_lock(rq, &rf);
schedstat_inc(rq->yld_count);
current->sched_class->yield_task(rq);
@@ -4932,9 +4938,8 @@ SYSCALL_DEFINE0(sched_yield)
* Since we are going to call schedule() anyway, there's
* no need to preempt or enable interrupts:
*/
- __release(rq->lock);
- spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
- do_raw_spin_unlock(&rq->lock);
+ preempt_disable();
+ rq_unlock(rq, &rf);
sched_preempt_enable_no_resched();
schedule();
@@ -5514,7 +5519,7 @@ void sched_setnuma(struct task_struct *p, int nid)
p->numa_preferred_nid = nid;
if (queued)
- enqueue_task(rq, p, ENQUEUE_RESTORE);
+ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
if (running)
set_curr_task(rq, p);
task_rq_unlock(rq, p, &rf);
@@ -5579,11 +5584,11 @@ static struct task_struct fake_task = {
* there's no concurrency possible, we hold the required locks anyway
* because of lock validation efforts.
*/
-static void migrate_tasks(struct rq *dead_rq)
+static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
{
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
- struct rq_flags rf;
+ struct rq_flags orf = *rf;
int dest_cpu;
/*
@@ -5602,9 +5607,7 @@ static void migrate_tasks(struct rq *dead_rq)
* class method both need to have an up-to-date
* value of rq->clock[_task]
*/
- rq_pin_lock(rq, &rf);
update_rq_clock(rq);
- rq_unpin_lock(rq, &rf);
for (;;) {
/*
@@ -5617,8 +5620,7 @@ static void migrate_tasks(struct rq *dead_rq)
/*
* pick_next_task() assumes pinned rq->lock:
*/
- rq_repin_lock(rq, &rf);
- next = pick_next_task(rq, &fake_task, &rf);
+ next = pick_next_task(rq, &fake_task, rf);
BUG_ON(!next);
next->sched_class->put_prev_task(rq, next);
@@ -5631,10 +5633,9 @@ static void migrate_tasks(struct rq *dead_rq)
* because !cpu_active at this point, which means load-balance
* will not interfere. Also, stop-machine.
*/
- rq_unpin_lock(rq, &rf);
- raw_spin_unlock(&rq->lock);
+ rq_unlock(rq, rf);
raw_spin_lock(&next->pi_lock);
- raw_spin_lock(&rq->lock);
+ rq_relock(rq, rf);
/*
* Since we're inside stop-machine, _nothing_ should have
@@ -5648,12 +5649,12 @@ static void migrate_tasks(struct rq *dead_rq)
/* Find suitable destination for @next, with force if needed. */
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
-
- rq = __migrate_task(rq, next, dest_cpu);
+ rq = __migrate_task(rq, rf, next, dest_cpu);
if (rq != dead_rq) {
- raw_spin_unlock(&rq->lock);
+ rq_unlock(rq, rf);
rq = dead_rq;
- raw_spin_lock(&rq->lock);
+ *rf = orf;
+ rq_relock(rq, rf);
}
raw_spin_unlock(&next->pi_lock);
}
@@ -5766,7 +5767,7 @@ static int cpuset_cpu_inactive(unsigned int cpu)
int sched_cpu_activate(unsigned int cpu)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long flags;
+ struct rq_flags rf;
set_cpu_active(cpu, true);
@@ -5784,12 +5785,12 @@ int sched_cpu_activate(unsigned int cpu)
* 2) At runtime, if cpuset_cpu_active() fails to rebuild the
* domains.
*/
- raw_spin_lock_irqsave(&rq->lock, flags);
+ rq_lock_irqsave(rq, &rf);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_online(rq);
}
- raw_spin_unlock_irqrestore(&rq->lock, flags);
+ rq_unlock_irqrestore(rq, &rf);
update_max_interval();
@@ -5847,18 +5848,20 @@ int sched_cpu_starting(unsigned int cpu)
int sched_cpu_dying(unsigned int cpu)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long flags;
+ struct rq_flags rf;
/* Handle pending wakeups and then migrate everything off */
sched_ttwu_pending();
- raw_spin_lock_irqsave(&rq->lock, flags);
+
+ rq_lock_irqsave(rq, &rf);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
- migrate_tasks(rq);
+ migrate_tasks(rq, &rf);
BUG_ON(rq->nr_running != 1);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
+ rq_unlock_irqrestore(rq, &rf);
+
calc_load_migrate(rq);
update_max_interval();
nohz_balance_exit_idle(cpu);
@@ -6412,7 +6415,8 @@ static void sched_change_group(struct task_struct *tsk, int type)
*/
void sched_move_task(struct task_struct *tsk)
{
- int queued, running;
+ int queued, running, queue_flags =
+ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
struct rq_flags rf;
struct rq *rq;
@@ -6423,14 +6427,14 @@ void sched_move_task(struct task_struct *tsk)
queued = task_on_rq_queued(tsk);
if (queued)
- dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);