summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/preempt.h3
-rw-r--r--drivers/tty/n_tty.c17
-rw-r--r--fs/notify/inotify/inotify_user.c9
-rw-r--r--include/asm-generic/preempt.h3
-rw-r--r--include/linux/freezer.h50
-rw-r--r--include/linux/init_task.h10
-rw-r--r--include/linux/kernel.h5
-rw-r--r--include/linux/sched.h87
-rw-r--r--include/linux/wait.h80
-rw-r--r--include/net/sock.h1
-rw-r--r--include/trace/events/sched.h9
-rw-r--r--include/uapi/linux/sched.h4
-rw-r--r--kernel/audit.c11
-rw-r--r--kernel/cpuset.c23
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/locking/mutex.c8
-rw-r--r--kernel/module.c30
-rw-r--r--kernel/sched/completion.c5
-rw-r--r--kernel/sched/core.c241
-rw-r--r--kernel/sched/cpudeadline.h3
-rw-r--r--kernel/sched/cpupri.h3
-rw-r--r--kernel/sched/deadline.c99
-rw-r--r--kernel/sched/debug.c11
-rw-r--r--kernel/sched/fair.c354
-rw-r--r--kernel/sched/rt.c17
-rw-r--r--kernel/sched/sched.h43
-rw-r--r--kernel/sched/wait.c66
-rw-r--r--kernel/smpboot.c15
-rw-r--r--net/bluetooth/rfcomm/core.c18
-rw-r--r--net/core/dev.c10
-rw-r--r--net/core/rtnetlink.c10
31 files changed, 915 insertions, 335 deletions
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 400873450e33..8f3271842533 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -30,9 +30,6 @@ static __always_inline void preempt_count_set(int pc)
/*
* must be macros to avoid header recursion hell
*/
-#define task_preempt_count(p) \
- (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)
-
#define init_task_preempt_count(p) do { \
task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
} while (0)
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 2e900a98c3e3..26f097f60b10 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -2123,7 +2123,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
{
struct n_tty_data *ldata = tty->disc_data;
unsigned char __user *b = buf;
- DECLARE_WAITQUEUE(wait, current);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int c;
int minimum, time;
ssize_t retval = 0;
@@ -2186,10 +2186,6 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
nr--;
break;
}
- /* This statement must be first before checking for input
- so that any interrupt will set the state back to
- TASK_RUNNING. */
- set_current_state(TASK_INTERRUPTIBLE);
if (((minimum - (b - buf)) < ldata->minimum_to_wake) &&
((minimum - (b - buf)) >= 1))
@@ -2220,13 +2216,13 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
n_tty_set_room(tty);
up_read(&tty->termios_rwsem);
- timeout = schedule_timeout(timeout);
+ timeout = wait_woken(&wait, TASK_INTERRUPTIBLE,
+ timeout);
down_read(&tty->termios_rwsem);
continue;
}
}
- __set_current_state(TASK_RUNNING);
/* Deal with packet mode. */
if (packet && b == buf) {
@@ -2273,7 +2269,6 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
mutex_unlock(&ldata->atomic_read_lock);
- __set_current_state(TASK_RUNNING);
if (b - buf)
retval = b - buf;
@@ -2306,7 +2301,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
const unsigned char *buf, size_t nr)
{
const unsigned char *b = buf;
- DECLARE_WAITQUEUE(wait, current);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int c;
ssize_t retval = 0;
@@ -2324,7 +2319,6 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
add_wait_queue(&tty->write_wait, &wait);
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
if (signal_pending(current)) {
retval = -ERESTARTSYS;
break;
@@ -2378,12 +2372,11 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
}
up_read(&tty->termios_rwsem);
- schedule();
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
down_read(&tty->termios_rwsem);
}
break_out:
- __set_current_state(TASK_RUNNING);
remove_wait_queue(&tty->write_wait, &wait);
if (b - buf != nr && tty->fasync)
set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index daf76652fe58..283aa312d745 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -227,14 +227,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
struct fsnotify_event *kevent;
char __user *start;
int ret;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
start = buf;
group = file->private_data;
+ add_wait_queue(&group->notification_waitq, &wait);
while (1) {
- prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
-
mutex_lock(&group->notification_mutex);
kevent = get_one_event(group, count);
mutex_unlock(&group->notification_mutex);
@@ -264,10 +263,10 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
if (start != buf)
break;
- schedule();
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
+ remove_wait_queue(&group->notification_waitq, &wait);
- finish_wait(&group->notification_waitq, &wait);
if (start != buf && ret != -EFAULT)
ret = buf - start;
return ret;
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index 1cd3f5d767a8..eb6f9e6c3075 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -23,9 +23,6 @@ static __always_inline void preempt_count_set(int pc)
/*
* must be macros to avoid header recursion hell
*/
-#define task_preempt_count(p) \
- (task_thread_info(p)->preempt_count & ~PREEMPT_NEED_RESCHED)
-
#define init_task_preempt_count(p) do { \
task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
} while (0)
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 7fd81b8c4897..6b7fd9cf5ea2 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -246,15 +246,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
* defined in <linux/wait.h>
*/
-#define wait_event_freezekillable(wq, condition) \
-({ \
- int __retval; \
- freezer_do_not_count(); \
- __retval = wait_event_killable(wq, (condition)); \
- freezer_count(); \
- __retval; \
-})
-
/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
#define wait_event_freezekillable_unsafe(wq, condition) \
({ \
@@ -265,35 +256,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
__retval; \
})
-#define wait_event_freezable(wq, condition) \
-({ \
- int __retval; \
- freezer_do_not_count(); \
- __retval = wait_event_interruptible(wq, (condition)); \
- freezer_count(); \
- __retval; \
-})
-
-#define wait_event_freezable_timeout(wq, condition, timeout) \
-({ \
- long __retval = timeout; \
- freezer_do_not_count(); \
- __retval = wait_event_interruptible_timeout(wq, (condition), \
- __retval); \
- freezer_count(); \
- __retval; \
-})
-
-#define wait_event_freezable_exclusive(wq, condition) \
-({ \
- int __retval; \
- freezer_do_not_count(); \
- __retval = wait_event_interruptible_exclusive(wq, condition); \
- freezer_count(); \
- __retval; \
-})
-
-
#else /* !CONFIG_FREEZER */
static inline bool frozen(struct task_struct *p) { return false; }
static inline bool freezing(struct task_struct *p) { return false; }
@@ -331,18 +293,6 @@ static inline void set_freezable(void) {}
#define freezable_schedule_hrtimeout_range(expires, delta, mode) \
schedule_hrtimeout_range(expires, delta, mode)
-#define wait_event_freezable(wq, condition) \
- wait_event_interruptible(wq, condition)
-
-#define wait_event_freezable_timeout(wq, condition, timeout) \
- wait_event_interruptible_timeout(wq, condition, timeout)
-
-#define wait_event_freezable_exclusive(wq, condition) \
- wait_event_interruptible_exclusive(wq, condition)
-
-#define wait_event_freezekillable(wq, condition) \
- wait_event_killable(wq, condition)
-
#define wait_event_freezekillable_unsafe(wq, condition) \
wait_event_killable(wq, condition)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d996aef8044f..3037fc085e8e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -166,6 +166,15 @@ extern struct task_group root_task_group;
# define INIT_RT_MUTEXES(tsk)
#endif
+#ifdef CONFIG_NUMA_BALANCING
+# define INIT_NUMA_BALANCING(tsk) \
+ .numa_preferred_nid = -1, \
+ .numa_group = NULL, \
+ .numa_faults = NULL,
+#else
+# define INIT_NUMA_BALANCING(tsk)
+#endif
+
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -237,6 +246,7 @@ extern struct task_group root_task_group;
INIT_CPUSET_SEQ(tsk) \
INIT_RT_MUTEXES(tsk) \
INIT_VTIME(tsk) \
+ INIT_NUMA_BALANCING(tsk) \
}
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3d770f5564b8..446d76a87ba1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -162,6 +162,7 @@ extern int _cond_resched(void);
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+ void ___might_sleep(const char *file, int line, int preempt_offset);
void __might_sleep(const char *file, int line, int preempt_offset);
/**
* might_sleep - annotation for functions that can sleep
@@ -175,10 +176,14 @@ extern int _cond_resched(void);
*/
# define might_sleep() \
do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+# define sched_annotate_sleep() __set_current_state(TASK_RUNNING)
#else
+ static inline void ___might_sleep(const char *file, int line,
+ int preempt_offset) { }
static inline void __might_sleep(const char *file, int line,
int preempt_offset) { }
# define might_sleep() do { might_resched(); } while (0)
+# define sched_annotate_sleep() do { } while (0)
#endif
#define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 706a9f744909..55f5ee7cc3d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -243,6 +243,43 @@ extern char ___assert_task_state[1 - 2*!!(
((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
(task->flags & PF_FROZEN) == 0)
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+
+#define __set_task_state(tsk, state_value) \
+ do { \
+ (tsk)->task_state_change = _THIS_IP_; \
+ (tsk)->state = (state_value); \
+ } while (0)
+#define set_task_state(tsk, state_value) \
+ do { \
+ (tsk)->task_state_change = _THIS_IP_; \
+ set_mb((tsk)->state, (state_value)); \
+ } while (0)
+
+/*
+ * set_current_state() includes a barrier so that the write of current->state
+ * is correctly serialised wrt the caller's subsequent test of whether to
+ * actually sleep:
+ *
+ * set_current_state(TASK_UNINTERRUPTIBLE);
+ * if (do_i_need_to_sleep())
+ * schedule();
+ *
+ * If the caller does not need such serialisation then use __set_current_state()
+ */
+#define __set_current_state(state_value) \
+ do { \
+ current->task_state_change = _THIS_IP_; \
+ current->state = (state_value); \
+ } while (0)
+#define set_current_state(state_value) \
+ do { \
+ current->task_state_change = _THIS_IP_; \
+ set_mb(current->state, (state_value)); \
+ } while (0)
+
+#else
+
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
#define set_task_state(tsk, state_value) \
@@ -259,11 +296,13 @@ extern char ___assert_task_state[1 - 2*!!(
*
* If the caller does not need such serialisation then use __set_current_state()
*/
-#define __set_current_state(state_value) \
+#define __set_current_state(state_value) \
do { current->state = (state_value); } while (0)
-#define set_current_state(state_value) \
+#define set_current_state(state_value) \
set_mb(current->state, (state_value))
+#endif
+
/* Task command name length */
#define TASK_COMM_LEN 16
@@ -1558,28 +1597,23 @@ struct task_struct {
struct numa_group *numa_group;
/*
- * Exponential decaying average of faults on a per-node basis.
- * Scheduling placement decisions are made based on the these counts.
- * The values remain static for the duration of a PTE scan
+ * numa_faults is an array split into four regions:
+ * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
+ * in this precise order.
+ *
+ * faults_memory: Exponential decaying average of faults on a per-node
+ * basis. Scheduling placement decisions are made based on these
+ * counts. The values remain static for the duration of a PTE scan.
+ * faults_cpu: Track the nodes the process was running on when a NUMA
+ * hinting fault was incurred.
+ * faults_memory_buffer and faults_cpu_buffer: Record faults per node
+ * during the current scan window. When the scan completes, the counts
+ * in faults_memory and faults_cpu decay and these values are copied.
*/
- unsigned long *numa_faults_memory;
+ unsigned long *numa_faults;
unsigned long total_numa_faults;
/*
- * numa_faults_buffer records faults per node during the current
- * scan window. When the scan completes, the counts in
- * numa_faults_memory decay and these values are copied.
- */
- unsigned long *numa_faults_buffer_memory;
-
- /*
- * Track the nodes the process was running on when a NUMA hinting
- * fault was incurred.
- */
- unsigned long *numa_faults_cpu;
- unsigned long *numa_faults_buffer_cpu;
-
- /*
* numa_faults_locality tracks if faults recorded during the last
* scan window were remote/local. The task scan period is adapted
* based on the locality of the faults with different weights
@@ -1661,6 +1695,9 @@ struct task_struct {
unsigned int sequential_io;
unsigned int sequential_io_avg;
#endif
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+ unsigned long task_state_change;
+#endif
};
/* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2052,6 +2089,10 @@ static inline void tsk_restore_flags(struct task_struct *task,
task->flags |= orig_flags & flags;
}
+extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
+ const struct cpumask *trial);
+extern int task_can_attach(struct task_struct *p,
+ const struct cpumask *cs_cpus_allowed);
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask);
@@ -2760,7 +2801,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
extern int _cond_resched(void);
#define cond_resched() ({ \
- __might_sleep(__FILE__, __LINE__, 0); \
+ ___might_sleep(__FILE__, __LINE__, 0); \
_cond_resched(); \
})
@@ -2773,14 +2814,14 @@ extern int __cond_resched_lock(spinlock_t *lock);
#endif
#define cond_resched_lock(lock) ({ \
- __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
+ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
__cond_resched_lock(lock); \
})
extern int __cond_resched_softirq(void);
#define cond_resched_softirq() ({ \
- __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
+ ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
__cond_resched_softirq(); \
})
diff --git a/include/linux/wait.h b/include/linux/wait.h
index e4a8eb9312ea..2232ed16635a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -13,9 +13,12 @@ typedef struct __wait_queue wait_queue_t;
typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
+/* __wait_queue::flags */
+#define WQ_FLAG_EXCLUSIVE 0x01
+#define WQ_FLAG_WOKEN 0x02
+
struct __wait_queue {
unsigned int flags;
-#define WQ_FLAG_EXCLUSIVE 0x01
void *private;
wait_queue_func_t func;
struct list_head task_list;
@@ -258,11 +261,37 @@ __out: __ret; \
*/
#define wait_event(wq, condition) \
do { \
+ might_sleep(); \
if (condition) \
break; \
__wait_event(wq, condition); \
} while (0)
+#define __wait_event_freezable(wq, condition) \
+ ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
+ schedule(); try_to_freeze())
+
+/**
+ * wait_event - sleep (or freeze) until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute
+ * to system load) until the @condition evaluates to true. The
+ * @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define wait_event_freezable(wq, condition) \
+({ \
+ int __ret = 0; \
+ might_sleep(); \
+ if (!(condition)) \
+ __ret = __wait_event_freezable(wq, condition); \
+ __ret; \
+})
+
#define __wait_event_timeout(wq, condition, timeout) \
___wait_event(wq, ___wait_cond_timeout(condition), \
TASK_UNINTERRUPTIBLE, 0, timeout, \
@@ -290,11 +319,30 @@ do { \
#define wait_event_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
+ might_sleep(); \
if (!___wait_cond_timeout(condition)) \
__ret = __wait_event_timeout(wq, condition, timeout); \
__ret; \
})
+#define __wait_event_freezable_timeout(wq, condition, timeout) \
+ ___wait_event(wq, ___wait_cond_timeout(condition), \
+ TASK_INTERRUPTIBLE, 0, timeout, \
+ __ret = schedule_timeout(__ret); try_to_freeze())
+
+/*
+ * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
+ * increasing load and is freezable.
+ */
+#define wait_event_freezable_timeout(wq, condition, timeout) \
+({ \
+ long __ret = timeout; \
+ might_sleep(); \
+ if (!___wait_cond_timeout(condition)) \
+ __ret = __wait_event_freezable_timeout(wq, condition, timeout); \
+ __ret; \
+})
+
#define __wait_event_cmd(wq, condition, cmd1, cmd2) \
(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
cmd1; schedule(); cmd2)
@@ -315,6 +363,7 @@ do { \
*/
#define wait_event_cmd(wq, condition, cmd1, cmd2) \
do { \
+ might_sleep(); \
if (condition) \
break; \
__wait_event_cmd(wq, condition, cmd1, cmd2); \
@@ -342,6 +391,7 @@ do { \
#define wait_event_interruptible(wq, condition) \
({ \
int __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_interruptible(wq, condition); \
__ret; \
@@ -375,6 +425,7 @@ do { \
#define wait_event_interruptible_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
+ might_sleep(); \
if (!___wait_cond_timeout(condition)) \
__ret = __wait_event_interruptible_timeout(wq, \
condition, timeout); \
@@ -425,6 +476,7 @@ do { \
#define wait_event_hrtimeout(wq, condition, timeout) \
({ \
int __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_hrtimeout(wq, condition, timeout, \
TASK_UNINTERRUPTIBLE); \
@@ -450,6 +502,7 @@ do { \
#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \
({ \
long __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_hrtimeout(wq, condition, timeout, \
TASK_INTERRUPTIBLE); \
@@ -463,12 +516,27 @@ do { \
#define wait_event_interruptible_exclusive(wq, condition) \
({ \
int __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_interruptible_exclusive(wq, condition);\
__ret; \
})
+#define __wait_event_freezable_exclusive(wq, condition) \
+ ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
+ schedule(); try_to_freeze())
+
+#define wait_event_freezable_exclusive(wq, condition) \
+({ \
+ int __ret = 0; \
+ might_sleep(); \
+ if (!(condition)) \
+ __ret = __wait_event_freezable_exclusive(wq, condition);\
+ __ret; \
+})
+
+
#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
({ \
int __ret = 0; \
@@ -637,6 +705,7 @@ do { \
#define wait_event_killable(wq, condition) \
({ \
int __ret = 0; \
+ might_sleep(); \
if (!(condition)) \
__ret = __wait_event_killable(wq, condition); \
__ret; \
@@ -830,6 +899,8 @@ void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int sta
long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
+long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
+int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
@@ -886,6 +957,7 @@ extern int bit_wait_io_timeout(struct wait_bit_key *);
static inline int
wait_on_bit(void *word, int bit, unsigned mode)
{
+ might_sleep();
if (!test_bit(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
@@ -910,6 +982,7 @@ wait_on_bit(void *word, int bit, unsigned mode)
static inline int
wait_on_bit_io(void *word, int bit, unsigned mode)
{
+ might_sleep();
if (!test_bit(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
@@ -936,6 +1009,7 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
static inline int
wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
{
+ might_sleep();
if (!test_bit(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit, action, mode);
@@ -963,6 +1037,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode
static inline int
wait_on_bit_lock(void *word, int bit, unsigned mode)
{
+ might_sleep();
if (!test_and_set_bit(bit, word))
return 0;
return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
@@ -986,6 +1061,7 @@ wait_on_bit_lock(void *word, int bit, unsigned mode)
static inline int
wait_on_bit_lock_io(void *word, int bit, unsigned mode)
{
+ might_sleep();
if (!test_and_set_bit(bit, word))
return 0;
return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
@@ -1011,6 +1087,7 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode)
static inline int
wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
{
+ might_sleep();
if (!test_and_set_bit(bit, word))
return 0;
return out_of_line_wait_on_bit_lock(word, bit, action, mode);
@@ -1029,6 +1106,7 @@ wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned
static inline
int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
{
+ might_sleep();
if (atomic_read(val) == 0)
return 0;
return out_of_line_wait_on_atomic_t(val, action, mode);
diff --git a/include/net/sock.h b/include/net/sock.h
index 7db3db112baa..e6f235ebf6c9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -897,6 +897,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
if (!__rc) { \
*(__timeo) = schedule_timeout(*(__timeo)); \
} \
+ sched_annotate_sleep(); \
lock_sock(__sk); \
__rc = __condition; \
__rc; \
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 0a68d5ae584e..30fedaf3e56a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -97,16 +97,19 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
long state = p->state;
#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_SCHED_DEBUG
+ BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
/*
* For all intents and purposes a preempted task is a running task.
*/
- if (task_preempt_count(p) & PREEMPT_ACTIVE)
+ if (preempt_count() & PREEMPT_ACTIVE)
state = TASK_RUNNING | TASK_STATE_MAX;
-#endif
+#endif /* CONFIG_PREEMPT */
return state;
}
-#endif
+#endif /* CREATE_TRACE_POINTS */
/*
* Tracepoint for task switches, performed by the scheduler:
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index b932be9f5c5b..cc89ddefa926 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -23,8 +23,8 @@
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
and is now available for re-use. */
-#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
-#define CLONE_NEWIPC 0x08000000 /* New ipcs */
+#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
+#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#define CLONE_NEWNET 0x40000000 /* New network namespace */
diff --git a/kernel/audit.c b/kernel/audit.c
index cebb11db4d34..1f37f15117e5 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -499,7 +499,6 @@ static int kauditd_thread(void *dummy)
set_freezable();
while (!kthread_should_stop()) {
struct sk_buff *skb;
- DECLARE_WAITQUEUE(wait, current);
flush_hold_queue();
@@ -514,16 +513,8 @@ static int kauditd_thread(void *dummy)
audit_printk_skb(skb);
continue;
}
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kauditd_wait, &wait);
- if (!skb_queue_len(&audit_skb_queue)) {
- try_to_freeze();
- schedule();
- }
-
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&kauditd_wait, &wait);
+ wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue));
}
return 0;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1f107c74087b..723cfc9d0ad7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -506,6 +506,16 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
goto out;
}
+ /*
+ * We can't shrink if we won't have enough room for SCHED_DEADLINE
+ * tasks.
+ */
+ ret = -EBUSY;
+ if (is_cpu_exclusive(cur) &&
+ !cpuset_cpumask_can_shrink(cur->cpus_allowed,
+ trial->cpus_allowed))
+ goto out;
+
ret = 0;
out:
rcu_read_unlock();
@@ -1429,17 +1439,8 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
goto out_unlock;
cgroup_taskset_for_each(task, tset) {
- /*
- * Kthreads which disallow setaffinity shouldn't be moved
- * to a new cpuset; we don't want to change their cpu
- * affinity and isolating such threads by their set of
- * allowed nodes is unnecessary. Thus, cpusets are not
- * applicable for such threads. This prevents checking for
- * success of set_cpus_allowed_ptr() on all attached tasks
- * before cpus_allowed may be changed.
- */
- ret = -EINVAL;
- if (task->flags & PF_NO_SETAFFINITY)
+ ret = task_can_attach(task, cs->cpus_allowed);
+ if (ret)
goto out_unlock;
ret = security_task_setscheduler(task);
if (ret)
diff --git a/kernel/exit.c b/kernel/exit.c
index 5d30019ff953..232c4bc8bcc9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -997,6 +997,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
get_task_struct(p);
read_unlock(&tasklist_lock);
+ sched_annotate_sleep();
+
if ((exit_code & 0x7f) == 0) {
why = CLD_EXITED;
status = exit_code >> 8;
@@ -1079,6 +1081,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
* thread can reap it because we its state == DEAD/TRACE.
*/
read_unlock(&tasklist_lock);
+ sched_annotate_sleep();
retval = wo->wo_rusage
? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
@@ -1210,6 +1213,7 @@ unlock_sig:
pid = task_pid_vnr(p);
why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
read_unlock(&tasklist_lock);
+ sched_annotate_sleep();
if (unlikely(wo->wo_flags & WNOWAIT))
return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
@@ -1272,6 +1276,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
pid = task_pid_vnr(p);
get_task_struct(p);
read_unlock(&tasklist_lock);
+ sched_annotate_sleep();
if (!wo->wo_info) {
retval = wo->wo_rusage
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index dadbf88c22c4..454195194d4a 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -378,8 +378,14 @@ done:
* reschedule now, before we try-lock the mutex. This avoids getting
* scheduled out right after we obtained the mutex.
*/
- if (need_resched())
+ if (need_resched()) {
+ /*
+ * We _should_ have TASK_RUNNING here, but just in case
+ * we do not, make it so, otherwise we might get stuck.
+ */
+ __set_current_state(TASK_RUNNING);
schedule_preempt_disabled();