diff options
37 files changed, 762 insertions, 959 deletions
diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index c99e259469f7..12ebfcc1d539 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -10,7 +10,9 @@ * CPU. */ #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) -#define finish_arch_switch(prev) dsb(ish) +#define __complete_pending_tlbi() dsb(ish) +#else +#define __complete_pending_tlbi() #endif /* @@ -22,6 +24,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ + __complete_pending_tlbi(); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) diff --git a/arch/avr32/include/asm/switch_to.h b/arch/avr32/include/asm/switch_to.h index 9a8e9d5208d4..6f00581c3d4f 100644 --- a/arch/avr32/include/asm/switch_to.h +++ b/arch/avr32/include/asm/switch_to.h @@ -15,11 +15,13 @@ */ #ifdef CONFIG_OWNERSHIP_TRACE #include <asm/ocd.h> -#define finish_arch_switch(prev) \ +#define ocd_switch(prev, next) \ do { \ ocd_write(PID, prev->pid); \ - ocd_write(PID, current->pid); \ + ocd_write(PID, next->pid); \ } while(0) +#else +#define ocd_switch(prev, next) #endif /* @@ -38,6 +40,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct cpu_context *); #define switch_to(prev, next, last) \ do { \ + ocd_switch(prev, next); \ last = __switch_to(prev, &prev->thread.cpu_context + 1, \ &next->thread.cpu_context); \ } while (0) diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 7163cd7fdd69..9733cd0266e4 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -83,45 +83,43 @@ do { if (cpu_has_rw_llb) { \ } \ } while (0) +/* + * For newly created kernel threads switch_to() will return to + * ret_from_kernel_thread, newly created user threads to ret_from_fork. + * That is, everything following resume() will be skipped for new threads. 
+ * So everything that matters to new threads should be placed before resume(). + */ #define switch_to(prev, next, last) \ do { \ - u32 __c0_stat; \ s32 __fpsave = FP_SAVE_NONE; \ __mips_mt_fpaff_switch_to(prev); \ - if (cpu_has_dsp) \ + if (cpu_has_dsp) { \ __save_dsp(prev); \ - if (cop2_present && (KSTK_STATUS(prev) & ST0_CU2)) { \ - if (cop2_lazy_restore) \ - KSTK_STATUS(prev) &= ~ST0_CU2; \ - __c0_stat = read_c0_status(); \ - write_c0_status(__c0_stat | ST0_CU2); \ - cop2_save(prev); \ - write_c0_status(__c0_stat & ~ST0_CU2); \ + __restore_dsp(next); \ + } \ + if (cop2_present) { \ + set_c0_status(ST0_CU2); \ + if ((KSTK_STATUS(prev) & ST0_CU2)) { \ + if (cop2_lazy_restore) \ + KSTK_STATUS(prev) &= ~ST0_CU2; \ + cop2_save(prev); \ + } \ + if (KSTK_STATUS(next) & ST0_CU2 && \ + !cop2_lazy_restore) { \ + cop2_restore(next); \ + } \ + clear_c0_status(ST0_CU2); \ } \ __clear_software_ll_bit(); \ if (test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU)) \ __fpsave = FP_SAVE_SCALAR; \ if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \ __fpsave = FP_SAVE_VECTOR; \ - (last) = resume(prev, next, task_thread_info(next), __fpsave); \ -} while (0) - -#define finish_arch_switch(prev) \ -do { \ - u32 __c0_stat; \ - if (cop2_present && !cop2_lazy_restore && \ - (KSTK_STATUS(current) & ST0_CU2)) { \ - __c0_stat = read_c0_status(); \ - write_c0_status(__c0_stat | ST0_CU2); \ - cop2_restore(current); \ - write_c0_status(__c0_stat & ~ST0_CU2); \ - } \ - if (cpu_has_dsp) \ - __restore_dsp(current); \ if (cpu_has_userlocal) \ - write_c0_userlocal(current_thread_info()->tp_value); \ + write_c0_userlocal(task_thread_info(next)->tp_value); \ __restore_watch(); \ disable_msa(); \ + (last) = resume(prev, next, task_thread_info(next), __fpsave); \ } while (0) #endif /* _ASM_SWITCH_TO_H */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 68d067ad4222..a9f753fb73a8 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2178,7 
+2178,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vc->runner = vcpu; if (n_ceded == vc->n_runnable) { kvmppc_vcore_blocked(vc); - } else if (should_resched()) { + } else if (need_resched()) { vc->vcore_state = VCORE_PREEMPT; /* Let something else run */ cond_resched_lock(&vc->lock); diff --git a/arch/score/include/asm/switch_to.h b/arch/score/include/asm/switch_to.h index 031756b59ece..fda3f83308d2 100644 --- a/arch/score/include/asm/switch_to.h +++ b/arch/score/include/asm/switch_to.h @@ -8,6 +8,4 @@ do { \ (last) = resume(prev, next, task_thread_info(next)); \ } while (0) -#define finish_arch_switch(prev) do {} while (0) - #endif /* _ASM_SCORE_SWITCH_TO_H */ diff --git a/arch/sh/include/asm/switch_to_32.h b/arch/sh/include/asm/switch_to_32.h index 0c065513e7ac..7661b4ba8259 100644 --- a/arch/sh/include/asm/switch_to_32.h +++ b/arch/sh/include/asm/switch_to_32.h @@ -78,6 +78,8 @@ do { \ \ if (is_dsp_enabled(prev)) \ __save_dsp(prev); \ + if (is_dsp_enabled(next)) \ + __restore_dsp(next); \ \ __ts1 = (u32 *)&prev->thread.sp; \ __ts2 = (u32 *)&prev->thread.pc; \ @@ -125,10 +127,4 @@ do { \ last = __last; \ } while (0) -#define finish_arch_switch(prev) \ -do { \ - if (is_dsp_enabled(prev)) \ - __restore_dsp(prev); \ -} while (0) - #endif /* __ASM_SH_SWITCH_TO_32_H */ diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 50e7b626afe8..c5113c7ce2fd 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -333,11 +333,11 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs = (struct pt_regs *) (new_stack + STACKFRAME_SZ); /* - * A new process must start with interrupts closed in 2.5, - * because this is how Mingo's scheduler works (see schedule_tail - * and finish_arch_switch). If we do not do it, a timer interrupt hits - * before we unlock, attempts to re-take the rq->lock, and then we die. - * Thus, kpsr|=PSR_PIL. 
+ * A new process must start with interrupts disabled, see schedule_tail() + * and finish_task_switch(). (If we do not do it and if a timer interrupt + * hits before we unlock and attempts to take the rq->lock, we deadlock.) + * + * Thus, kpsr |= PSR_PIL. */ ti->ksp = (unsigned long) new_stack; p->thread.kregs = childregs; diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h index b8f888cbe6b0..34ee72705521 100644 --- a/arch/tile/include/asm/switch_to.h +++ b/arch/tile/include/asm/switch_to.h @@ -53,15 +53,13 @@ extern unsigned long get_switch_to_pc(void); * Kernel threads can check to see if they need to migrate their * stack whenever they return from a context switch; for user * threads, we defer until they are returning to user-space. + * We defer homecache migration until the runqueue lock is released. */ -#define finish_arch_switch(prev) do { \ - if (unlikely((prev)->state == TASK_DEAD)) \ - __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \ - ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \ +#define finish_arch_post_lock_switch() do { \ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \ (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \ if (current->mm == NULL && !kstack_hash && \ - current_thread_info()->homecache_cpu != smp_processor_id()) \ + current_thread_info()->homecache_cpu != raw_smp_processor_id()) \ homecache_migrate_kthread(); \ } while (0) diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index a45213781ad0..7d5769310bef 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -446,6 +446,11 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, hardwall_switch_tasks(prev, next); #endif + /* Notify the simulator of task exit. */ + if (unlikely(prev->state == TASK_DEAD)) + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | + (prev->pid << _SIM_CONTROL_OPERATOR_BITS)); + /* * Switch kernel SP, PC, and callee-saved registers. 
* In the context of the new task, return the old task pointer diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index dca71714f860..b12f81022a6b 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -90,9 +90,9 @@ static __always_inline bool __preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). */ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!raw_cpu_read_4(__preempt_count)); + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPT diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 48b7228563ad..33253930247f 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -123,6 +123,7 @@ static void enter_freeze_proper(struct cpuidle_driver *drv, * cpuidle mechanism enables interrupts and doing that with timekeeping * suspended is generally unsafe. */ + stop_critical_timings(); drv->states[index].enter_freeze(dev, drv, index); WARN_ON(!irqs_disabled()); /* @@ -131,6 +132,7 @@ static void enter_freeze_proper(struct cpuidle_driver *drv, * critical sections, so tell RCU about that. 
*/ RCU_NONIDLE(tick_unfreeze()); + start_critical_timings(); } /** @@ -195,7 +197,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, trace_cpu_idle_rcuidle(index, dev->cpu); time_start = ktime_get(); + stop_critical_timings(); entered_state = target_state->enter(dev, drv, index); + start_critical_timings(); time_end = ktime_get(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c index a1800c150839..08cb419eb4e6 100644 --- a/drivers/xen/preempt.c +++ b/drivers/xen/preempt.c @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); asmlinkage __visible void xen_maybe_preempt_hcall(void) { if (unlikely(__this_cpu_read(xen_in_preemptible_hcall) - && should_resched())) { + && need_resched())) { /* * Clear flag as we may be rescheduled on a different * cpu. diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index d0a7a4753db2..0bec580a4885 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -71,9 +71,10 @@ static __always_inline bool __preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). 
*/ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!preempt_count() && tif_need_resched()); + return unlikely(preempt_count() == preempt_offset && + tif_need_resched()); } #ifdef CONFIG_PREEMPT diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e8493fee8160..d0b380ee7d67 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -32,6 +32,14 @@ extern struct fs_struct init_fs; #define INIT_CPUSET_SEQ(tsk) #endif +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#define INIT_PREV_CPUTIME(x) .prev_cputime = { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(x.prev_cputime.lock), \ +}, +#else +#define INIT_PREV_CPUTIME(x) +#endif + #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ @@ -46,6 +54,7 @@ extern struct fs_struct init_fs; .cputime_atomic = INIT_CPUTIME_ATOMIC, \ .running = 0, \ }, \ + INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ } @@ -246,6 +255,7 @@ extern struct task_group root_task_group; INIT_TASK_RCU_TASKS(tsk) \ INIT_CPUSET_SEQ(tsk) \ INIT_RT_MUTEXES(tsk) \ + INIT_PREV_CPUTIME(tsk) \ INIT_VTIME(tsk) \ INIT_NUMA_BALANCING(tsk) \ INIT_KASAN(tsk) \ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 13d55206ccf6..869b21dcf503 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -38,6 +38,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), }) void kthread_bind(struct task_struct *k, unsigned int cpu); +void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int kthread_stop(struct task_struct *k); bool kthread_should_stop(void); bool kthread_should_park(void); diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 84991f185173..bea8dd8ff5e0 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -84,13 +84,21 @@ */ #define in_nmi() (preempt_count() & 
NMI_MASK) +/* + * The preempt_count offset after preempt_disable(); + */ #if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_DISABLE_OFFSET 1 +# define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET #else -# define PREEMPT_DISABLE_OFFSET 0 +# define PREEMPT_DISABLE_OFFSET 0 #endif /* + * The preempt_count offset after spin_lock() + */ +#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET + +/* * The preempt_count offset needed for things like: * * spin_lock_bh() @@ -103,7 +111,7 @@ * * Work as expected. */ -#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET) +#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET) /* * Are we running in atomic context? WARNING: this macro cannot @@ -124,7 +132,8 @@ #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); extern void preempt_count_sub(int val); -#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); }) +#define preempt_count_dec_and_test() \ + ({ preempt_count_sub(1); should_resched(0); }) #else #define preempt_count_add(val) __preempt_count_add(val) #define preempt_count_sub(val) __preempt_count_sub(val) @@ -184,7 +193,7 @@ do { \ #define preempt_check_resched() \ do { \ - if (should_resched()) \ + if (should_resched(0)) \ __preempt_schedule(); \ } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 04b5ada460b4..119823decc46 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -530,39 +530,49 @@ struct cpu_itimer { }; /** - * struct cputime - snaphsot of system and user cputime + * struct prev_cputime - snapshot of system and user cputime * @utime: time spent in user mode * @stime: time spent in system mode + * @lock: protects the above two fields * - * Gathers a generic snapshot of user and system time. + * Stores previous user/system time values such that we can guarantee + * monotonicity. 
*/ -struct cputime { +struct prev_cputime { +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE cputime_t utime; cputime_t stime; + raw_spinlock_t lock; +#endif }; +static inline void prev_cputime_init(struct prev_cputime *prev) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + prev->utime = prev->stime = 0; + raw_spin_lock_init(&prev->lock); +#endif +} + /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in &cputime_t units * @stime: time spent in kernel mode, in &cputime_t units * @sum_exec_runtime: total time spent on the CPU, in nanoseconds * - * This is an extension of struct cputime that includes the total runtime - * spent by the task from the scheduler point of view. - * - * As a result, this structure groups together three kinds of CPU time - * that are tracked for threads and thread groups. Most things considering - * CPU time want to group these counts together and treat all three - * of them in parallel. + * This structure groups together three kinds of CPU time that are tracked for + * threads and thread groups. Most things considering CPU time want to group + * these counts together and treat all three of them in parallel. */ struct task_cputime { cputime_t utime; cputime_t stime; unsigned long long sum_exec_runtime; }; + /* Alternate field names when used to cache expirations. */ -#define prof_exp stime #define virt_exp utime +#define prof_exp stime #define sched_exp sum_exec_runtime #define INIT_CPUTIME \ @@ -715,9 +725,7 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - struct cputime prev_cputime; -#endif + struct prev_cputime prev_cputime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; @@ -1167,29 +1175,24 @@ struct load_weight { u32 inv_weight; }; +/* + * The load_avg/util_avg accumulates an infinite geometric series. 
+ * 1) load_avg factors the amount of time that a sched_entity is + * runnable on a rq into its weight. For cfs_rq, it is the aggregated + * such weights of all runnable and blocked sched_entities. + * 2) util_avg factors frequency scaling into the amount of time + * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. + * For cfs_rq, it is the aggregated such times of all runnable and + * blocked sched_entities. + * The 64 bit load_sum can: + * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with + * the highest weight (=88761) always runnable, we should not overflow + * 2) for entity, support any load.weight always runnable + */ struct sched_avg { - u64 last_runnable_update; - s64 decay_count; - /* - * utilization_avg_contrib describes the amount of time that a - * sched_entity is running on a CPU. It is based on running_avg_sum - * and is scaled in the range [0..SCHED_LOAD_SCALE]. - * load_avg_contrib described the amount of time that a sched_entity - * is runnable on a rq. It is based on both runnable_avg_sum and the - * weight of the task. - */ - unsigned long load_avg_contrib, utilization_avg_contrib; - /* - * These sums represent an infinite geometric series and so are bound - * above by 1024/(1-y). Thus we only need a u32 to store them for all - * choices of y < 1-2^(-32)*1024. - * running_avg_sum reflects the time that the sched_entity is - * effectively running on the CPU. - * runnable_avg_sum represents the amount of time a sched_entity is on - * a runqueue which includes the running time that is monitored by - * running_avg_sum. 
- */ - u32 runnable_avg_sum, avg_period, running_avg_sum; + u64 last_update_time, load_sum; + u32 util_sum, period_contrib; + unsigned long load_avg, util_avg; }; #ifdef CONFIG_SCHEDSTATS @@ -1255,7 +1258,7 @@ struct sched_entity { #endif #ifdef CONFIG_SMP - /* Per-entity load-tracking */ + /* Per entity load average tracking */ struct sched_avg avg; #endif }; @@ -1351,9 +1354,9 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; - struct task_struct *last_wakee; - unsigned long wakee_flips; + unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; int wake_cpu; #endif @@ -1481,9 +1484,7 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - struct cputime prev_cputime; -#endif + struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqlock_t vtime_seqlock; unsigned long long vtime_snap; @@ -2214,13 +2215,6 @@ static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } #endif /* CONFIG_NO_HZ_COMMON */ -#ifndef CONFIG_CPUMASK_OFFSTACK -static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) -{ - return set_cpus_allowed_ptr(p, &new_mask); -} -#endif - /* * Do not use outside of architecture code which knows its limitations. 
* @@ -2897,12 +2891,6 @@ extern int _cond_resched(void); extern int __cond_resched_lock(spinlock_t *lock); -#ifdef CONFIG_PREEMPT_COUNT -#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET -#else -#define PREEMPT_LOCK_OFFSET 0 -#endif - #define cond_resched_lock(lock) ({ \ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ __cond_resched_lock(lock); \ diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index d2abbdb8c6aa..414d924318ce 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -112,25 +112,13 @@ static inline int try_stop_cpus(const struct cpumask *cpumask, * * This can be thought of as a very heavy write lock, equivalent to * grabbing every spinlock in the kernel. */ -int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); +int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); -/** - * __stop_machine: freeze the machine on all CPUs and run this function - * @fn: the function to run - * @data: the data ptr for the @fn - * @cpus: the cpus to run the @fn() on (NULL = any online cpu) - * - * Description: This is a special version of the above, which assumes cpus - * won't come or go while it's being called. Used by hotplug cpu. 
- */ -int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); - -int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, +int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); - #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ -static inline int __stop_machine(int (*fn)(void *), void *data, +static inline int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { unsigned long flags; @@ -141,16 +129,10 @@ static inline int __stop_machine(int (*fn)(void *), void *data, return ret; } -static inline int stop_machine(int (*fn)(void *), void *data, - const struct cpumask *cpus) -{ - return __stop_machine(fn, data, cpus); -} - -static inline int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, +static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { - return __stop_machine(fn, data, cpus); + return stop_machine(fn, data, cpus); } #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index d57a575fe31f..539d6bc3216a 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -55,9 +55,9 @@ TRACE_EVENT(sched_kthread_stop_ret, */ DECLARE_EVENT_CLASS(sched_wakeup_template, - TP_PROTO(struct task_struct *p, int success), + TP_PROTO(struct task_struct *p), - TP_ARGS(__perf_task(p), success), + TP_ARGS(__perf_task(p)), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -71,25 +71,37 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; - __entry->success = success; + __entry->success = 1; /* rudiment, kill when possible */ __entry->target_cpu = task_cpu(p); ), - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", + TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, 
__entry->target_cpu) + __entry->target_cpu) ); +/* + * Tracepoint called when waking a task; this tracepoint is guaranteed to be + * called from the waking context. + */ +DEFINE_EVENT(sched_wakeup_template, sched_waking, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); + +/* + * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING. + * It is not always called from the waking context. + */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup, - TP_PROTO(struct task_struct *p, int success), - TP_ARGS(p, success)); + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); /* * Tracepoint for waking up a new task: */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, - TP_PROTO(struct task_struct *p, int success), - TP_ARGS(p, success)); + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); #ifdef CREATE_TRACE_POINTS static inline long __trace_sched_switch_state(struct task_struct *p) diff --git a/kernel/cpu.c b/kernel/cpu.c index 3c91a3fdfce5..82cf9dff4295 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -402,7 +402,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) /* * So now all preempt/rcu users must observe !cpu_active(). */ - err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); + err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. 
*/ cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); diff --git a/kernel/fork.c b/kernel/fork.c index dbd9b8d7b7cc..0d93b4d0617b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1072,6 +1072,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) rcu_assign_pointer(tsk->sighand, sig); if (!sig) return -ENOMEM; + atomic_set(&sig->count, 1); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); return 0; @@ -1133,6 +1134,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); seqlock_init(&sig->stats_lock); + prev_cputime_init(&sig->prev_cputime); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; @@ -1340,9 +1342,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - p->prev_cputime.utime = p->prev_cputime.stime = 0; -#endif + prev_cputime_init(&p->prev_cputime); + |