Diffstat (limited to 'kernel/time/posix-cpu-timers.c')
-rw-r--r--	kernel/time/posix-cpu-timers.c	1010
1 file changed, 492 insertions(+), 518 deletions(-)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 0a426f4e3125..92a431981b1c 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -20,11 +20,20 @@
static void posix_cpu_timer_rearm(struct k_itimer *timer);
+void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
+{
+ posix_cputimers_init(pct);
+ if (cpu_limit != RLIM_INFINITY) {
+ pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
+ pct->timers_active = true;
+ }
+}
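/*
 * Illustrative sketch (not part of this patch): a fork-path caller such
 * as copy_signal() could seed the group-wide expiry cache from the
 * inherited RLIMIT_CPU soft limit, which is specified in seconds:
 */
static inline void example_seed_group_timers(struct signal_struct *sig)
{
	unsigned long cpu_limit =
		READ_ONCE(current->signal->rlim[RLIMIT_CPU].rlim_cur);

	posix_cputimers_group_init(&sig->posix_cputimers, cpu_limit);
}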
+
/*
* Called after updating RLIMIT_CPU to run cpu timer and update
- * tsk->signal->cputime_expires expiration cache if necessary. Needs
- * siglock protection since other code may update expiration cache as
- * well.
+ * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if
+ * necessary. Needs siglock protection since other code may update the
+ * expiration cache as well.
*/
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
@@ -35,46 +44,97 @@ void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
spin_unlock_irq(&task->sighand->siglock);
}
-static int check_clock(const clockid_t which_clock)
+/*
+ * Functions for validating access to tasks.
+ */
+static struct task_struct *lookup_task(const pid_t pid, bool thread,
+ bool gettime)
{
- int error = 0;
struct task_struct *p;
- const pid_t pid = CPUCLOCK_PID(which_clock);
-
- if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
- return -EINVAL;
- if (pid == 0)
- return 0;
+ /*
+ * If the encoded PID is 0, then the timer is targeted at current
+ * or the process to which current belongs.
+ */
+ if (!pid)
+ return thread ? current : current->group_leader;
- rcu_read_lock();
p = find_task_by_vpid(pid);
- if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
- same_thread_group(p, current) : has_group_leader_pid(p))) {
- error = -EINVAL;
+ if (!p)
+ return p;
+
+ if (thread)
+ return same_thread_group(p, current) ? p : NULL;
+
+ if (gettime) {
+ /*
+ * For clock_gettime(PROCESS) the task does not need to be
+ * the actual group leader. tsk->sighand gives
+ * access to the group's clock.
+ *
+ * Timers need the group leader because they take a
+ * reference on it and store the task pointer until the
+ * timer is destroyed.
+ */
+ return (p == current || thread_group_leader(p)) ? p : NULL;
}
+
+ /*
+ * For process timers, require that p is the group leader.
+ */
+ return has_group_leader_pid(p) ? p : NULL;
+}
+
+static struct task_struct *__get_task_for_clock(const clockid_t clock,
+ bool getref, bool gettime)
+{
+ const bool thread = !!CPUCLOCK_PERTHREAD(clock);
+ const pid_t pid = CPUCLOCK_PID(clock);
+ struct task_struct *p;
+
+ if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
+ return NULL;
+
+ rcu_read_lock();
+ p = lookup_task(pid, thread, gettime);
+ if (p && getref)
+ get_task_struct(p);
rcu_read_unlock();
+ return p;
+}
- return error;
+static inline struct task_struct *get_task_for_clock(const clockid_t clock)
+{
+ return __get_task_for_clock(clock, true, false);
+}
+
+static inline struct task_struct *get_task_for_clock_get(const clockid_t clock)
+{
+ return __get_task_for_clock(clock, true, true);
+}
+
+static inline int validate_clock_permissions(const clockid_t clock)
+{
+ return __get_task_for_clock(clock, false, false) ? 0 : -EINVAL;
}
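/*
 * For reference (sketch, not part of this patch): the dynamic clockid
 * layout the helpers above decode. Bits 0-1 select PROF/VIRT/SCHED,
 * bit 2 is the per-thread flag, and the upper bits hold the one's
 * complement of the PID, so PID 0 ("this task/process") encodes as all
 * ones:
 */
static inline clockid_t example_make_thread_cpuclock(pid_t tid, clockid_t which)
{
	return (~(clockid_t)tid << 3) | CPUCLOCK_PERTHREAD_MASK | which;
}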
/*
* Update expiry time from increment, and increase overrun count,
* given the current clock sample.
*/
-static void bump_cpu_timer(struct k_itimer *timer, u64 now)
+static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
{
+ u64 delta, incr, expires = timer->it.cpu.node.expires;
int i;
- u64 delta, incr;
if (!timer->it_interval)
- return;
+ return expires;
- if (now < timer->it.cpu.expires)
- return;
+ if (now < expires)
+ return expires;
incr = timer->it_interval;
- delta = now + incr - timer->it.cpu.expires;
+ delta = now + incr - expires;
/* Don't use (incr*2 < delta), incr*2 might overflow. */
for (i = 0; incr < delta - incr; i++)
@@ -84,48 +144,26 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now)
if (delta < incr)
continue;
- timer->it.cpu.expires += incr;
+ timer->it.cpu.node.expires += incr;
timer->it_overrun += 1LL << i;
delta -= incr;
}
+ return timer->it.cpu.node.expires;
}
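/*
 * Worked example for the binary bump above (illustration only): with
 * it_interval = 10ms and "now" 65ms past the old expiry, delta = 75ms.
 * The doubling loop stops at incr = 40ms (i = 2); stepping back down
 * advances the expiry by 40 + 20 + 10 = 70ms and adds an overrun of
 * (1 << 2) + (1 << 1) + (1 << 0) = 7 in three iterations instead of
 * seven. Checking "incr < delta - incr" instead of "incr * 2 < delta"
 * avoids overflowing the doubled increment.
 */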
-/**
- * task_cputime_zero - Check a task_cputime struct for all zero fields.
- *
- * @cputime: The struct to compare.
- *
- * Checks @cputime to see if all fields are zero. Returns true if all fields
- * are zero, false if any field is nonzero.
- */
-static inline int task_cputime_zero(const struct task_cputime *cputime)
+/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
+static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)
{
- if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
- return 1;
- return 0;
-}
-
-static inline u64 prof_ticks(struct task_struct *p)
-{
- u64 utime, stime;
-
- task_cputime(p, &utime, &stime);
-
- return utime + stime;
-}
-static inline u64 virt_ticks(struct task_struct *p)
-{
- u64 utime, stime;
-
- task_cputime(p, &utime, &stime);
-
- return utime;
+ return !(~pct->bases[CPUCLOCK_PROF].nextevt |
+ ~pct->bases[CPUCLOCK_VIRT].nextevt |
+ ~pct->bases[CPUCLOCK_SCHED].nextevt);
}
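/*
 * Equivalent, branchier form of the check above (illustration only):
 * ~x is zero exactly when x == U64_MAX, so the OR of the complements is
 * zero iff every base still caches the "no timer armed" sentinel.
 */
static inline bool example_expiry_cache_is_inactive(const struct posix_cputimers *pct)
{
	return pct->bases[CPUCLOCK_PROF].nextevt == U64_MAX &&
	       pct->bases[CPUCLOCK_VIRT].nextevt == U64_MAX &&
	       pct->bases[CPUCLOCK_SCHED].nextevt == U64_MAX;
}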
static int
posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
{
- int error = check_clock(which_clock);
+ int error = validate_clock_permissions(which_clock);
+
if (!error) {
tp->tv_sec = 0;
tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
@@ -142,42 +180,66 @@ posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
}
static int
-posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp)
+posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)
{
+ int error = validate_clock_permissions(clock);
+
/*
* You can never reset a CPU clock, but we check for other errors
* in the call before failing with EPERM.
*/
- int error = check_clock(which_clock);
- if (error == 0) {
- error = -EPERM;
- }
- return error;
+ return error ? : -EPERM;
}
-
/*
- * Sample a per-thread clock for the given task.
+ * Sample a per-thread clock for the given task. clkid is validated.
*/
-static int cpu_clock_sample(const clockid_t which_clock,
- struct task_struct *p, u64 *sample)
+static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p)
{
- switch (CPUCLOCK_WHICH(which_clock)) {
- default:
- return -EINVAL;
+ u64 utime, stime;
+
+ if (clkid == CPUCLOCK_SCHED)
+ return task_sched_runtime(p);
+
+ task_cputime(p, &utime, &stime);
+
+ switch (clkid) {
case CPUCLOCK_PROF:
- *sample = prof_ticks(p);
- break;
+ return utime + stime;
case CPUCLOCK_VIRT:
- *sample = virt_ticks(p);
- break;
- case CPUCLOCK_SCHED:
- *sample = task_sched_runtime(p);
- break;
+ return utime;
+ default:
+ WARN_ON_ONCE(1);
}
return 0;
}
+static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime)
+{
+ samples[CPUCLOCK_PROF] = stime + utime;
+ samples[CPUCLOCK_VIRT] = utime;
+ samples[CPUCLOCK_SCHED] = rtime;
+}
+
+static void task_sample_cputime(struct task_struct *p, u64 *samples)
+{
+ u64 stime, utime;
+
+ task_cputime(p, &utime, &stime);
+ store_samples(samples, stime, utime, p->se.sum_exec_runtime);
+}
+
+static void proc_sample_cputime_atomic(struct task_cputime_atomic *at,
+ u64 *samples)
+{
+ u64 stime, utime, rtime;
+
+ utime = atomic64_read(&at->utime);
+ stime = atomic64_read(&at->stime);
+ rtime = atomic64_read(&at->sum_exec_runtime);
+ store_samples(samples, stime, utime, rtime);
+}
+
/*
* Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg
* to avoid race conditions with concurrent updates to cputime.
@@ -193,29 +255,56 @@ retry:
}
}
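/*
 * The body of __update_gt_cputime() is elided by the diff context above;
 * it follows the usual cmpxchg retry pattern, roughly (sketch, not the
 * verbatim kernel code):
 */
static void example_update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
{
	u64 curr = atomic64_read(cputime);

	/* Only move the value forward; retry if a concurrent update won. */
	while (sum_cputime > curr) {
		u64 old = atomic64_cmpxchg(cputime, curr, sum_cputime);

		if (old == curr)
			break;
		curr = old;
	}
}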
-static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum)
+static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
+ struct task_cputime *sum)
{
__update_gt_cputime(&cputime_atomic->utime, sum->utime);
__update_gt_cputime(&cputime_atomic->stime, sum->stime);
__update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
}
-/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */
-static inline void sample_cputime_atomic(struct task_cputime *times,
- struct task_cputime_atomic *atomic_times)
+/**
+ * thread_group_sample_cputime - Sample cputime for a given task
+ * @tsk: Task for which cputime needs to be sampled
+ * @samples: Storage for time samples
+ *
+ * Called from sys_getitimer() to calculate the expiry time of an active
+ * timer. That means group cputime accounting is already active. Called
+ * with task sighand lock held.
+ *
+ * Updates @samples with an up-to-date sample of the thread group cputimes.
+ */
+void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)
{
- times->utime = atomic64_read(&atomic_times->utime);
- times->stime = atomic64_read(&atomic_times->stime);
- times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
+
+ WARN_ON_ONCE(!pct->timers_active);
+
+ proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
}
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+/**
+ * thread_group_start_cputime - Start cputime and return a sample
+ * @tsk: Task for which cputime needs to be started
+ * @samples: Storage for time samples
+ *
+ * The thread group cputime accounting is avoided when there are no posix
+ * CPU timers armed. Before starting a timer it's required to check whether
+ * the time accounting is active. If not, a full update of the atomic
+ * accounting store needs to be done and the accounting enabled.
+ *
+ * Updates @samples with an up-to-date sample of the thread group cputimes.
+ */
+static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
{
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
- struct task_cputime sum;
+ struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
/* Check if cputimer isn't running. This is accessed without locking. */
- if (!READ_ONCE(cputimer->running)) {
+ if (!READ_ONCE(pct->timers_active)) {
+ struct task_cputime sum;
+
/*
* The POSIX timer interface allows for absolute time expiry
* values through the TIMER_ABSTIME flag, therefore we have
@@ -225,94 +314,69 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
update_gt_cputime(&cputimer->cputime_atomic, &sum);
/*
- * We're setting cputimer->running without a lock. Ensure
- * this only gets written to in one operation. We set
- * running after update_gt_cputime() as a small optimization,
- * but barriers are not required because update_gt_cputime()
+ * We're setting timers_active without a lock. Ensure this
+ * only gets written to in one operation. We set it after
+ * update_gt_cputime() as a small optimization, but
+ * barriers are not required because update_gt_cputime()
* can handle concurrent updates.
*/
- WRITE_ONCE(cputimer->running, true);
+ WRITE_ONCE(pct->timers_active, true);
}
- sample_cputime_atomic(times, &cputimer->cputime_atomic);
+ proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
}
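/*
 * Note (illustration only): readers pair with this lockless enable via
 * READ_ONCE(pct->timers_active), as in cpu_clock_sample_group() below.
 * The store/read need no ordering barriers because update_gt_cputime()
 * only ever moves the atomic accounting store forward, so a racing
 * reader can never observe group time going backwards.
 */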
-/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with task sighand lock held for safe while_each_thread()
- * traversal.
- */
-static int cpu_clock_sample_group(const clockid_t which_clock,
- struct task_struct *p,
- u64 *sample)
+static void __thread_group_cputime(struct task_struct *tsk, u64 *samples)
{
- struct task_cputime cputime;
+ struct task_cputime ct;
- switch (CPUCLOCK_WHICH(which_clock)) {
- default:
- return -EINVAL;
- case CPUCLOCK_PROF:
- thread_group_cputime(p, &cputime);
- *sample = cputime.utime + cputime.stime;
- break;
- case CPUCLOCK_VIRT:
- thread_group_cputime(p, &cputime);
- *sample = cputime.utime;
- break;
- case CPUCLOCK_SCHED:
- thread_group_cputime(p, &cputime);
- *sample = cputime.sum_exec_runtime;
- break;
- }
- return 0;
+ thread_group_cputime(tsk, &ct);
+ store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime);
}
-static int posix_cpu_clock_get_task(struct task_struct *tsk,
- const clockid_t which_clock,
- struct timespec64 *tp)
+/*
+ * Sample a process (thread group) clock for the given task clkid. If the
+ * group's cputime accounting is already enabled, read the atomic
+ * store. Otherwise a full update is required. Task's sighand lock must be
+ * held to protect the task traversal on a full update. clkid is already
+ * validated.
+ */
+static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p,
+ bool start)
{
- int err = -EINVAL;
- u64 rtn;
+ struct thread_group_cputimer *cputimer = &p->signal->cputimer;
+ struct posix_cputimers *pct = &p->signal->posix_cputimers;
+ u64 samples[CPUCLOCK_MAX];
- if (CPUCLOCK_PERTHREAD(which_clock)) {
- if (same_thread_group(tsk, current))
- err = cpu_clock_sample(which_clock, tsk, &rtn);
+ if (!READ_ONCE(pct->timers_active)) {
+ if (start)
+ thread_group_start_cputime(p, samples);
+ else
+ __thread_group_cputime(p, samples);
} else {
- if (tsk == current || thread_group_leader(tsk))
- err = cpu_clock_sample_group(which_clock, tsk, &rtn);
+ proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
}
- if (!err)
- *tp = ns_to_timespec64(rtn);
-
- return err;
+ return samples[clkid];
}
-
-static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp)
+static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
{
- const pid_t pid = CPUCLOCK_PID(which_clock);
- int err = -EINVAL;
+ const clockid_t clkid = CPUCLOCK_WHICH(clock);
+ struct task_struct *tsk;
+ u64 t;
- if (pid == 0) {
- /*
- * Special case constant value for our own clocks.
- * We don't have to do any lookup to find ourselves.
- */
- err = posix_cpu_clock_get_task(current, which_clock, tp);
- } else {
- /*
- * Find the given PID, and validate that the caller
- * should be able to see it.
- */
- struct task_struct *p;
- rcu_read_lock();
- p = find_task_by_vpid(pid);
- if (p)
- err = posix_cpu_clock_get_task(p, which_clock, tp);
- rcu_read_unlock();
- }
+ tsk = get_task_for_clock_get(clock);
+ if (!tsk)
+ return -EINVAL;
- return err;
+ if (CPUCLOCK_PERTHREAD(clock))
+ t = cpu_clock_sample(clkid, tsk);
+ else
+ t = cpu_clock_sample_group(clkid, tsk, false);
+ put_task_struct(tsk);
+
+ *tp = ns_to_timespec64(t);
+ return 0;
}
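/*
 * Userspace view of this path (illustration only): clock_gettime() on a
 * process CPU clockid obtained via clock_getcpuclockid() lands here with
 * CPUCLOCK_PERTHREAD(clock) false:
 *
 *	#include <stdio.h>
 *	#include <time.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		clockid_t cid;
 *		struct timespec ts;
 *
 *		if (clock_getcpuclockid(getpid(), &cid))
 *			return 1;
 *		clock_gettime(cid, &ts);
 *		printf("cpu: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
 *		return 0;
 *	}
 */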
/*
@@ -322,44 +386,15 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *t
*/
static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
- int ret = 0;
- const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
- struct task_struct *p;
+ struct task_struct *p = get_task_for_clock(new_timer->it_clock);
- if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
+ if (!p)
return -EINVAL;
new_timer->kclock = &clock_posix_cpu;
-
- INIT_LIST_HEAD(&new_timer->it.cpu.entry);
-
- rcu_read_lock();
- if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
- if (pid == 0) {
- p = current;
- } else {
- p = find_task_by_vpid(pid);
- if (p && !same_thread_group(p, current))
- p = NULL;
- }
- } else {
- if (pid == 0) {
- p = current->group_leader;
- } else {
- p = find_task_by_vpid(pid);
- if (p && !has_group_leader_pid(p))
- p = NULL;
- }
- }
+ timerqueue_init(&new_timer->it.cpu.node);
new_timer->it.cpu.task = p;
- if (p) {
- get_task_struct(p);
- } else {
- ret = -EINVAL;
- }
- rcu_read_unlock();
-
- return ret;
+ return 0;
}
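/*
 * Userspace view (illustration only): timer_create() on a CPU clock takes
 * this path; the timer then holds a task reference until timer_delete():
 *
 *	struct sigevent sev = {
 *		.sigev_notify = SIGEV_SIGNAL,
 *		.sigev_signo  = SIGPROF,
 *	};
 *	timer_t id;
 *
 *	timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &id);
 */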
/*
@@ -370,12 +405,14 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
*/
static int posix_cpu_timer_del(struct k_itimer *timer)
{
- int ret = 0;
- unsigned long flags;
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ struct task_struct *p = ctmr->task;
struct sighand_struct *sighand;
- struct task_struct *p = timer->it.cpu.task;
+ unsigned long flags;
+ int ret = 0;
- WARN_ON_ONCE(p == NULL);
+ if (WARN_ON_ONCE(!p))
+ return -EINVAL;
/*
* Protect against sighand release/switch in exit/exec and process/
@@ -384,15 +421,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
sighand = lock_task_sighand(p, &flags);
if (unlikely(sighand == NULL)) {
/*
- * We raced with the reaping of the task.
- * The deletion should have cleared us off the list.
+ * This raced with the reaping of the task. The exit cleanup
+ * should have removed this timer from the timer queue.
*/
- WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
+ WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
} else {
if (timer->it.cpu.firing)
ret = TIMER_RETRY;
else
- list_del(&timer->it.cpu.entry);
+ cpu_timer_dequeue(ctmr);
unlock_task_sighand(p, &flags);
}
@@ -403,25 +440,30 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
return ret;
}
-static void cleanup_timers_list(struct list_head *head)
+static void cleanup_timerqueue(struct timerqueue_head *head)
{
- struct cpu_timer_list *timer, *next;
+ struct timerqueue_node *node;
+ struct cpu_timer *ctmr;
- list_for_each_entry_safe(timer, next, head, entry)
- list_del_init(&timer->entry);
+ while ((node = timerqueue_getnext(head))) {
+ timerqueue_del(head, node);
+ ctmr = container_of(node, struct cpu_timer, node);
+ ctmr->head = NULL;
+ }
}
/*
- * Clean out CPU timers still ticking when a thread exited. The task
- * pointer is cleared, and the expiry time is replaced with the residual
- * time for later timer_gettime calls to return.
+ * Clean out CPU timers which are still armed when a thread exits. The
+ * timers are only removed from the list. No other updates are done. The
+ * corresponding posix timers are still accessible, but cannot be rearmed.
+ *
* This must be called with the siglock held.
*/
-static void cleanup_timers(struct list_head *head)
+static void cleanup_timers(struct posix_cputimers *pct)
{
- cleanup_timers_list(head);
- cleanup_timers_list(++head);
- cleanup_timers_list(++head);
+ cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);
+ cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);
+ cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);
}
/*
@@ -431,16 +473,11 @@ static void cleanup_timers(struct list_head *head)
*/
void posix_cpu_timers_exit(struct task_struct *tsk)
{
- cleanup_timers(tsk->cpu_timers);
+ cleanup_timers(&tsk->posix_cputimers);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
- cleanup_timers(tsk->signal->cpu_timers);
-}
-
-static inline int expires_gt(u64 expires, u64 new_exp)
-{
- return expires == 0 || expires > new_exp;
+ cleanup_timers(&tsk->signal->posix_cputimers);
}
/*
@@ -449,58 +486,33 @@ static inline int expires_gt(u64 expires, u64 new_exp)
*/
static void arm_timer(struct k_itimer *timer)
{
- struct task_struct *p = timer->it.cpu.task;
- struct list_head *head, *listpos;
- struct task_cputime *cputime_expires;
- struct cpu_timer_list *const nt = &timer->it.cpu;
- struct cpu_timer_list *next;
-
- if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- head = p->cpu_timers;
- cputime_expires = &p->cputime_expires;
- } else {
- head = p->signal->cpu_timers;
- cputime_expires = &p->signal->cputime_expires;
- }
- head += CPUCLOCK_WHICH(timer->it_clock);
-
- listpos = head;
- list_for_each_entry(next, head, entry) {
- if (nt->expires < next->expires)
- break;
- listpos = &next->entry;
- }
- list_add(&nt->entry, listpos);
-
- if (listpos == head) {
- u64 exp = nt->expires;
+ int clkidx = CPUCLOCK_WHICH(timer->it_clock);
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ u64 newexp = cpu_timer_getexpires(ctmr);
+ struct task_struct *p = ctmr->task;
+ struct posix_cputimer_base *base;
+
+ if (CPUCLOCK_PERTHREAD(timer->it_clock))
+ base = p->posix_cputimers.bases + clkidx;
+ else
+ base = p->signal->posix_cputimers.bases + clkidx;
+
+ if (!cpu_timer_enqueue(&base->tqhead, ctmr))
+ return;
- /*
- * We are the new earliest-expiring POSIX 1.b timer, hence
- * need to update expiration cache. Take into account that
- * for process timers we share expiration cache with itimers
- * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
- */
+ /*
+ * We are the new earliest-expiring POSIX 1.b timer, hence
+ * need to update expiration cache. Take into account that
+ * for process timers we share expiration cache with itimers
+ * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
+ */
+ if (newexp < base->nextevt)
+ base->nextevt = newexp;
- switch (CPUCLOCK_WHICH(timer->it_clock)) {
- case CPUCLOCK_PROF:
- if (expires_gt(cputime_expires->prof_exp, exp))
- cputime_expires->prof_exp = exp;
- break;
- case CPUCLOCK_VIRT:
- if (expires_gt(cputime_expires->virt_exp, exp))
- cputime_expires->virt_exp = exp;
- break;
- case CPUCLOCK_SCHED:
- if (expires_gt(cputime_expires->sched_exp, exp))
- cputime_expires->sched_exp = exp;
- break;
- }
- if (CPUCLOCK_PERTHREAD(timer->it_clock))
- tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
- else
- tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
- }
+ if (CPUCLOCK_PERTHREAD(timer->it_clock))
+ tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
+ else
+ tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
}
/*
@@ -508,24 +520,26 @@ static void arm_timer(struct k_itimer *timer)
*/
static void cpu_timer_fire(struct k_itimer *timer)
{
+ struct cpu_timer *ctmr = &timer->it.cpu;
+
if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
/*
* User doesn't want any signal.
*/
- timer->it.cpu.expires = 0;
+ cpu_timer_setexpires(ctmr, 0);
} else if (unlikely(timer->sigq == NULL)) {
/*
* This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create.
*/
wake_up_process(timer->it_process);
- timer->it.cpu.expires = 0;
+ cpu_timer_setexpires(ctmr, 0);
} else if (!timer->it_interval) {
/*
* One-shot timer. Clear it as soon as it's fired.
*/
posix_timer_event(timer, 0);
- timer->it.cpu.expires = 0;
+ cpu_timer_setexpires(ctmr, 0);
} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
/*
* The signal did not get queued because the signal
@@ -539,33 +553,6 @@ static void cpu_timer_fire(struct k_itimer *timer)
}
/*
- * Sample a process (thread group) timer for the given group_leader task.
- * Must be called with task sighand lock held for safe while_each_thread()
- * traversal.
- */
-static int cpu_timer_sample_group(const clockid_t which_clock,
- struct task_struct *p, u64 *sample)
-{
- struct task_cputime cputime;
-
- thread_group_cputimer(p, &cputime);
- switch (CPUCLOCK_WHICH(which_clock)) {
- default:
- return -EINVAL;
- case CPUCLOCK_PROF:
- *sample = cputime.utime + cputime.stime;
- break;
- case CPUCLOCK_VIRT:
- *sample = cputime.utime;
- break;
- case CPUCLOCK_SCHED:
- *sample = cputime.sum_exec_runtime;
- break;
- }
- return 0;
-}
-
-/*
* Guts of sys_timer_settime for CPU timers.
* This is called with the timer locked and interrupts disabled.
* If we return TIMER_RETRY, it's necessary to release the timer's lock
@@ -574,13 +561,16 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
struct itimerspec64 *new, struct itimerspec64 *old)
{
- unsigned long flags;
- struct sighand_struct *sighand;
- struct task_struct *p = timer->it.cpu.task;
+ clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
u64 old_expires, new_expires, old_incr, val;
- int ret;
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ struct task_struct *p = ctmr->task;
+ struct sighand_struct *sighand;
+ unsigned long flags;
+ int ret = 0;
- WARN_ON_ONCE(p == NULL);
+ if (WARN_ON_ONCE(!p))
+ return -EINVAL;
/*
* Use the to_ktime conversion because that clamps the maximum
@@ -597,22 +587,21 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
* If p has just been reaped, we can no
* longer get any information about it at all.
*/
- if (unlikely(sighand == NULL)) {
+ if (unlikely(sighand == NULL))
return -ESRCH;
- }
/*
* Disarm any old timer after extracting its expiry time.
*/
-
- ret = 0;
old_incr = timer->it_interval;
- old_expires = timer->it.cpu.expires;
+ old_expires = cpu_timer_getexpires(ctmr);
+
if (unlikely(timer->it.cpu.firing)) {
timer->it.cpu.firing = -1;
ret = TIMER_RETRY;
- } else
- list_del_init(&timer->it.cpu.entry);
+ } else {
+ cpu_timer_dequeue(ctmr);
+ }
/*
* We need to sample the current value to convert the new
@@ -622,11 +611,10 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
* times (in arm_timer). With an absolute time, we must
* check if it's already passed. In short, we need a sample.
*/
- if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- cpu_clock_sample(timer->it_clock, p, &val);
- } else {
- cpu_timer_sample_group(timer->it_clock, p, &val);
- }
+ if (CPUCLOCK_PERTHREAD(timer->it_clock))
+ val = cpu_clock_sample(clkid, p);
+ else
+ val = cpu_clock_sample_group(clkid, p, true);
if (old) {
if (old_expires == 0) {
@@ -634,18 +622,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
old->it_value.tv_nsec = 0;
} else {
/*
- * Update the timer in case it has
- * overrun already. If it has,
- * we'll report it as having overrun
- * and with the next reloaded timer
- * already ticking, though we are
- * swallowing that pending
- * notification here to install the
- * new setting.
+ * Update the timer in case it has overrun already.
+ * If it has, we'll report it as having overrun and
+ * with the next reloaded timer already ticking,
+ * though we are swallowing that pending
+ * notification here to install the new setting.
*/
- bump_cpu_timer(timer, val);
- if (val < timer->it.cpu.expires) {
- old_expires = timer->it.cpu.expires - val;
+ u64 exp = bump_cpu_timer(timer, val);
+
+ if (val < exp) {
+ old_expires = exp - val;
old->it_value = ns_to_timespec64(old_expires);
} else {
old->it_value.tv_nsec = 1;
@@ -674,7 +660,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
* For a timer with no notification action, we don't actually
* arm the timer (we'll just fake it for timer_gettime).
*/
- timer->it.cpu.expires = new_expires;
+ cpu_timer_setexpires(ctmr, new_expires);
if (new_expires != 0 && val < new_expires) {
arm_timer(timer);
}
@@ -715,24 +701,27 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
{
- u64 now;
- struct task_struct *p = timer->it.cpu.task;
+ clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ u64 now, expires = cpu_timer_getexpires(ctmr);
+ struct task_struct *p = ctmr->task;
- WARN_ON_ONCE(p == NULL);
+ if (WARN_ON_ONCE(!p))
+ return;
/*
* Easy part: convert the reload time.
*/
itp->it_interval = ktime_to_timespec64(timer->it_interval);
- if (!timer->it.cpu.expires)
+ if (!expires)
return;
/*
* Sample the clock to take the difference with the expiry time.
*/
if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- cpu_clock_sample(timer->it_clock, p, &now);
+ now = cpu_clock_sample(clkid, p);
} else {
struct sighand_struct *sighand;
unsigned long flags;
@@ -747,18 +736,18 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
/*
* The process has been reaped.
* We can't even collect a sample any more.
- * Call the timer disarmed, nothing else to do.
+ * Disarm the timer, nothing else to do.
*/
- timer->it.cpu.expires = 0;
+ cpu_timer_setexpires(ctmr, 0);
return;
} else {
- cpu_timer_sample_group(timer->it_clock, p, &now);
+ now = cpu_clock_sample_group(clkid, p, false);
unlock_task_sighand(p, &flags);
}
}
- if (now < timer->it.cpu.expires) {
- itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now);
+ if (now < expires) {
+ itp->it_value = ns_to_timespec64(expires - now);
} else {
/*
* The timer should have expired already, but the firing
@@ -769,26 +758,42 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
}
}
-static unsigned long long
-check_timers_list(struct list_head *timers,
- struct list_head *firing,
- unsigned long long curr)
-{
- int maxfire = 20;
+#define MAX_COLLECTED 20
- while (!list_empty(timers)) {
- struct cpu_timer_list *t;
+static u64 collect_timerqueue(struct timerqueue_head *head,
+ struct list_head *firing, u64 now)
+{
+ struct timerqueue_node *next;
+ int i = 0;
+
+ while ((next = timerqueue_getnext(head))) {
+ struct cpu_timer *ctmr;
+ u64 expires;
+
+ ctmr = container_of(next, struct cpu_timer, node);
+ expires = cpu_timer_getexpires(ctmr);
+ /* Limit the number of timers to expire at once */
+ if (++i == MAX_COLLECTED || now < expires)
+ return expires;
+
+ ctmr->firing = 1;
+ cpu_timer_dequeue(ctmr);
+ list_add_tail(&ctmr->elist, firing);
+ }
- t = list_first_entry(timers, struct cpu_timer_list, entry);
+ return U64_MAX;
+}
- if (!--maxfire || curr < t->expires)
- return t->expires;
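/*
 * Note on MAX_COLLECTED (illustration only): capping the batch at 20
 * bounds the work done per invocation under sighand lock. Returning the
 * expiry of the first timer left in the queue keeps base->nextevt
 * accurate, so anything not collected now is picked up on a later tick;
 * an empty queue yields U64_MAX, the "no timer armed" sentinel.
 */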
+static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
+ struct list_head *firing)
+{
+ struct posix_cputimer_base *base = pct->bases;
+ int i;
- t->firing = 1;
- list_move_tail(&t->entry, firing);
+ for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
+ base->nextevt = collect_timerqueue(&base->tqhead, firing,
+ samples[i]);
}
-
- return 0;
}
static inline void check_dl_overrun(struct task_struct *tsk)
@@ -799,6 +804,20 @@ static inline void check_dl_overrun(struct task_struct *tsk)
}
}
+static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
+{
+ if (time < limit)
+ return false;
+
+ if (print_fatal_signals) {
+ pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
+ rt ? "RT" : "CPU", hard ? "hard" : "soft",
+ current->comm, task_pid_nr(current));
+ }
+ __group_send_sig_info(signo, SEND_SIG_PRIV, current);
+ return true;
+}
+
/*
* Check for any per-thread CPU timers that have fired and move them off
* the tsk->cpu_timers[N] list onto the firing list. Here we update the
@@ -807,76 +826,50 @@ static inline void check_dl_overrun(struct task_struct *tsk)
static void check_thread_timers(struct task_struct *tsk,
struct list_head *firing)
{
- struct list_head *timers = tsk->cpu_timers;
- struct task_cputime *tsk_expires = &tsk->cputime_expires;
- u64 expires;
+ struct posix_cputimers *pct = &tsk->posix_cputimers;
+ u64 samples[CPUCLOCK_MAX];
unsigned long soft;
if (dl_task(tsk))
check_dl_overrun(tsk);
- /*
- * If cputime_expires is zero, then there are no active
- * per thread CPU timers.
- */
- if (task_cputime_zero(&tsk->cputime_expires))
+ if (expiry_cache_is_inactive(pct))
return;
- expires = check_timers_list(timers, firing, prof_ticks(tsk));
- tsk_expires->prof_exp = expires;
-
- expires = check_timers_list(++timers, firing, virt_ticks(tsk));
- tsk_expires->virt_exp = expires;
-
- tsk_expires->sched_exp = check_timers_list(++timers, firing,
- tsk->se.sum_exec_runtime);
+ task_sample_cputime(tsk, samples);
+ collect_posix_cputimers(pct, samples, firing);
/*
* Check for the special case thread timers.
*/
soft = task_rlimit(tsk, RLIMIT_RTTIME);
if (soft != RLIM_INFINITY) {
+ /* Task RT timeout is accounted in jiffies. RTTIME is usec */
+ unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ);
unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);
+ /* At the hard limit, send SIGKILL. No further action. */
if (hard != RLIM_INFINITY &&
- tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
- /*
- * At the hard limit, we just die.
- * No need to calculate anything else now.
- */
- if (print_fatal_signals) {
- pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
- tsk->comm, task_pid_nr(tsk));
- }
- __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+ check_rlimit(rttime, hard, SIGKILL, true, true))
return;
- }
- if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
- /*
- * At the soft limit, send a SIGXCPU every second.
- */
- if (soft < hard) {
- soft += USEC_PER_SEC;
- tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur =
- soft;
- }
- if (print_fatal_signals) {
- pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
- tsk->comm, task_pid_nr(tsk));
- }
- __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
+
+ /* At the soft limit, send a SIGXCPU every second */
+ if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
+ soft += USEC_PER_SEC;
+ tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
}
}
- if (task_cputime_zero(tsk_expires))
+
+ if (expiry_cache_is_inactive(pct))
tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}
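/*
 * Worked conversion for the rttime check above (illustration only): with
 * HZ = 250, one jiffy is USEC_PER_SEC / HZ = 4000us, so tsk->rt.timeout
 * = 500 jiffies compares as 2,000,000us against the RLIMIT_RTTIME soft
 * and hard limits, which are both specified in microseconds.
 */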
static inline void stop_process_timers(struct signal_struct *sig)
{
- struct thread_group_cputimer *cputimer = &sig->cputimer;
+ struct posix_cputimers *pct = &sig->posix_cputimers;
- /* Turn off cputimer->running. This is done without locking. */
- WRITE_ONCE(cputimer->running, false);
+ /* Turn off the active flag. This is done without locking. */