summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-04-30 08:15:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-04-30 08:15:40 -0700
commitab86e974f04b1cd827a9c7c35273834ebcd9ab38 (patch)
tree41df33732d2700d6d57d1e7ab3f430942f09ffcc /kernel
parent8700c95adb033843fc163d112b9d21d4fda78018 (diff)
parent6f7a05d7018de222e40ca003721037a530979974 (diff)
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core timer updates from Ingo Molnar: "The main changes in this cycle's merge are: - Implement shadow timekeeper to shorten in kernel reader side blocking, by Thomas Gleixner. - Posix timers enhancements by Pavel Emelyanov: - allocate timer ID per process, so that exact timer ID allocations can be re-created be checkpoint/restore code. - debuggability and tooling (/proc/PID/timers, etc.) improvements. - suspend/resume enhancements by Feng Tang: on certain new Intel Atom processors (Penwell and Cloverview), there is a feature that the TSC won't stop in S3 state, so the TSC value won't be reset to 0 after resume. This can be taken advantage of by the generic via the CLOCK_SOURCE_SUSPEND_NONSTOP flag: instead of using the RTC to recover/approximate sleep time, the main (and precise) clocksource can be used. - Fix /proc/timer_list for 4096 CPUs by Nathan Zimmer: on so many CPUs the file goes beyond 4MB of size and thus the current simplistic seqfile approach fails. Convert /proc/timer_list to a proper seq_file with its own iterator. - Cleanups and refactorings of the core timekeeping code by John Stultz. - International Atomic Clock time is managed by the NTP code internally currently but not exposed externally. Separate the TAI code out and add CLOCK_TAI support and TAI support to the hrtimer and posix-timer code, by John Stultz. - Add deep idle support enhacement to the broadcast clockevents core timer code, by Daniel Lezcano: add an opt-in CLOCK_EVT_FEAT_DYNIRQ clockevents feature (which will be utilized by future clockevents driver updates), which allows the use of IRQ affinities to avoid spurious wakeups of idle CPUs - the right CPU with an expiring timer will be woken. - Add new ARM bcm281xx clocksource driver, by Christian Daudt - ... various other fixes and cleanups" * 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (52 commits) clockevents: Set dummy handler on CPU_DEAD shutdown timekeeping: Update tk->cycle_last in resume posix-timers: Remove unused variable clockevents: Switch into oneshot mode even if broadcast registered late timer_list: Convert timer list to be a proper seq_file timer_list: Split timer_list_show_tickdevices posix-timers: Show sigevent info in proc file posix-timers: Introduce /proc/PID/timers file posix timers: Allocate timer id per process (v2) timekeeping: Make sure to notify hrtimers when TAI offset changes hrtimer: Fix ktime_add_ns() overflow on 32bit architectures hrtimer: Add expiry time overflow check in hrtimer_interrupt timekeeping: Shorten seq_count region timekeeping: Implement a shadow timekeeper timekeeping: Delay update of clock->cycle_last timekeeping: Store cycle_last value in timekeeper struct as well ntp: Remove ntp_lock, using the timekeeping locks to protect ntp state timekeeping: Simplify tai updating from do_adjtimex timekeeping: Hold timekeepering locks in do_adjtimex and hardpps timekeeping: Move ADJ_SETOFFSET to top level do_adjtimex() ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu/idle.c11
-rw-r--r--kernel/hrtimer.c26
-rw-r--r--kernel/posix-timers.c121
-rw-r--r--kernel/time.c11
-rw-r--r--kernel/time/ntp.c105
-rw-r--r--kernel/time/ntp_internal.h12
-rw-r--r--kernel/time/tick-broadcast.c239
-rw-r--r--kernel/time/tick-common.c2
-rw-r--r--kernel/time/tick-internal.h5
-rw-r--r--kernel/time/tick-sched.c4
-rw-r--r--kernel/time/timekeeping.c396
-rw-r--r--kernel/time/timer_list.c104
12 files changed, 757 insertions, 279 deletions
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index 168cf407a254..8b86c0c68edf 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -76,7 +76,16 @@ static void cpu_idle_loop(void)
local_irq_disable();
arch_cpu_idle_enter();
- if (cpu_idle_force_poll) {
+ /*
+ * In poll mode we reenable interrupts and spin.
+ *
+ * Also if we detected in the wakeup from idle
+ * path that the tick broadcast device expired
+ * for us, we don't want to go deep idle as we
+ * know that the IPI is going to arrive right
+ * away
+ */
+ if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
cpu_idle_poll();
} else {
current_clr_polling();
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 14be27feda49..609d8ff38b74 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -84,6 +84,12 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
.get_time = &ktime_get_boottime,
.resolution = KTIME_LOW_RES,
},
+ {
+ .index = HRTIMER_BASE_TAI,
+ .clockid = CLOCK_TAI,
+ .get_time = &ktime_get_clocktai,
+ .resolution = KTIME_LOW_RES,
+ },
}
};
@@ -91,6 +97,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
+ [CLOCK_TAI] = HRTIMER_BASE_TAI,
};
static inline int hrtimer_clockid_to_base(clockid_t clock_id)
@@ -107,8 +114,10 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
{
ktime_t xtim, mono, boot;
struct timespec xts, tom, slp;
+ s32 tai_offset;
get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
+ tai_offset = timekeeping_get_tai_offset();
xtim = timespec_to_ktime(xts);
mono = ktime_add(xtim, timespec_to_ktime(tom));
@@ -116,6 +125,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
+ base->clock_base[HRTIMER_BASE_TAI].softirq_time =
+ ktime_add(xtim, ktime_set(tai_offset, 0));
}
/*
@@ -276,6 +287,10 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
} else {
unsigned long rem = do_div(nsec, NSEC_PER_SEC);
+ /* Make sure nsec fits into long */
+ if (unlikely(nsec > KTIME_SEC_MAX))
+ return (ktime_t){ .tv64 = KTIME_MAX };
+
tmp = ktime_set((long)nsec, rem);
}
@@ -652,8 +667,9 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
{
ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+ ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
- return ktime_get_update_offsets(offs_real, offs_boot);
+ return ktime_get_update_offsets(offs_real, offs_boot, offs_tai);
}
/*
@@ -1011,7 +1027,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
* @timer: the timer to be added
* @tim: expiry time
* @delta_ns: "slack" range for the timer
- * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
+ * relative (HRTIMER_MODE_REL)
*
* Returns:
* 0 on success
@@ -1028,7 +1045,8 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
* hrtimer_start - (re)start an hrtimer on the current CPU
* @timer: the timer to be added
* @tim: expiry time
- * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
+ * relative (HRTIMER_MODE_REL)
*
* Returns:
* 0 on success
@@ -1310,6 +1328,8 @@ retry:
expires = ktime_sub(hrtimer_get_expires(timer),
base->offset);
+ if (expires.tv64 < 0)
+ expires.tv64 = KTIME_MAX;
if (expires.tv64 < expires_next.tv64)
expires_next = expires;
break;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 6edbb2c55c22..424c2d4265c9 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -40,38 +40,31 @@
#include <linux/list.h>
#include <linux/init.h>
#include <linux/compiler.h>
-#include <linux/idr.h>
+#include <linux/hash.h>
#include <linux/posix-clock.h>
#include <linux/posix-timers.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/export.h>
+#include <linux/hashtable.h>
/*
- * Management arrays for POSIX timers. Timers are kept in slab memory
- * Timer ids are allocated by an external routine that keeps track of the
- * id and the timer. The external interface is:
- *
- * void *idr_find(struct idr *idp, int id); to find timer_id <id>
- * int idr_get_new(struct idr *idp, void *ptr); to get a new id and
- * related it to <ptr>
- * void idr_remove(struct idr *idp, int id); to release <id>
- * void idr_init(struct idr *idp); to initialize <idp>
- * which we supply.
- * The idr_get_new *may* call slab for more memory so it must not be
- * called under a spin lock. Likewise idr_remore may release memory
- * (but it may be ok to do this under a lock...).
- * idr_find is just a memory look up and is quite fast. A -1 return
- * indicates that the requested id does not exist.
+ * Management arrays for POSIX timers. Timers are now kept in static hash table
+ * with 512 entries.
+ * Timer ids are allocated by local routine, which selects proper hash head by
+ * key, constructed from current->signal address and per signal struct counter.
+ * This keeps timer ids unique per process, but now they can intersect between
+ * processes.
*/
/*
* Lets keep our timers in a slab cache :-)
*/
static struct kmem_cache *posix_timers_cache;
-static struct idr posix_timers_id;
-static DEFINE_SPINLOCK(idr_lock);
+
+static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
+static DEFINE_SPINLOCK(hash_lock);
/*
* we assume that the new SIGEV_THREAD_ID shares no bits with the other
@@ -152,6 +145,56 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
__timr; \
})
+static int hash(struct signal_struct *sig, unsigned int nr)
+{
+ return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
+}
+
+static struct k_itimer *__posix_timers_find(struct hlist_head *head,
+ struct signal_struct *sig,
+ timer_t id)
+{
+ struct k_itimer *timer;
+
+ hlist_for_each_entry_rcu(timer, head, t_hash) {
+ if ((timer->it_signal == sig) && (timer->it_id == id))
+ return timer;
+ }
+ return NULL;
+}
+
+static struct k_itimer *posix_timer_by_id(timer_t id)
+{
+ struct signal_struct *sig = current->signal;
+ struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)];
+
+ return __posix_timers_find(head, sig, id);
+}
+
+static int posix_timer_add(struct k_itimer *timer)
+{
+ struct signal_struct *sig = current->signal;
+ int first_free_id = sig->posix_timer_id;
+ struct hlist_head *head;
+ int ret = -ENOENT;
+
+ do {
+ spin_lock(&hash_lock);
+ head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
+ if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
+ hlist_add_head_rcu(&timer->t_hash, head);
+ ret = sig->posix_timer_id;
+ }
+ if (++sig->posix_timer_id < 0)
+ sig->posix_timer_id = 0;
+ if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
+ /* Loop over all possible ids completed */
+ ret = -EAGAIN;
+ spin_unlock(&hash_lock);
+ } while (ret == -ENOENT);
+ return ret;
+}
+
static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
{
spin_unlock_irqrestore(&timr->it_lock, flags);
@@ -221,6 +264,11 @@ static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
return 0;
}
+static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
+{
+ timekeeping_clocktai(tp);
+ return 0;
+}
/*
* Initialize everything, well, just everything in Posix clocks/timers ;)
@@ -261,6 +309,16 @@ static __init int init_posix_timers(void)
.clock_getres = posix_get_coarse_res,
.clock_get = posix_get_monotonic_coarse,
};
+ struct k_clock clock_tai = {
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_get_tai,
+ .nsleep = common_nsleep,
+ .nsleep_restart = hrtimer_nanosleep_restart,
+ .timer_create = common_timer_create,
+ .timer_set = common_timer_set,
+ .timer_get = common_timer_get,
+ .timer_del = common_timer_del,
+ };
struct k_clock clock_boottime = {
.clock_getres = hrtimer_get_res,
.clock_get = posix_get_boottime,
@@ -278,11 +336,11 @@ static __init int init_posix_timers(void)
posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
+ posix_timers_register_clock(CLOCK_TAI, &clock_tai);
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, SLAB_PANIC,
NULL);
- idr_init(&posix_timers_id);
return 0;
}
@@ -504,9 +562,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
{
if (it_id_set) {
unsigned long flags;
- spin_lock_irqsave(&idr_lock, flags);
- idr_remove(&posix_timers_id, tmr->it_id);
- spin_unlock_irqrestore(&idr_lock, flags);
+ spin_lock_irqsave(&hash_lock, flags);
+ hlist_del_rcu(&tmr->t_hash);
+ spin_unlock_irqrestore(&hash_lock, flags);
}
put_pid(tmr->it_pid);
sigqueue_free(tmr->sigq);
@@ -552,22 +610,11 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
return -EAGAIN;
spin_lock_init(&new_timer->it_lock);
-
- idr_preload(GFP_KERNEL);
- spin_lock_irq(&idr_lock);
- error = idr_alloc(&posix_timers_id, new_timer, 0, 0, GFP_NOWAIT);
- spin_unlock_irq(&idr_lock);
- idr_preload_end();
- if (error < 0) {
- /*
- * Weird looking, but we return EAGAIN if the IDR is
- * full (proper POSIX return value for this)
- */
- if (error == -ENOSPC)
- error = -EAGAIN;
+ new_timer_id = posix_timer_add(new_timer);
+ if (new_timer_id < 0) {
+ error = new_timer_id;
goto out;
}
- new_timer_id = error;
it_id_set = IT_ID_SET;
new_timer->it_id = (timer_t) new_timer_id;
@@ -645,7 +692,7 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
return NULL;
rcu_read_lock();
- timr = idr_find(&posix_timers_id, (int)timer_id);
+ timr = posix_timer_by_id(timer_id);
if (timr) {
spin_lock_irqsave(&timr->it_lock, *flags);
if (timr->it_signal == current->signal) {
diff --git a/kernel/time.c b/kernel/time.c
index f8342a41efa6..d3617dbd3dca 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -138,13 +138,14 @@ int persistent_clock_is_local;
*/
static inline void warp_clock(void)
{
- struct timespec adjust;
+ if (sys_tz.tz_minuteswest != 0) {
+ struct timespec adjust;
- adjust = current_kernel_time();
- if (sys_tz.tz_minuteswest != 0)
persistent_clock_is_local = 1;
- adjust.tv_sec += sys_tz.tz_minuteswest * 60;
- do_settimeofday(&adjust);
+ adjust.tv_sec = sys_tz.tz_minuteswest * 60;
+ adjust.tv_nsec = 0;
+ timekeeping_inject_offset(&adjust);
+ }
}
/*
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 072bb066bb7d..12ff13a838c6 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -18,13 +18,14 @@
#include <linux/rtc.h>
#include "tick-internal.h"
+#include "ntp_internal.h"
/*
* NTP timekeeping variables:
+ *
+ * Note: All of the NTP state is protected by the timekeeping locks.
*/
-DEFINE_RAW_SPINLOCK(ntp_lock);
-
/* USER_HZ period (usecs): */
unsigned long tick_usec = TICK_USEC;
@@ -53,9 +54,6 @@ static int time_state = TIME_OK;
/* clock status bits: */
static int time_status = STA_UNSYNC;
-/* TAI offset (secs): */
-static long time_tai;
-
/* time adjustment (nsecs): */
static s64 time_offset;
@@ -134,8 +132,6 @@ static inline void pps_reset_freq_interval(void)
/**
* pps_clear - Clears the PPS state variables
- *
- * Must be called while holding a write on the ntp_lock
*/
static inline void pps_clear(void)
{
@@ -150,8 +146,6 @@ static inline void pps_clear(void)
/* Decrease pps_valid to indicate that another second has passed since
* the last PPS signal. When it reaches 0, indicate that PPS signal is
* missing.
- *
- * Must be called while holding a write on the ntp_lock
*/
static inline void pps_dec_valid(void)
{
@@ -346,10 +340,6 @@ static void ntp_update_offset(long offset)
*/
void ntp_clear(void)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&ntp_lock, flags);
-
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
@@ -362,20 +352,12 @@ void ntp_clear(void)
/* Clear PPS state variables */
pps_clear();
- raw_spin_unlock_irqrestore(&ntp_lock, flags);
-
}
u64 ntp_tick_length(void)
{
- unsigned long flags;
- s64 ret;
-
- raw_spin_lock_irqsave(&ntp_lock, flags);
- ret = tick_length;
- raw_spin_unlock_irqrestore(&ntp_lock, flags);
- return ret;
+ return tick_length;
}
@@ -393,9 +375,6 @@ int second_overflow(unsigned long secs)
{
s64 delta;
int leap = 0;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&ntp_lock, flags);
/*
* Leap second processing. If in leap-insert state at the end of the
@@ -415,7 +394,6 @@ int second_overflow(unsigned long secs)
else if (secs % 86400 == 0) {
leap = -1;
time_state = TIME_OOP;
- time_tai++;
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
}
@@ -425,7 +403,6 @@ int second_overflow(unsigned long secs)
time_state = TIME_OK;
else if ((secs + 1) % 86400 == 0) {
leap = 1;
- time_tai--;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
@@ -479,8 +456,6 @@ int second_overflow(unsigned long secs)
time_adjust = 0;
out:
- raw_spin_unlock_irqrestore(&ntp_lock, flags);
-
return leap;
}
@@ -575,11 +550,10 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
time_status |= txc->status & ~STA_RONLY;
}
-/*
- * Called with ntp_lock held, so we can access and modify
- * all the global NTP state:
- */
-static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts)
+
+static inline void process_adjtimex_modes(struct timex *txc,
+ struct timespec *ts,
+ s32 *time_tai)
{
if (txc->modes & ADJ_STATUS)
process_adj_status(txc, ts);
@@ -613,7 +587,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
}
if (txc->modes & ADJ_TAI && txc->constant > 0)
- time_tai = txc->constant;
+ *time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
ntp_update_offset(txc->offset);
@@ -625,16 +599,13 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
ntp_update_frequency();
}
-/*
- * adjtimex mainly allows reading (and writing, if superuser) of
- * kernel time-keeping variables. used by xntpd.
+
+
+/**
+ * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
*/
-int do_adjtimex(struct timex *txc)
+int ntp_validate_timex(struct timex *txc)
{
- struct timespec ts;
- int result;
-
- /* Validate the data before disabling interrupts */
if (txc->modes & ADJ_ADJTIME) {
/* singleshot must not be used with any other mode bits */
if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
@@ -646,7 +617,6 @@ int do_adjtimex(struct timex *txc)
/* In order to modify anything, you gotta be super-user! */
if (txc->modes && !capable(CAP_SYS_TIME))
return -EPERM;
-
/*
* if the quartz is off by more than 10% then
* something is VERY wrong!
@@ -657,22 +627,20 @@ int do_adjtimex(struct timex *txc)
return -EINVAL;
}
- if (txc->modes & ADJ_SETOFFSET) {
- struct timespec delta;
- delta.tv_sec = txc->time.tv_sec;
- delta.tv_nsec = txc->time.tv_usec;
- if (!capable(CAP_SYS_TIME))
- return -EPERM;
- if (!(txc->modes & ADJ_NANO))
- delta.tv_nsec *= 1000;
- result = timekeeping_inject_offset(&delta);
- if (result)
- return result;
- }
+ if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
+ return -EPERM;
- getnstimeofday(&ts);
+ return 0;
+}
- raw_spin_lock_irq(&ntp_lock);
+
+/*
+ * adjtimex mainly allows reading (and writing, if superuser) of
+ * kernel time-keeping variables. used by xntpd.
+ */
+int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
+{
+ int result;
if (txc->modes & ADJ_ADJTIME) {
long save_adjust = time_adjust;
@@ -687,7 +655,7 @@ int do_adjtimex(struct timex *txc)
/* If there are input parameters, then process them: */
if (txc->modes)
- process_adjtimex_modes(txc, &ts);
+ process_adjtimex_modes(txc, ts, time_tai);
txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
NTP_SCALE_SHIFT);
@@ -709,15 +677,13 @@ int do_adjtimex(struct timex *txc)
txc->precision = 1;
txc->tolerance = MAXFREQ_SCALED / PPM_SCALE;
txc->tick = tick_usec;
- txc->tai = time_tai;
+ txc->tai = *time_tai;
/* fill PPS status fields */
pps_fill_timex(txc);
- raw_spin_unlock_irq(&ntp_lock);
-
- txc->time.tv_sec = ts.tv_sec;
- txc->time.tv_usec = ts.tv_nsec;
+ txc->time.tv_sec = ts->tv_sec;
+ txc->time.tv_usec = ts->tv_nsec;
if (!(time_status & STA_NANO))
txc->time.tv_usec /= NSEC_PER_USEC;
@@ -894,7 +860,7 @@ static void hardpps_update_phase(long error)
}
/*
- * hardpps() - discipline CPU clock oscillator to external PPS signal
+ * __hardpps() - discipline CPU clock oscillator to external PPS signal
*
* This routine is called at each PPS signal arrival in order to
* discipline the CPU clock oscillator to the PPS signal. It takes two
@@ -905,15 +871,13 @@ static void hardpps_update_phase(long error)
* This code is based on David Mills's reference nanokernel
* implementation. It was mostly rewritten but keeps the same idea.
*/
-void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
{
struct pps_normtime pts_norm, freq_norm;
unsigned long flags;
pts_norm = pps_normalize_ts(*phase_ts);
- raw_spin_lock_irqsave(&ntp_lock, flags);
-
/* clear the error bits, they will be set again if needed */
time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
@@ -925,7 +889,6 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
* just start the frequency interval */
if (unlikely(pps_fbase.tv_sec == 0)) {
pps_fbase = *raw_ts;
- raw_spin_unlock_irqrestore(&ntp_lock, flags);
return;
}
@@ -940,7 +903,6 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
time_status |= STA_PPSJITTER;
/* restart the frequency calibration interval */
pps_fbase = *raw_ts;
- raw_spin_unlock_irqrestore(&ntp_lock, flags);
pr_err("hardpps: PPSJITTER: bad pulse\n");
return;
}
@@ -957,10 +919,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
hardpps_update_phase(pts_norm.nsec);
- raw_spin_unlock_irqrestore(&ntp_lock, flags);
}
-EXPORT_SYMBOL(hardpps);
-
#endif /* CONFIG_NTP_PPS */
static int __init ntp_tick_adj_setup(char *str)
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
new file mode 100644
index 000000000000..1950cb4ca2a4
--- /dev/null
+++ b/kernel/time/ntp_internal.h
@@ -0,0 +1,12 @@
+#ifndef _LINUX_NTP_INTERNAL_H
+#define _LINUX_NTP_INTERNAL_H
+
+extern void ntp_init(void);
+extern void ntp_clear(void);
+/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
+extern u64 ntp_tick_length(void);
+extern int second_overflow(unsigned long secs);
+extern int ntp_validate_timex(struct timex *);
+extern int __do_adjtimex(struct timex *, struct timespec *, s32 *);
+extern void __hardpps(const struct timespec *, const struct timespec *);
+#endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 7f32fe0e52cd..61d00a8cdf2f 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -28,9 +28,8 @@
*/
static struct tick_device tick_broadcast_device;
-/* FIXME: Use cpumask_var_t. */
-static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
-static DECLARE_BITMAP(tmpmask, NR_CPUS);
+static cpumask_var_t tick_broadcast_mask;
+static cpumask_var_t tmpmask;
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;
@@ -50,7 +49,7 @@ struct tick_device *tick_get_broadcast_device(void)
struct cpumask *tick_get_broadcast_mask(void)
{
- return to_cpumask(tick_broadcast_mask);
+ return tick_broadcast_mask;
}
/*
@@ -67,6 +66,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
*/
int tick_check_broadcast_device(struct clock_event_device *dev)
{
+ struct clock_event_device *cur = tick_broadcast_device.evtdev;
+
if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
(tick_broadcast_device.evtdev &&
tick_broadcast_device.evtdev->rating >= dev->rating) ||
@@ -74,9 +75,21 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
return 0;
clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
+ if (cur)
+ cur->event_handler = clockevents_handle_noop;
tick_broadcast_device.evtdev = dev;
- if (!cpumask_empty(tick_get_broadcast_mask()))
+ if (!cpumask_empty(tick_broadcast_mask))
tick_broadcast_start_periodic(dev);
+ /*
+ * Inform all cpus about this. We might be in a situation
+ * where we did not switch to oneshot mode because the per cpu
+ * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
+ * of a oneshot capable broadcast device. Without that
+ * notification the systems stays stuck in periodic mode
+ * forever.
+ */
+ if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
+ tick_clock_notify();
return 1;
}
@@ -124,7 +137,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
if (!tick_device_is_functional(dev)) {
dev->event_handler = tick_handle_periodic;
tick_device_setup_broadcast_func(dev);
- cpumask_set_cpu(cpu, tick_get_broadcast_mask());
+ cpumask_set_cpu(cpu, tick_broadcast_mask);
tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
ret = 1;
} else {
@@ -135,7 +148,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
*/
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
int cpu = smp_processor_id();
- cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+ cpumask_clear_cpu(cpu, tick_broadcast_mask);
tick_broadcast_clear_oneshot(cpu);
} else {
tick_device_setup_broadcast_func(dev);
@@ -199,9 +212,8 @@ static void tick_do_periodic_broadcast(void)
{
raw_spin_lock(&tick_broadcast_lock);
- cpumask_and(to_cpumask(tmpmask),
- cpu_online_mask, tick_get_broadcast_mask());
- tick_do_broadcast(to_cpumask(tmpmask));
+ cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
+ tick_do_broadcast(tmpmask);
raw_spin_unlock(&tick_broadcast_lock);
}
@@ -264,13 +276,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
if (!tick_device_is_functional(dev))
goto out;
- bc_stopped = cpumask_empty(tick_get_broadcast_mask());
+ bc_stopped = cpumask_empty(tick_broadcast_mask);
switch (*reason) {
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
- if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
- cpumask_set_cpu(cpu, tick_get_broadcast_mask());
+ if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
clockevents_shutdown(dev);
@@ -280,8 +291,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
break;
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
if (!tick_broadcast_force &&
- cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
- cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+ cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
tick_setup_periodic(dev, 0);
@@ -289,7 +299,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
break;
}
- if (cpumask_empty(tick_get_broadcast_mask())) {
+ if (cpumask_empty(tick_broadcast_mask)) {
if (!bc_stopped)
clockevents_shutdown(bc);
} else if (bc_stopped) {
@@ -338,10 +348,10 @@ void tick_shutdown_broadcast(unsigned int *cpup)
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
- cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+ cpumask_clear_cpu(cpu, tick_broadcast_mask);
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
- if (bc && cpumask_empty(tick_get_broadcast_mask()))
+ if (bc && cpumask_empty(tick_broadcast_mask))
clockevents_shutdown(bc);
}
@@ -377,13 +387,13 @@ int tick_resume_broadcast(void)
switch (tick_broadcast_device.mode) {
case TICKDEV_MODE_PERIODIC:
- if (!cpumask_empty(tick_get_broadcast_mask()))
+ if (!cpumask_empty(tick_broadcast_mask))
tick_broadcast_start_periodic(bc);
broadcast = cpumask_test_cpu(smp_processor_id(),
- tick_get_broadcast_mask());
+ tick_broadcast_mask);
break;
case TICKDEV_MODE_ONESHOT:
- if (!cpumask_empty(tick_get_broadcast_mask()))
+ if (!cpumask_empty(tick_broadcast_mask))
broadcast = tick_resume_broadcast_oneshot(bc);
break;
}
@@ -396,25 +406,58 @@ int tick_resume_broadcast(void)
#ifdef CONFIG_TICK_ONESHOT
-/* FIXME: use cpumask_var_t. */
-static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
+static cpumask_var_t tick_broadcast_oneshot_mask;
+static cpumask_var_t tick_broadcast_pending_mask;
+static cpumask_var_t tick_broadcast_force_mask;
/*
* Exposed for debugging: see timer_list.c
*/
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
- return to_cpumask(tick_broadcast_oneshot_mask);
+ return tick_broadcast_oneshot_mask;
}
-static int tick_broadcast_set_event(ktime_t expires, int force)
+/*
+ * Called before going idle with interrupts disabled. Checks whether a
+ * broadcast event from the other core is about to happen. We detected
+ * that in tick_broadcast_oneshot_control(). The callsite can use this
+ * to avoid a deep idle transition as we are about to get the
+ * broadcast IPI right away.
+ */
+int tick_check_broadcast_expired(void)
{
- struct clock_event_device *bc = tick_broadcast_device.evtdev;
+ return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
+}
+
+/*
+ * Set broadcast interrupt affinity
+ */
+static void tick_broadcast_set_affinity(struct clock_event_device *bc,
+ const struct cpumask *cpumask)
+{
+ if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
+ return;
+
+ if (cpumask_equal(bc->cpumask, cpumask))
+ return;
+
+ bc->cpumask = cpumask;
+ irq_set_affinity(bc->irq, bc->cpumask);
+}
+
+static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
+ ktime_t expires, int force)
+{
+ int ret;
if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
- return clockevents_program_event(bc, expires, force);
+ ret = clockevents_program_event(bc, expires, force);
+ if (!ret)
+ tick_broadcast_set_affinity(bc, cpumask_of(cpu));
+ return ret;
}
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
@@ -429,7 +472,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
*/
void tick_check_oneshot_broadcast(int cpu)
{
- if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
+ if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
@@ -443,27 +486,39 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
struct tick_device *td;
ktime_t now, next_event;
- int cpu;
+ int cpu, next_cpu = 0;
raw_spin_lock(&tick_broadcast_lock);
again:
dev->next_event.tv64 = KTIME_MAX;
next_event.tv64 = KTIME_MAX;
- cpumask_clear(to_cpumask(tmpmask));
+ cpumask_clear(tmpmask);
now = ktime_get();
/* Find all expired events */
- for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
+ for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
td = &per_cpu(tick_cpu_device, cpu);
- if (td->evtdev->next_event.tv64 <= now.tv64)
- cpumask_set_cpu(cpu, to_cpumask(tmpmask));
- else if (td->evtdev->next_event.tv64 < next_event.tv64)
+ if (td->evtdev->next_event.tv64 <= now.tv64) {
+ cpumask_set_cpu(cpu, tmpmask);
+ /*
+ * Mark the remote cpu in the pending mask, so
+ * it can avoid reprogramming the cpu local
+ * timer in tick_broadcast_oneshot_control().
+ */
+ cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
+ } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
next_event.tv64 = td->evtdev->next_event.tv64;
+ next_cpu = cpu;
+ }
}
+ /* Take care of enforced broadcast requests */
+ cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
+ cpumask_clear(tick_broadcast_force_mask);
+
/*
* Wakeup the cpus which have an expired event.
*/
- tick_do_broadcast(to_cpumask(tmpmask));
+ tick_do_broadcast(tmpmask);
/*
* Two reasons for reprogram:
@@ -480,7 +535,7 @@ again:
* Rearm the broadcast device. If event expired,
* repeat t