summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-09-18 09:15:24 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-09-18 09:15:24 -0700
commit a03fdb7612874834d6847107198712d18b5242c7 (patch)
tree 9ae81170509fd8b1c23d1b7e8edfa7a2203ffce3 /kernel
parent 202c4675c55ddf6b443c7e057d2dff6b42ef71aa (diff)
parent 12e09337fe238981cb0c87543306e23775d1a143 (diff)
Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (34 commits)
  time: Prevent 32 bit overflow with set_normalized_timespec()
  clocksource: Delay clocksource down rating to late boot
  clocksource: clocksource_select must be called with mutex locked
  clocksource: Resolve cpu hotplug dead lock with TSC unstable, fix crash
  timers: Drop a function prototype
  clocksource: Resolve cpu hotplug dead lock with TSC unstable
  timer.c: Fix S/390 comments
  timekeeping: Fix invalid getboottime() value
  timekeeping: Fix up read_persistent_clock() breakage on sh
  timekeeping: Increase granularity of read_persistent_clock(), build fix
  time: Introduce CLOCK_REALTIME_COARSE
  x86: Do not unregister PIT clocksource on PIT oneshot setup/shutdown
  clocksource: Avoid clocksource watchdog circular locking dependency
  clocksource: Protect the watchdog rating changes with clocksource_mutex
  clocksource: Call clocksource_change_rating() outside of watchdog_lock
  timekeeping: Introduce read_boot_clock
  timekeeping: Increase granularity of read_persistent_clock()
  timekeeping: Update clocksource with stop_machine
  timekeeping: Add timekeeper read_clock helper functions
  timekeeping: Move NTP adjusted clock multiplier to struct timekeeper
  ...

Fix trivial conflict due to MIPS lemote -> loongson renaming.
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/hrtimer.c 57
-rw-r--r-- kernel/posix-timers.c 35
-rw-r--r-- kernel/time.c 9
-rw-r--r-- kernel/time/clocksource.c 529
-rw-r--r-- kernel/time/jiffies.c 6
-rw-r--r-- kernel/time/ntp.c 7
-rw-r--r-- kernel/time/timekeeping.c 535
-rw-r--r-- kernel/timer.c 28
8 files changed, 780 insertions, 426 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 05071bf6a37b..c03f221fee44 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -48,37 +48,6 @@
#include <asm/uaccess.h>
-/**
- * ktime_get - get the monotonic time in ktime_t format
- *
- * returns the time in ktime_t format
- */
-ktime_t ktime_get(void)
-{
- struct timespec now;
-
- ktime_get_ts(&now);
-
- return timespec_to_ktime(now);
-}
-EXPORT_SYMBOL_GPL(ktime_get);
-
-/**
- * ktime_get_real - get the real (wall-) time in ktime_t format
- *
- * returns the time in ktime_t format
- */
-ktime_t ktime_get_real(void)
-{
- struct timespec now;
-
- getnstimeofday(&now);
-
- return timespec_to_ktime(now);
-}
-
-EXPORT_SYMBOL_GPL(ktime_get_real);
-
/*
* The timer bases:
*
@@ -106,31 +75,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
}
};
-/**
- * ktime_get_ts - get the monotonic clock in timespec format
- * @ts: pointer to timespec variable
- *
- * The function calculates the monotonic clock from the realtime
- * clock and the wall_to_monotonic offset and stores the result
- * in normalized timespec format in the variable pointed to by @ts.
- */
-void ktime_get_ts(struct timespec *ts)
-{
- struct timespec tomono;
- unsigned long seq;
-
- do {
- seq = read_seqbegin(&xtime_lock);
- getnstimeofday(ts);
- tomono = wall_to_monotonic;
-
- } while (read_seqretry(&xtime_lock, seq));
-
- set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
- ts->tv_nsec + tomono.tv_nsec);
-}
-EXPORT_SYMBOL_GPL(ktime_get_ts);
-
/*
* Get the coarse grained time at the softirq based on xtime and
* wall_to_monotonic.
@@ -1155,7 +1099,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
clock_id = CLOCK_MONOTONIC;
timer->base = &cpu_base->clock_base[clock_id];
- INIT_LIST_HEAD(&timer->cb_entry);
hrtimer_init_timer_hres(timer);
#ifdef CONFIG_TIMER_STATS
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d089d052c4a9..495440779ce3 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -242,6 +242,25 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
return 0;
}
+
+static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp)
+{
+ *tp = current_kernel_time();
+ return 0;
+}
+
+static int posix_get_monotonic_coarse(clockid_t which_clock,
+ struct timespec *tp)
+{
+ *tp = get_monotonic_coarse();
+ return 0;
+}
+
+int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
+{
+ *tp = ktime_to_timespec(KTIME_LOW_RES);
+ return 0;
+}
/*
* Initialize everything, well, just everything in Posix clocks/timers ;)
*/
@@ -262,10 +281,26 @@ static __init int init_posix_timers(void)
.timer_create = no_timer_create,
.nsleep = no_nsleep,
};
+ struct k_clock clock_realtime_coarse = {
+ .clock_getres = posix_get_coarse_res,
+ .clock_get = posix_get_realtime_coarse,
+ .clock_set = do_posix_clock_nosettime,
+ .timer_create = no_timer_create,
+ .nsleep = no_nsleep,
+ };
+ struct k_clock clock_monotonic_coarse = {
+ .clock_getres = posix_get_coarse_res,
+ .clock_get = posix_get_monotonic_coarse,
+ .clock_set = do_posix_clock_nosettime,
+ .timer_create = no_timer_create,
+ .nsleep = no_nsleep,
+ };
register_posix_clock(CLOCK_REALTIME, &clock_realtime);
register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
+ register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
+ register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, SLAB_PANIC,
diff --git a/kernel/time.c b/kernel/time.c
index 29511943871a..2e2e469a7fec 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -370,13 +370,20 @@ EXPORT_SYMBOL(mktime);
* 0 <= tv_nsec < NSEC_PER_SEC
* For negative values only the tv_sec field is negative !
*/
-void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
+void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
{
while (nsec >= NSEC_PER_SEC) {
+ /*
+ * The following asm() prevents the compiler from
+ * optimising this loop into a modulo operation. See
+ * also __iter_div_u64_rem() in include/linux/time.h
+ */
+ asm("" : "+rm"(nsec));
nsec -= NSEC_PER_SEC;
++sec;
}
while (nsec < 0) {
+ asm("" : "+rm"(nsec));
nsec += NSEC_PER_SEC;
--sec;
}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 7466cb811251..09113347d328 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -21,7 +21,6 @@
*
* TODO WishList:
* o Allow clocksource drivers to be unregistered
- * o get rid of clocksource_jiffies extern
*/
#include <linux/clocksource.h>
@@ -30,6 +29,7 @@
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
+#include <linux/kthread.h>
void timecounter_init(struct timecounter *tc,
const struct cyclecounter *cc,
@@ -107,50 +107,35 @@ u64 timecounter_cyc2time(struct timecounter *tc,
}
EXPORT_SYMBOL(timecounter_cyc2time);
-/* XXX - Would like a better way for initializing curr_clocksource */
-extern struct clocksource clocksource_jiffies;
-
/*[Clocksource internal variables]---------
* curr_clocksource:
- * currently selected clocksource. Initialized to clocksource_jiffies.
- * next_clocksource:
- * pending next selected clocksource.
+ * currently selected clocksource.
* clocksource_list:
* linked list with the registered clocksources
- * clocksource_lock:
- * protects manipulations to curr_clocksource and next_clocksource
- * and the clocksource_list
+ * clocksource_mutex:
+ * protects manipulations to curr_clocksource and the clocksource_list
* override_name:
* Name of the user-specified clocksource.
*/
-static struct clocksource *curr_clocksource = &clocksource_jiffies;
-static struct clocksource *next_clocksource;
-static struct clocksource *clocksource_override;
+static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
-static DEFINE_SPINLOCK(clocksource_lock);
+static DEFINE_MUTEX(clocksource_mutex);
static char override_name[32];
static int finished_booting;
-/* clocksource_done_booting - Called near the end of core bootup
- *
- * Hack to avoid lots of clocksource churn at boot time.
- * We use fs_initcall because we want this to start before
- * device_initcall but after subsys_initcall.
- */
-static int __init clocksource_done_booting(void)
-{
- finished_booting = 1;
- return 0;
-}
-fs_initcall(clocksource_done_booting);
-
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
+static void clocksource_watchdog_work(struct work_struct *work);
+
static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
+static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static cycle_t watchdog_last;
-static unsigned long watchdog_resumed;
+static int watchdog_running;
+
+static int clocksource_watchdog_kthread(void *data);
+static void __clocksource_change_rating(struct clocksource *cs, int rating);
/*
* Interval: 0.5sec Threshold: 0.0625s
@@ -158,135 +143,249 @@ static unsigned long watchdog_resumed;
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
-static void clocksource_ratewd(struct clocksource *cs, int64_t delta)
+static void clocksource_watchdog_work(struct work_struct *work)
{
- if (delta > -WATCHDOG_THRESHOLD && delta < WATCHDOG_THRESHOLD)
- return;
+ /*
+ * If kthread_run fails the next watchdog scan over the
+ * watchdog_list will find the unstable clock again.
+ */
+ kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
+}
+static void __clocksource_unstable(struct clocksource *cs)
+{
+ cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
+ cs->flags |= CLOCK_SOURCE_UNSTABLE;
+ if (finished_booting)
+ schedule_work(&watchdog_work);
+}
+
+static void clocksource_unstable(struct clocksource *cs, int64_t delta)
+{
printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
cs->name, delta);
- cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
- clocksource_change_rating(cs, 0);
- list_del(&cs->wd_list);
+ __clocksource_unstable(cs);
+}
+
+/**
+ * clocksource_mark_unstable - mark clocksource unstable via watchdog
+ * @cs: clocksource to be marked unstable
+ *
+ * This function is called instead of clocksource_change_rating from
+ * cpu hotplug code to avoid a deadlock between the clocksource mutex
+ * and the cpu hotplug mutex. It defers the update of the clocksource
+ * to the watchdog thread.
+ */
+void clocksource_mark_unstable(struct clocksource *cs)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&watchdog_lock, flags);
+ if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
+ if (list_empty(&cs->wd_list))
+ list_add(&cs->wd_list, &watchdog_list);
+ __clocksource_unstable(cs);
+ }
+ spin_unlock_irqrestore(&watchdog_lock, flags);
}
static void clocksource_watchdog(unsigned long data)
{
- struct clocksource *cs, *tmp;
+ struct clocksource *cs;
cycle_t csnow, wdnow;
int64_t wd_nsec, cs_nsec;
- int resumed;
+ int next_cpu;
spin_lock(&watchdog_lock);
-
- resumed = test_and_clear_bit(0, &watchdog_resumed);
+ if (!watchdog_running)
+ goto out;
wdnow = watchdog->read(watchdog);
- wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
+ wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask,
+ watchdog->mult, watchdog->shift);
watchdog_last = wdnow;
- list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
- csnow = cs->read(cs);
+ list_for_each_entry(cs, &watchdog_list, wd_list) {
- if (unlikely(resumed)) {
- cs->wd_last = csnow;
+ /* Clocksource already marked unstable? */
+ if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
+ if (finished_booting)
+ schedule_work(&watchdog_work);
continue;
}
- /* Initialized ? */
+ csnow = cs->read(cs);
+
+ /* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
- if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
- (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
- cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
- /*
- * We just marked the clocksource as
- * highres-capable, notify the rest of the
- * system as well so that we transition
- * into high-res mode:
- */
- tick_clock_notify();
- }
cs->flags |= CLOCK_SOURCE_WATCHDOG;
cs->wd_last = csnow;
- } else {
- cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask);
- cs->wd_last = csnow;
- /* Check the delta. Might remove from the list ! */
- clocksource_ratewd(cs, cs_nsec - wd_nsec);
+ continue;
}
- }
- if (!list_empty(&watchdog_list)) {
- /*
- * Cycle through CPUs to check if the CPUs stay
- * synchronized to each other.
- */
- int next_cpu = cpumask_next(raw_smp_processor_id(),
- cpu_online_mask);
+ /* Check the deviation from the watchdog clocksource. */
+ cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) &
+ cs->mask, cs->mult, cs->shift);
+ cs->wd_last = csnow;
+ if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+ clocksource_unstable(cs, cs_nsec - wd_nsec);
+ continue;
+ }
- if (next_cpu >= nr_cpu_ids)
- next_cpu = cpumask_first(cpu_online_mask);
- watchdog_timer.expires += WATCHDOG_INTERVAL;
- add_timer_on(&watchdog_timer, next_cpu);
+ if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
+ (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
+ (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
+ cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
+ /*
+ * We just marked the clocksource as highres-capable,
+ * notify the rest of the system as well so that we
+ * transition into high-res mode:
+ */
+ tick_clock_notify();
+ }
}
+
+ /*
+ * Cycle through CPUs to check if the CPUs stay synchronized
+ * to each other.
+ */
+ next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+ if (next_cpu >= nr_cpu_ids)
+ next_cpu = cpumask_first(cpu_online_mask);
+ watchdog_timer.expires += WATCHDOG_INTERVAL;
+ add_timer_on(&watchdog_timer, next_cpu);
+out:
spin_unlock(&watchdog_lock);
}
+
+static inline void clocksource_start_watchdog(void)
+{
+ if (watchdog_running || !watchdog || list_empty(&watchdog_list))
+ return;
+ init_timer(&watchdog_timer);
+ watchdog_timer.function = clocksource_watchdog;
+ watchdog_last = watchdog->read(watchdog);
+ watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
+ add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
+ watchdog_running = 1;
+}
+
+static inline void clocksource_stop_watchdog(void)
+{
+ if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
+ return;
+ del_timer(&watchdog_timer);
+ watchdog_running = 0;
+}
+
+static inline void clocksource_reset_watchdog(void)
+{
+ struct clocksource *cs;
+
+ list_for_each_entry(cs, &watchdog_list, wd_list)
+ cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
+}
+
static void clocksource_resume_watchdog(void)
{
- set_bit(0, &watchdog_resumed);
+ unsigned long flags;
+
+ spin_lock_irqsave(&watchdog_lock, flags);
+ clocksource_reset_watchdog();
+ spin_unlock_irqrestore(&watchdog_lock, flags);
}
-static void clocksource_check_watchdog(struct clocksource *cs)
+static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
- struct clocksource *cse;
unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
- int started = !list_empty(&watchdog_list);
-
+ /* cs is a clocksource to be watched. */
list_add(&cs->wd_list, &watchdog_list);
- if (!started && watchdog) {
- watchdog_last = watchdog->read(watchdog);
- watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
- add_timer_on(&watchdog_timer,
- cpumask_first(cpu_online_mask));
- }
+ cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
} else {
+ /* cs is a watchdog. */
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
-
+ /* Pick the best watchdog. */
if (!watchdog || cs->rating > watchdog->rating) {
- if (watchdog)
- del_timer(&watchdog_timer);
watchdog = cs;
- init_timer(&watchdog_timer);
- watchdog_timer.function = clocksource_watchdog;
-
/* Reset watchdog cycles */
- list_for_each_entry(cse, &watchdog_list, wd_list)
- cse->flags &= ~CLOCK_SOURCE_WATCHDOG;
- /* Start if list is not empty */
- if (!list_empty(&watchdog_list)) {
- watchdog_last = watchdog->read(watchdog);
- watchdog_timer.expires =
- jiffies + WATCHDOG_INTERVAL;
- add_timer_on(&watchdog_timer,
- cpumask_first(cpu_online_mask));
- }
+ clocksource_reset_watchdog();
+ }
+ }
+ /* Check if the watchdog timer needs to be started. */
+ clocksource_start_watchdog();
+ spin_unlock_irqrestore(&watchdog_lock, flags);
+}
+
+static void clocksource_dequeue_watchdog(struct clocksource *cs)
+{
+ struct clocksource *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&watchdog_lock, flags);
+ if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
+ /* cs is a watched clocksource. */
+ list_del_init(&cs->wd_list);
+ } else if (cs == watchdog) {
+ /* Reset watchdog cycles */
+ clocksource_reset_watchdog();
+ /* Current watchdog is removed. Find an alternative. */
+ watchdog = NULL;
+ list_for_each_entry(tmp, &clocksource_list, list) {
+ if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
+ continue;
+ if (!watchdog || tmp->rating > watchdog->rating)
+ watchdog = tmp;
}
}
+ cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
+ /* Check if the watchdog timer needs to be stopped. */
+ clocksource_stop_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
}
-#else
-static void clocksource_check_watchdog(struct clocksource *cs)
+
+static int clocksource_watchdog_kthread(void *data)
+{
+ struct clocksource *cs, *tmp;
+ unsigned long flags;
+ LIST_HEAD(unstable);
+
+ mutex_lock(&clocksource_mutex);
+ spin_lock_irqsave(&watchdog_lock, flags);
+ list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
+ if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
+ list_del_init(&cs->wd_list);
+ list_add(&cs->wd_list, &unstable);
+ }
+ /* Check if the watchdog timer needs to be stopped. */
+ clocksource_stop_watchdog();
+ spin_unlock_irqrestore(&watchdog_lock, flags);
+
+ /* Needs to be done outside of watchdog lock */
+ list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
+ list_del_init(&cs->wd_list);
+ __clocksource_change_rating(cs, 0);
+ }
+ mutex_unlock(&clocksource_mutex);
+ return 0;
+}
+
+#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
+
+static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}
+static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
-#endif
+static inline int clocksource_watchdog_kthread(void *data) { return 0; }
+
+#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
/**
* clocksource_resume - resume the clocksource(s)
@@ -294,18 +393,16 @@ static inline void clocksource_resume_watchdog(void) { }
void clocksource_resume(void)
{
struct clocksource *cs;
- unsigned long flags;
- spin_lock_irqsave(&clocksource_lock, flags);
+ mutex_lock(&clocksource_mutex);
- list_for_each_entry(cs, &clocksource_list, list) {
+ list_for_each_entry(cs, &clocksource_list, list)
if (cs->resume)
cs->resume();
- }
clocksource_resume_watchdog();
- spin_unlock_irqrestore(&clocksource_lock, flags);
+ mutex_unlock(&clocksource_mutex);
}
/**
@@ -320,75 +417,94 @@ void clocksource_touch_watchdog(void)
clocksource_resume_watchdog();
}
+#ifdef CONFIG_GENERIC_TIME
+
/**
- * clocksource_get_next - Returns the selected clocksource
+ * clocksource_select - Select the best clocksource available
+ *
+ * Private function. Must hold clocksource_mutex when called.
*
+ * Select the clocksource with the best rating, or the clocksource,
+ * which is selected by userspace override.
*/
-struct clocksource *clocksource_get_next(void)
+static void clocksource_select(void)
{
- unsigned long flags;
+ struct clocksource *best, *cs;
- spin_lock_irqsave(&clocksource_lock, flags);
- if (next_clocksource && finished_booting) {
- curr_clocksource = next_clocksource;
- next_clocksource = NULL;
+ if (!finished_booting || list_empty(&clocksource_list))
+ return;
+ /* First clocksource on the list has the best rating. */
+ best = list_first_entry(&clocksource_list, struct clocksource, list);
+ /* Check for the override clocksource. */
+ list_for_each_entry(cs, &clocksource_list, list) {
+ if (strcmp(cs->name, override_name) != 0)
+ continue;
+ /*
+ * Check to make sure we don't switch to a non-highres
+ * capable clocksource if the tick code is in oneshot
+ * mode (highres or nohz)
+ */
+ if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
+ tick_oneshot_mode_active()) {
+ /* Override clocksource cannot be used. */
+ printk(KERN_WARNING "Override clocksource %s is not "
+ "HRT compatible. Cannot switch while in "
+ "HRT/NOHZ mode\n", cs->name);
+ override_name[0] = 0;
+ } else
+ /* Override clocksource can be used. */
+ best = cs;
+ break;
+ }
+ if (curr_clocksource != best) {
+ printk(KERN_INFO "Switching to clocksource %s\n", best->name);
+ curr_clocksource = best;
+ timekeeping_notify(curr_clocksource);
}
- spin_unlock_irqrestore(&clocksource_lock, flags);
-
- return curr_clocksource;
}
-/**
- * select_clocksource - Selects the best registered clocksource.
- *
- * Private function. Must hold clocksource_lock when called.
+#else /* CONFIG_GENERIC_TIME */
+
+static inline void clocksource_select(void) { }
+
+#endif
+
+/*
+ * clocksource_done_booting - Called near the end of core bootup
*
- * Select the clocksource with the best rating, or the clocksource,
- * which is selected by userspace override.
+ * Hack to avoid lots of clocksource churn at boot time.
+ * We use fs_initcall because we want this to start before
+ * device_initcall but after subsys_initcall.
*/
-static struct clocksource *select_clocksource(void)
+static int __init clocksource_done_booting(void)
{
- struct clocksource *next;
-
- if (list_empty(&clocksource_list))
- return NULL;
-
- if (clocksource_override)
- next = clocksource_override;
- else
- next = list_entry(clocksource_list.next, struct clocksource,
- list);
+ finished_booting = 1;
- if (next == curr_clocksource)
- return NULL;
+ /*
+ * Run the watchdog first to eliminate unstable clock sources
+ */
+ clocksource_watchdog_kthread(NULL);
- return next;
+ mutex_lock(&clocksource_mutex);
+ clocksource_select();
+ mutex_unlock(&clocksource_mutex);
+ return 0;
}
+fs_initcall(clocksource_done_booting);
/*
* Enqueue the clocksource sorted by rating
*/
-static int clocksource_enqueue(struct clocksource *c)
+static void clocksource_enqueue(struct clocksource *cs)
{
- struct list_head *tmp, *entry = &clocksource_list;
+ struct list_head *entry = &clocksource_list;
+ struct clocksource *tmp;
- list_for_each(tmp, &clocksource_list) {
- struct clocksource *cs;
-
- cs = list_entry(tmp, struct clocksource, list);
- if (cs == c)
- return -EBUSY;
+ list_for_each_entry(tmp, &clocksource_list, list)
/* Keep track of the place, where to insert */
- if (cs->rating >= c->rating)
- entry = tmp;
- }
- list_add(&c->list, entry);
-
- if (strlen(c->name) == strlen(override_name) &&
- !strcmp(c->name, override_name))
- clocksource_override = c;
-
- return 0;
+ if (tmp->rating >= cs->rating)
+ entry = &tmp->list;
+ list_add(&cs->list, entry);
}
/**
@@ -397,52 +513,48 @@ static int clocksource_enqueue(struct clocksource *c)
*
* Returns -EBUSY if registration fails, zero otherwise.
*/
-int clocksource_register(struct clocksource *c)
+int clocksource_register(struct clocksource *cs)
{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&clocksource_lock, flags);
- ret = clocksource_enqueue(c);
- if (!ret)
- next_clocksource = select_clocksource();
- spin_unlock_irqrestore(&clocksource_lock, flags);
- if (!ret)
- clocksource_check_watchdog(c);
- return ret;
+ mutex_lock(&clocksource_mutex);
+ clocksource_enqueue(cs);
+ clocksource_select();
+ clocksource_enqueue_watchdog(cs);
+ mutex_unlock(&clocksource_mutex);
+ return 0;
}
EXPORT_SYMBOL(clocksource_register);
+static void __clocksource_change_rating(struct clocksource *cs, int rating)
+{
+ list_del(&cs->list);
+ cs->rating = rating;
+ clocksource_enqueue(cs);
+ clocksource_select();
+}
+
/**
* clocksource_change_rating - Change the rating of a registered clocksource
- *
*/
void clocksource_change_rating(struct clocksource *cs, int rating)
{
- unsigned long flags;
-
- spin_lock_irqsave(&clocksource_lock, flags);
- list_del(&cs->list);
- cs->rating = rating;
- clocksource_enqueue(cs);
- next_clocksource = select_clocksource();
- spin_unlock_irqrestore(&clocksource_lock, flags);
+ mutex_lock(&clocksource_mutex);
+ __clocksource_change_rating(cs, rating);
+ mutex_unlock(&clocksource_mutex);
}
+EXPORT_SYMBOL(clocksource_change_rating);
/**
* clocksource_unregister - remove a registered clocksource
*/
void clocksource_unregister(struct clocksource *cs)
{
- unsigned long flags;
-
- spin_lock_irqsave(&clocksource_lock, flags);
+ mutex_lock(&clocksource_mutex);
+ clocksource_dequeue_watchdog(cs);
list_del(&cs->list);
- if (clocksource_override == cs)
- clocksource_override = NULL;
- next_clocksource = select_clocksource();
- spin_unlock_irqrestore(&clocksource_lock, flags);
+ clocksource_select();
+ mutex_unlock(&clocksource_mutex);
}
+EXPORT_SYMBOL(clocksource_unregister);
#ifdef CONFIG_SYSFS
/**
@@ -458,9 +570,9 @@ sysfs_show_current_clocksources(struct sys_device *dev,
{
ssize_t count = 0;
- spin_lock_irq(&clocksource_lock);
+ mutex_lock(&clocksource_mutex);
count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
- spin_unlock_irq(&clocksource_lock);
+ mutex_unlock(&clocksource_mutex);
return count;
}
@@ -478,9 +590,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf, size_t count)
{
- struct clocksource *ovr = NULL;
size_t ret = count;
- int len;
/* strings from sysfs write are not 0 terminated! */
if (count >= sizeof(override_name))
@@ -490,44 +600,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
if (buf[count-1] == '\n')
count--;
- spin_lock_irq(&clocksource_lock);
+ mutex_lock(&clocksource_mutex);
if (count > 0)
memcpy(override_name, buf, count);
override_name[count] = 0;
+ clocksource_select();
- len = strlen(override_name);
- if (len) {
- struct clocksource *cs;
-
- ovr = clocksource_override;
- /* try to select it: */
- list_for_each_entry(cs, &clocksource_list, list) {
- if (strlen(cs->name) == len &&
- !strcmp(cs->name, override_name))
- ovr = cs;
- }
- }
-
- /*
- * Check to make sure we don't switch to a non-highres capable
- * clocksource if the tick code is in oneshot mode (highres or nohz)
- */
- if (tick_oneshot_mode_active() && ovr &&
- !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
- printk(KERN_WARNING "%s clocksource is not HRT compatible. "
- "Cannot switch while in HRT/NOHZ mode\n", ovr->name);
- ovr = NULL;
- override_name[0] = 0;
- }
-
- /* Reselect, when the override name has changed */
- if (ovr != clocksource_override) {
- clocksource_override = ovr;
- next_clocksource = select_clocksource();
- }
-
- spin_unlock_irq(&clocksource_lock);
+ mutex_unlock(&clocksource_mutex);
return ret;
}
@@ -547,7 +627,7 @@ sysfs_show_available_clocksources(struct sys_device *dev,
struct clocksource *src;
ssize_t count = 0;
- spin_lock_irq(&clocksource_lock);
+ mutex_lock(&clocksource_mutex);
list_for_each_entry(src, &clocksource_list, list) {
/*
* Don't show non-HRES clocksource if the tick code is
@@ -559,7 +639,7 @@ sysfs_show_available_clocksources(struct sys_device *dev,
max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
"%s ", src->name);
}
- spin_unlock_irq(&clocksource_lock);
+ mutex_unlock(&clocksource_mutex);
count += snprintf(buf + count,
max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
@@ -614,11 +694,10 @@ device_initcall(init_clocksource_sysfs);
*/
static int __init boot_override_clocksource(char* str)
{
- unsigned long flags;
- spin_lock_irqsave(&clocksource_lock, flags);
+ mutex_lock(&clocksource_mutex);
if (str)
strlcpy(override_name, str, sizeof(override_name));
- spin_unlock_irqrestore(&clocksource_lock, flags);
+ mutex_unlock(&clocksource_mutex);
return 1;
}
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index c3f6c30816e3..5404a8456909 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -61,7 +61,6 @@ struct clocksource clocksource_jiffies = {
.read = jiffies_read,
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
- .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT,
.shift = JIFFIES_SHIFT,
};
@@ -71,3 +70,8 @@ static int __init init_jiffies_clocksource(void)
}
core_initcall(init_jiffies_clocksource);
+
+struct clocksource * __init __weak clocksource_default_clock(void)
+{
+ return &clocksource_jiffies;
+}
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7fc64375ff43..4800f933910e 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -194,8 +194,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
case TIME_OK:
break;
case TIME_INS:
- xtime.tv_sec--;
- wall_to_monotonic.tv_sec++;
+ timekeeping_leap_insert(-1);
time_state = TIME_OOP;
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
@@ -203,9 +202,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
res = HRTIMER_RESTART;
break;
case TIME_DEL:
- xtime.tv_sec++;
+ timekeeping_leap_insert(1);
time_tai--;
- wall_to_monotonic.tv_sec--;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
@@ -219,7 +217,6 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
time_state = TIME_OK;
break;
}
- update_vsyscall(&xtime, clock);
write_sequnlock(&xtime_lock);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e8c77d9c633a..fb0f46fa1ecd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -18,7 +18,117 @@
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/tick.h>
+#include <linux/stop_machine.h>
+
+/* Structure holding internal timekeeping values. */
+struct timekeeper {
+ /* Current clocksource used for timekeeping. */
+ struct clocksource *clock;
+ /* The shift value of the current clocksource. */
+ int shift;
+
+ /* Number of clock cycles in one NTP interval. */
+ cycle_t cycle_interval;
+ /* Number of clock shifted nano seconds in one NTP interval. */
+ u64 xtime_interval;
+ /* Raw nano seconds accumulated per NTP interval. */
+ u32 raw_interval;
+
+ /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */
+ u64 xtime_nsec;
+ /* Difference between accumulated time and NTP time in ntp
+ * shifted nano seconds. */
+ s64 ntp_error;
+ /* Shift conversion between clock shifted nano seconds and
+ * ntp shifted nano seconds. */
+ int ntp_error_shift;
+ /* NTP adjusted clock multiplier */
+ u32 mult;
+};
+
+struct timekeeper timekeeper;
+
+/**
+ * timekeeper_setup_internals - Set up internals to use clocksource clock.
+ *
+ * @clock: Pointer to clocksource.
+ *
+ * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
+ * pair and interval request.
+ *
+ * Unless you're the timekeeping code, you should not be using this!
+ */
+static void timekeeper_setup_internals(struct clocksource *clock)
+{
+ cycle_t interval;
+ u64 tmp;
+
+ timekeeper.clock = clock;
+ clock->cycle_last = clock->read(clock);
+ /* Do the ns -> cycle conversion first, using original mult */
+ tmp = NTP_INTERVAL_LENGTH;
+ tmp <<= c