From 5439050f9f1aa92381ab9beccbea6ddf0c687c2b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 25 Dec 2008 13:37:57 +0100 Subject: [S390] cpu topology: fix cpu_core_map initialization Common code doesn't call arch_update_cpu_topology() anymore on cpu hotplug. But our architecture backend relied on that in order to update the cpu_core_map. For machines without cpu topology support this leads to uninitialized cpu_core_maps for cpus added later on. To solve this just initialize the maps with cpu_possible_map, since that will always be valid for machines without topology support. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index a947899dcba1..586c5e33b3f4 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -78,7 +78,7 @@ cpumask_t cpu_coregroup_map(unsigned int cpu) cpus_clear(mask); if (!machine_has_topology) - return cpu_present_map; + return cpu_possible_map; spin_lock_irqsave(&topology_lock, flags); while (core) { if (cpu_isset(cpu, core->mask)) { @@ -168,7 +168,7 @@ static void topology_update_polarization_simple(void) int cpu; mutex_lock(&smp_cpu_state_mutex); - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) smp_cpu_polarization[cpu] = POLARIZATION_HRZ; mutex_unlock(&smp_cpu_state_mutex); } @@ -199,7 +199,7 @@ int topology_set_cpu_management(int fc) rc = ptf(PTF_HORIZONTAL); if (rc) return -EBUSY; - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; return rc; } @@ -208,7 +208,7 @@ static void update_cpu_core_map(void) { int cpu; - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) cpu_core_map[cpu] = cpu_coregroup_map(cpu); } -- cgit v1.2.3 From 8f2961c39eacd5d450048d8a3e91675486f3a015 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 25 Dec 2008 13:37:58 +0100 
Subject: [S390] audit: get s390 ret_from_fork in sync with other architectures On s390 we have ret_from_fork jump not to the "do all work we normally do on return from syscall" as on x86, ppc, etc., but to the "do all such work except audit". Historical reasons - the codepath triggered when we have AUDIT process flag set is separated from the normal one and they converge at sysc_return, which is the common part of post-syscall work. And does not include calling audit_syscall_exit() - that's done at the end of sysc_tracesys path, just before that path jumps to sysc_return. IOW, the child returning from fork()/clone()/vfork() doesn't call audit_syscall_exit() at all, so no matter what we do with its audit context, we are not going to see the audit entry. The fix is simple: have ret_from_fork go to the point just past the call of sys_.... in the 'we have AUDIT flag set' path. There we have (64bit variant; for 31bit the situation is the same): sysc_tracenogo: tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) jz sysc_return la %r2,SP_PTREGS(%r15) # load pt_regs larl %r14,sysc_return # return point is sysc_return jg do_syscall_trace_exit which is precisely what we need - check the flag, bugger off to sysc_return if not set, otherwise call do_syscall_trace_exit() and bugger off to sysc_return. r9 has just been properly set by ret_from_fork itself, so we are fine. Tested on s390x, seems to work fine. WARNING: it's been about 16 years since my last contact with 3X0 assembler[1], so additional review would be very welcome. I don't think I've managed to screw it up, but... [1] that *was* in another country and besides, the box is dead... 
Signed-off-by: Al Viro Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 2 +- arch/s390/kernel/entry64.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 198ea18a534d..c1cfc7e39ec9 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -436,7 +436,7 @@ ret_from_fork: basr %r14,%r1 TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - b BASED(sysc_return) + b BASED(sysc_tracenogo) # # kernel_execve function needs to deal with pt_regs that is not diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 89c121ae6339..ea90d9a676f8 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -417,7 +417,7 @@ ret_from_fork: 0: brasl %r14,schedule_tail TRACE_IRQS_ON stosm 24(%r15),0x03 # reenable interrupts - j sysc_return + j sysc_tracenogo # # kernel_execve function needs to deal with pt_regs that is not -- cgit v1.2.3 From f414f5f15376764d68a31dc568d9e814d3fcb58a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 25 Dec 2008 13:37:59 +0100 Subject: [S390] cpu topology: dont destroy cpu sets on topology change Call rebuild_sched_domains instead of arch_reinit_sched_domains if cpu topology changes. This leaves cpu sets alone which otherwise would be destroyed. If and how it makes sense to define cpu sets on a virtualized architecture is another question. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 586c5e33b3f4..36faac50e774 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -234,7 +235,7 @@ void arch_update_cpu_topology(void) static void topology_work_fn(struct work_struct *work) { - arch_reinit_sched_domains(); + rebuild_sched_domains(); } void topology_schedule_update(void) -- cgit v1.2.3 From fc5243d98ac2575ad14a974b3c097e9ba874c03d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 25 Dec 2008 13:38:35 +0100 Subject: [S390] arch_setup_additional_pages arguments arch_setup_additional_pages currently gets two arguments, the binary format description and an indication if the process uses an executable stack or not. The second argument is not used by anybody, it could be removed without replacement. What actually does make sense is to pass an indication if the process uses the elf interpreter or not. The glibc code will not use anything from the vdso if the process does not use the dynamic linker, so for statically linked binaries the architecture backend can choose not to map the vdso. 
Acked-by: Ingo Molnar Signed-off-by: Martin Schwidefsky --- arch/powerpc/include/asm/elf.h | 2 +- arch/powerpc/kernel/vdso.c | 3 +-- arch/sh/include/asm/elf.h | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 3 +-- arch/x86/include/asm/elf.h | 2 +- arch/x86/vdso/vdso32-setup.c | 2 +- arch/x86/vdso/vma.c | 2 +- 7 files changed, 7 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index d812929390e4..cd46f023ec6d 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -267,7 +267,7 @@ extern int ucache_bsize; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, - int executable_stack); + int uses_interp); #define VDSO_AUX_ENT(a,b) NEW_AUX_ENT(a,b); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 65639a43e644..f7ec7d0888fe 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -184,8 +184,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma) * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree */ -int arch_setup_additional_pages(struct linux_binprm *bprm, - int executable_stack) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; struct page **vdso_pagelist; diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h index 9eb9036a1bdc..9381397ebeb8 100644 --- a/arch/sh/include/asm/elf.h +++ b/arch/sh/include/asm/elf.h @@ -204,7 +204,7 @@ do { \ #define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, - int executable_stack); + int uses_interp); extern unsigned int vdso_enabled; extern void __kernel_vsyscall; diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 95f4de0800ec..3f7e415be86a 
100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -59,8 +59,7 @@ int __init vsyscall_init(void) } /* Setup a VMA at program startup for the vsyscall page */ -int arch_setup_additional_pages(struct linux_binprm *bprm, - int executable_stack) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr; diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 40ca1bea7916..f51a3ddde01a 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -325,7 +325,7 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 extern int arch_setup_additional_pages(struct linux_binprm *bprm, - int executable_stack); + int uses_interp); extern int syscall32_setup_pages(struct linux_binprm *, int exstack); #define compat_arch_setup_additional_pages syscall32_setup_pages diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 513f330c5832..1241f118ab56 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -310,7 +310,7 @@ int __init sysenter_setup(void) } /* Setup a VMA at program startup for the vsyscall page */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr; diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 257ba4a10abf..9c98cc6ba978 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -98,7 +98,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) /* Setup a VMA at program startup for the vsyscall page. 
Not called for compat tasks */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr; -- cgit v1.2.3 From b020632e40c3ed5e8c0c066d022672907e8401cf Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 25 Dec 2008 13:38:36 +0100 Subject: [S390] introduce vdso on s390 Add a vdso to speed up gettimeofday and clock_getres/clock_gettime for CLOCK_REALTIME/CLOCK_MONOTONIC. Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 3 + arch/s390/include/asm/auxvec.h | 2 + arch/s390/include/asm/elf.h | 16 +++ arch/s390/include/asm/mmu.h | 1 + arch/s390/include/asm/page.h | 2 + arch/s390/include/asm/vdso.h | 39 ++++++ arch/s390/kernel/Makefile | 7 +- arch/s390/kernel/asm-offsets.c | 15 ++ arch/s390/kernel/time.c | 31 ++++ arch/s390/kernel/vdso.c | 234 +++++++++++++++++++++++++++++++ arch/s390/kernel/vdso32/Makefile | 55 ++++++++ arch/s390/kernel/vdso32/clock_getres.S | 39 ++++++ arch/s390/kernel/vdso32/clock_gettime.S | 128 +++++++++++++++++ arch/s390/kernel/vdso32/gettimeofday.S | 82 +++++++++++ arch/s390/kernel/vdso32/note.S | 12 ++ arch/s390/kernel/vdso32/vdso32.lds.S | 138 ++++++++++++++++++ arch/s390/kernel/vdso32/vdso32_wrapper.S | 13 ++ arch/s390/kernel/vdso64/Makefile | 55 ++++++++ arch/s390/kernel/vdso64/clock_getres.S | 39 ++++++ arch/s390/kernel/vdso64/clock_gettime.S | 89 ++++++++++++ arch/s390/kernel/vdso64/gettimeofday.S | 56 ++++++++ arch/s390/kernel/vdso64/note.S | 12 ++ arch/s390/kernel/vdso64/vdso64.lds.S | 138 ++++++++++++++++++ arch/s390/kernel/vdso64/vdso64_wrapper.S | 13 ++ 24 files changed, 1218 insertions(+), 1 deletion(-) create mode 100644 arch/s390/include/asm/vdso.h create mode 100644 arch/s390/kernel/vdso.c create mode 100644 arch/s390/kernel/vdso32/Makefile create mode 100644 arch/s390/kernel/vdso32/clock_getres.S create mode 100644 arch/s390/kernel/vdso32/clock_gettime.S create mode 
100644 arch/s390/kernel/vdso32/gettimeofday.S create mode 100644 arch/s390/kernel/vdso32/note.S create mode 100644 arch/s390/kernel/vdso32/vdso32.lds.S create mode 100644 arch/s390/kernel/vdso32/vdso32_wrapper.S create mode 100644 arch/s390/kernel/vdso64/Makefile create mode 100644 arch/s390/kernel/vdso64/clock_getres.S create mode 100644 arch/s390/kernel/vdso64/clock_gettime.S create mode 100644 arch/s390/kernel/vdso64/gettimeofday.S create mode 100644 arch/s390/kernel/vdso64/note.S create mode 100644 arch/s390/kernel/vdso64/vdso64.lds.S create mode 100644 arch/s390/kernel/vdso64/vdso64_wrapper.S (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8116a3328a19..37bb37334dec 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -43,6 +43,9 @@ config GENERIC_HWEIGHT config GENERIC_TIME def_bool y +config GENERIC_TIME_VSYSCALL + def_bool y + config GENERIC_CLOCKEVENTS def_bool y diff --git a/arch/s390/include/asm/auxvec.h b/arch/s390/include/asm/auxvec.h index 0d340720fd99..a1f153e89133 100644 --- a/arch/s390/include/asm/auxvec.h +++ b/arch/s390/include/asm/auxvec.h @@ -1,4 +1,6 @@ #ifndef __ASMS390_AUXVEC_H #define __ASMS390_AUXVEC_H +#define AT_SYSINFO_EHDR 33 + #endif diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 261785ab5b22..d480f39d65e6 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -120,6 +120,10 @@ typedef s390_compat_regs compat_elf_gregset_t; #include /* for save_access_regs */ #include +#include + +extern unsigned int vdso_enabled; + /* * This is used to ensure we don't load something for the wrong architecture. 
*/ @@ -191,4 +195,16 @@ do { \ current->mm->context.noexec == 0; \ }) +#define ARCH_DLINFO \ +do { \ + if (vdso_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso_base); \ +} while (0) + +struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +int arch_setup_additional_pages(struct linux_binprm *, int); + #endif diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index d2b4ff831477..3b59216e6284 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -6,6 +6,7 @@ typedef struct { struct list_head pgtable_list; unsigned long asce_bits; unsigned long asce_limit; + unsigned long vdso_base; int noexec; int has_pgste; /* The mmu context has extended page tables */ int alloc_pgste; /* cloned contexts will have extended page tables */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 991ba939408c..32e8f6aa4384 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -152,4 +152,6 @@ void arch_alloc_page(struct page *page, int order); #include #include +#define __HAVE_ARCH_GATE_AREA 1 + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h new file mode 100644 index 000000000000..a44f4fe16a35 --- /dev/null +++ b/arch/s390/include/asm/vdso.h @@ -0,0 +1,39 @@ +#ifndef __S390_VDSO_H__ +#define __S390_VDSO_H__ + +#ifdef __KERNEL__ + +/* Default link addresses for the vDSOs */ +#define VDSO32_LBASE 0 +#define VDSO64_LBASE 0 + +#define VDSO_VERSION_STRING LINUX_2.6.26 + +#ifndef __ASSEMBLY__ + +/* + * Note about this structure: + * + * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this + * structure is supposed to be known only to the function in the vdso + * itself and may change without notice. 
+ */ + +struct vdso_data { + __u64 tb_update_count; /* Timebase atomicity ctr 0x00 */ + __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */ + __u64 xtime_clock_sec; /* Kernel time 0x10 */ + __u64 xtime_clock_nsec; /* 0x18 */ + __u64 wtom_clock_sec; /* Wall to monotonic clock 0x20 */ + __u64 wtom_clock_nsec; /* 0x28 */ + __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ + __u32 tz_dsttime; /* Type of dst correction 0x34 */ +}; + +extern struct vdso_data *vdso_data; + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 50f657e77344..235b9484a4dc 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -14,7 +14,8 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := bitmap.o traps.o time.o process.o base.o early.o \ setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ - s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o + s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \ + vdso.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) @@ -39,3 +40,7 @@ S390_KEXEC_OBJS := machine_kexec.o crash.o S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS) +# vdso +obj-$(CONFIG_64BIT) += vdso64/ +obj-$(CONFIG_32BIT) += vdso32/ +obj-$(CONFIG_COMPAT) += vdso32/ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 3d144e6020c6..e641f60bac99 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -6,6 +6,7 @@ #include #include +#include int main(void) { @@ -38,5 +39,19 @@ int main(void) DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs)); DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1)); + BLANK(); + /* timeval/timezone offsets for use by vdso */ + DEFINE(__VDSO_UPD_COUNT, 
offsetof(struct vdso_data, tb_update_count)); + DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); + DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); + DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); + DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); + /* constants used by the vdso */ + DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); + DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + return 0; } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index eccefbbff887..b73bbf31f432 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -223,6 +224,36 @@ static struct clocksource clocksource_tod = { }; +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +{ + if (clock != &clocksource_tod) + return; + + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->xtime_tod_stamp = clock->cycle_last; + vdso_data->xtime_clock_sec = xtime.tv_sec; + vdso_data->xtime_clock_nsec = xtime.tv_nsec; + vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; + vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; + smp_wmb(); + ++vdso_data->tb_update_count; +} + +extern struct timezone sys_tz; + +void update_vsyscall_tz(void) +{ + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_wmb(); + ++vdso_data->tb_update_count; +} + /* * Initialize the TOD clock and the CPU timer of * the boot cpu. 
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c new file mode 100644 index 000000000000..10a6ccef4412 --- /dev/null +++ b/arch/s390/kernel/vdso.c @@ -0,0 +1,234 @@ +/* + * vdso setup for s390 + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* Max supported size for symbol names */ +#define MAX_SYMNAME 64 + +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) +extern char vdso32_start, vdso32_end; +static void *vdso32_kbase = &vdso32_start; +static unsigned int vdso32_pages; +static struct page **vdso32_pagelist; +#endif + +#ifdef CONFIG_64BIT +extern char vdso64_start, vdso64_end; +static void *vdso64_kbase = &vdso64_start; +static unsigned int vdso64_pages; +static struct page **vdso64_pagelist; +#endif /* CONFIG_64BIT */ + +/* + * Should the kernel map a VDSO page into processes and pass its + * address down to glibc upon exec()? 
+ */ +unsigned int __read_mostly vdso_enabled = 1; + +static int __init vdso_setup(char *s) +{ + vdso_enabled = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("vdso=", vdso_setup); + +/* + * The vdso data page + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __attribute__((__section__(".data.page_aligned"))); +struct vdso_data *vdso_data = &vdso_data_store.data; + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + struct page **vdso_pagelist; + unsigned long vdso_pages; + unsigned long vdso_base; + int rc; + + if (!vdso_enabled) + return 0; + /* + * Only map the vdso for dynamically linked elf binaries. + */ + if (!uses_interp) + return 0; + + vdso_base = mm->mmap_base; +#ifdef CONFIG_64BIT + vdso_pagelist = vdso64_pagelist; + vdso_pages = vdso64_pages; +#ifdef CONFIG_COMPAT + if (test_thread_flag(TIF_31BIT)) { + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; + } +#endif +#else + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; +#endif + + /* + * vDSO has a problem and was disabled, just don't "enable" it for + * the process + */ + if (vdso_pages == 0) + return 0; + + current->mm->context.vdso_base = 0; + + /* + * pick a base address for the vDSO in process space. We try to put + * it at vdso_base which is the "natural" base for it, but we might + * fail and end up putting it elsewhere. + */ + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, vdso_base, + vdso_pages << PAGE_SHIFT, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + rc = vdso_base; + goto out_up; + } + + /* + * our vma flags don't have VM_WRITE so by default, the process + * isn't allowed to write those pages. 
+ * gdb can break that with ptrace interface, and thus trigger COW + * on those pages but it's then your responsibility to never do that + * on the "data" page of the vDSO or you'll stop getting kernel + * updates and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though + * + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + vdso_pagelist); + if (rc) + goto out_up; + + /* Put vDSO base into mm struct */ + current->mm->context.vdso_base = vdso_base; + + up_write(&mm->mmap_sem); + return 0; + +out_up: + up_write(&mm->mmap_sem); + return rc; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) + return "[vdso]"; + return NULL; +} + +static int __init vdso_init(void) +{ + int i; + +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) + /* Calculate the size of the 32 bit vDSO */ + vdso32_pages = ((&vdso32_end - &vdso32_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the correct state */ + vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1), + GFP_KERNEL); + BUG_ON(vdso32_pagelist == NULL); + for (i = 0; i < vdso32_pages - 1; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso32_pagelist[i] = pg; + } + vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data); + vdso32_pagelist[vdso32_pages] = NULL; +#endif + +#ifdef CONFIG_64BIT + /* Calculate the size of the 64 bit vDSO */ + vdso64_pages = ((&vdso64_end - &vdso64_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the 
correct state */ + vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1), + GFP_KERNEL); + BUG_ON(vdso64_pagelist == NULL); + for (i = 0; i < vdso64_pages - 1; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso64_pagelist[i] = pg; + } + vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); + vdso64_pagelist[vdso64_pages] = NULL; +#endif /* CONFIG_64BIT */ + + get_page(virt_to_page(vdso_data)); + + smp_wmb(); + + return 0; +} +arch_initcall(vdso_init); + +int in_gate_area_no_task(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct task_struct *task, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +{ + return NULL; +} diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile new file mode 100644 index 000000000000..ca78ad60ba24 --- /dev/null +++ b/arch/s390/kernel/vdso32/Makefile @@ -0,0 +1,55 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_31 += -m31 -s + +KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) + +obj-y += vdso32_wrapper.o +extra-y += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) + $(call if_changed,vdso32ld) + 
+# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso32): %.o: %.S + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso32.so: $(obj)/vdso32.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso32.so diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S new file mode 100644 index 000000000000..9532c4e6a9d2 --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -0,0 +1,39 @@ +/* + * Userland implementation of clock_getres() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#include +#include +#include + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + chi %r2,CLOCK_REALTIME + je 0f + chi %r2,CLOCK_MONOTONIC + jne 3f +0: ltr %r3,%r3 + jz 2f /* res == NULL */ + basr %r1,0 +1: l %r0,4f-1b(%r1) + xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ + st %r0,4(%r3) /* store tp->tv_usec */ +2: lhi %r2,0 + br %r14 +3: lhi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +4: .long CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S new file mode 100644 index 000000000000..4a98909a8310 --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -0,0 +1,128 @@ +/* + * Userland implementation of clock_gettime() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include +#include +#include + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + basr %r5,0 +0: al %r5,21f-0b(%r5) /* get &_vdso_data */ + chi %r2,CLOCK_REALTIME + je 10f + chi %r2,CLOCK_MONOTONIC + jne 19f + + /* CLOCK_MONOTONIC */ + ltr %r3,%r3 + jz 9f /* tp == NULL */ +1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? 
loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,2f + ahi %r0,-1 +2: mhi %r0,1000 /* cyc2ns(clock,cycle_delta) */ + lr %r2,%r0 + lhi %r0,1000 + ltr %r1,%r1 + mr %r0,%r0 + jnm 3f + ahi %r0,1000 +3: alr %r0,%r2 + srdl %r0,12 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,4f + ahi %r0,1 +4: l %r2,__VDSO_XTIME_SEC+4(%r5) + al %r0,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ + al %r1,__VDSO_WTOM_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: al %r2,__VDSO_WTOM_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + basr %r5,0 +6: ltr %r0,%r0 + jnz 7f + cl %r1,20f-6b(%r5) + jl 8f +7: ahi %r2,1 + sl %r1,20f-6b(%r5) + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +9: lhi %r2,0 + br %r14 + + /* CLOCK_REALTIME */ +10: ltr %r3,%r3 /* tp == NULL */ + jz 18f +11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? 
loop */ + jnz 11b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,12f + ahi %r0,-1 +12: mhi %r0,1000 /* cyc2ns(clock,cycle_delta) */ + lr %r2,%r0 + lhi %r0,1000 + ltr %r1,%r1 + mr %r0,%r0 + jnm 13f + ahi %r0,1000 +13: alr %r0,%r2 + srdl %r0,12 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,14f + ahi %r0,1 +14: l %r2,__VDSO_XTIME_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 11b + basr %r5,0 +15: ltr %r0,%r0 + jnz 16f + cl %r1,20f-15b(%r5) + jl 17f +16: ahi %r2,1 + sl %r1,20f-15b(%r5) + brc 3,15b + ahi %r0,-1 + j 15b +17: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +18: lhi %r2,0 + br %r14 + + /* Fallback to system call */ +19: lhi %r1,__NR_clock_gettime + svc 0 + br %r14 + +20: .long 1000000000 +21: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S new file mode 100644 index 000000000000..c32f29c3d70c --- /dev/null +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -0,0 +1,82 @@ +/* + * Userland implementation of gettimeofday() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#include +#include +#include + +#include +#include +#include + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + basr %r5,0 /* %r5 = address of label 0 below */ +0: al %r5,13f-0b(%r5) /* get &_vdso_data */ +1: ltr %r3,%r3 /* check if tz is NULL */ + je 2f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) /* copy 8 bytes of timezone data to *tz */ +2: ltr %r2,%r2 /* check if tv is NULL */ + je 10f + l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) /* 64-bit TOD value as %r0 (high) / %r1 (low) */ + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,3f /* presumably: no borrow from the low word */ + ahi %r0,-1 /* presumably: propagate the borrow */ +3: mhi %r0,1000 /* cyc2ns(clock,cycle_delta) */ + st %r0,24(%r15) /* save high word * 1000 on the stack */ + lhi %r0,1000 + ltr %r1,%r1 + mr %r0,%r0 + jnm 4f + ahi %r0,1000 +4: al %r0,24(%r15) /* add the saved high word * 1000 */ + srdl %r0,12 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5) /* stash xtime seconds on the stack */ + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + l %r4,24(%r15) /* get tv_sec from stack */ + basr %r5,0 /* %r5 = address of label 6, base for the constants */ +6: ltr %r0,%r0 + jnz 7f + cl %r1,11f-6b(%r5) /* compare low word with 1000000000 */ + jl 8f +7: ahi %r4,1 /* tv_sec += 1 */ + sl %r1,11f-6b(%r5) /* nanoseconds -= 1000000000 */ + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r4,0(%r2) /* store tv->tv_sec */ + ltr %r1,%r1 + m %r0,12f-6b(%r5) /* multiply by 274877907 (about 2^38 / 1000) */ + jnm 9f + al %r0,12f-6b(%r5) +9: srl %r0,6 /* presumably: high product word >> 6 = nsec / 1000 */ + st %r0,4(%r2) /* store tv->tv_usec */ +10: slr %r2,%r2 /* return 0 */ + br %r14 +11: .long 1000000000 +12: .long 274877907 +13: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S new file mode 100644 index 000000000000..79a071e4357e --- /dev/null +++ b/arch/s390/kernel/vdso32/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. 
+ */ + +#include +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S new file mode 100644 index 000000000000..a8c379fa1247 --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +#include + +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390:31-bit) +ENTRY(_start) + +SECTIONS +{ + . = VDSO32_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. 
+ */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. 
+ */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 000000000000..61639a89e70b --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,13 @@ +#include +#include + + .section ".data.page_aligned" + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/s390/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile new file mode 100644 index 000000000000..6fc8e829258c --- /dev/null +++ b/arch/s390/kernel/vdso64/Makefile @@ -0,0 +1,55 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_64 += -m64 -s + +KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) 
+ +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso64.so: $(obj)/vdso64.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso64.so diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S new file mode 100644 index 000000000000..488e31a3c0e7 --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -0,0 +1,39 @@ +/* + * Userland implementation of clock_getres() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#include +#include +#include + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + cghi %r2,CLOCK_REALTIME /* %r2 holds the clock id */ + je 0f + cghi %r2,CLOCK_MONOTONIC + jne 2f /* any other clock: use the system call */ +0: ltgr %r3,%r3 + jz 1f /* res == NULL */ + larl %r1,3f /* address of the resolution constant */ + lg %r0,0(%r1) /* load CLOCK_REALTIME_RES */ + xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ + stg %r0,8(%r3) /* store tp->tv_nsec */ +1: lghi %r2,0 /* return 0 */ + br %r14 +2: lghi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +3: .quad CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S new file mode 100644 index 000000000000..738a410b7eb2 --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -0,0 +1,89 @@ +/* + * Userland implementation of clock_gettime() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include +#include +#include + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + larl %r5,_vdso_data /* get &_vdso_data */ + cghi %r2,CLOCK_REALTIME /* %r2 holds the clock id */ + je 4f + cghi %r2,CLOCK_MONOTONIC + jne 9f /* any other clock: fallback to system call */ + + /* CLOCK_MONOTONIC */ + ltgr %r3,%r3 + jz 3f /* tp == NULL */ +0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? 
loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + mghi %r1,1000 + srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + lg %r0,__VDSO_XTIME_SEC(%r5) + alg %r1,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ + alg %r0,__VDSO_WTOM_SEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + larl %r5,10f /* address of the 1000000000 constant */ +1: clg %r1,0(%r5) /* nanoseconds < 1000000000 ? */ + jl 2f + slg %r1,0(%r5) /* nanoseconds -= 1000000000 */ + aghi %r0,1 /* seconds += 1 */ + j 1b +2: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +3: lghi %r2,0 /* return 0 */ + br %r14 + + /* CLOCK_REALTIME */ +4: ltgr %r3,%r3 /* tp == NULL (test the full 64-bit pointer) */ + jz 8f +5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 5b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + mghi %r1,1000 + srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + lg %r0,__VDSO_XTIME_SEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 5b + larl %r5,10f /* address of the 1000000000 constant */ +6: clg %r1,0(%r5) /* nanoseconds < 1000000000 ? */ + jl 7f + slg %r1,0(%r5) /* nanoseconds -= 1000000000 */ + aghi %r0,1 /* seconds += 1 */ + j 6b +7: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +8: lghi %r2,0 /* return 0 */ + br %r14 + + /* Fallback to system call */ +9: lghi %r1,__NR_clock_gettime + svc 0 + br %r14 + +10: .quad 1000000000 + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S new file mode 100644 index 000000000000..f873e75634e1 --- /dev/null +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -0,0 +1,56 @@ +/* + * Userland implementation of gettimeofday() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 
2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include +#include +#include + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + larl %r5,_vdso_data /* get &_vdso_data */ +0: ltgr %r3,%r3 /* check if tz is NULL */ + je 1f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) /* copy 8 bytes of timezone data to *tz */ +1: ltgr %r2,%r2 /* check if tv is NULL */ + je 4f + lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + mghi %r1,1000 + srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime.tv_nsec */ + lg %r0,__VDSO_XTIME_SEC(%r5) /* xtime.tv_sec */ + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + larl %r5,5f /* address of the constants at label 5 */ +2: clg %r1,0(%r5) /* nanoseconds < 1000000000 ? */ + jl 3f + slg %r1,0(%r5) /* nanoseconds -= 1000000000 */ + aghi %r0,1 /* seconds += 1 */ + j 2b +3: stg %r0,0(%r2) /* store tv->tv_sec */ + slgr %r0,%r0 /* tv_nsec -> tv_usec */ + ml %r0,8(%r5) /* multiply by 274877907 (about 2^38 / 1000) */ + srlg %r0,%r0,6 /* presumably: high product word >> 6 = nsec / 1000 */ + stg %r0,8(%r2) /* store tv->tv_usec */ +4: lghi %r2,0 /* return 0 */ + br %r14 +5: .quad 1000000000 + .long 274877907 + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S new file mode 100644 index 000000000000..79a071e4357e --- /dev/null +++ b/arch/s390/kernel/vdso64/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. 
+ */ + +#include +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S new file mode 100644 index 000000000000..9f5979d102a9 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +#include + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +ENTRY(_start) + +SECTIONS +{ + . = VDSO64_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. 
+ */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. 
+ */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 000000000000..d8e2ac14d564 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,13 @@ +#include +#include + + .section ".data.page_aligned" + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/s390/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous -- cgit v1.2.3 From 750887dedc088d28198b170bcae83695247797d1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 25 Dec 2008 13:38:37 +0100 Subject: [S390] convert etr/stp to stop_machine interface This converts the etr and stp code to the new stop_machine interface which allows to synchronize all cpus without allocating any memory. This way we get rid of the only reason why we haven't converted s390 to the generic IPI interface yet. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/time.c | 212 ++++++++++++++++++++++++++++++------------------ 1 file changed, 131 insertions(+), 81 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index b73bbf31f432..6e09bc285ba0 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include #include @@ -391,6 +393,15 @@ static void enable_sync_clock(void) atomic_set_mask(0x80000000, sw_ptr); } +/* Single threaded workqueue used for etr and stp sync events */ +static struct workqueue_struct *time_sync_wq; + +static void __init time_init_wq(void) +{ + if (!time_sync_wq) + time_sync_wq = create_singlethread_workqueue("timesync"); +} + /* * External Time Reference (ETR) code. 
*/ @@ -483,17 +494,18 @@ static int __init etr_init(void) if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) return 0; + time_init_wq(); /* Check if this machine has the steai instruction. */ if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0) etr_steai_available = 1; setup_timer(&etr_timer, etr_timeout, 0UL); if (etr_port0_online) { set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } if (etr_port1_online) { set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } return 0; } @@ -520,7 +532,7 @@ void etr_switch_to_local(void) if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) disable_sync_clock(NULL); set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } /* @@ -536,7 +548,7 @@ void etr_sync_check(void) if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) disable_sync_clock(NULL); set_bit(ETR_EVENT_SYNC_CHECK, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } /* @@ -560,13 +572,13 @@ static void etr_timing_alert(struct etr_irq_parm *intparm) * Both ports are not up-to-date now. */ set_bit(ETR_EVENT_PORT_ALERT, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } static void etr_timeout(unsigned long dummy) { set_bit(ETR_EVENT_UPDATE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } /* @@ -673,14 +685,16 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) } struct clock_sync_data { + atomic_t cpus; int in_sync; unsigned long long fixup_cc; + int etr_port; + struct etr_aib *etr_aib; }; -static void clock_sync_cpu_start(void *dummy) +static void clock_sync_cpu(struct clock_sync_data *sync) { - struct clock_sync_data *sync = dummy; - + atomic_dec(&sync->cpus); enable_sync_clock(); /* * This looks like a busy wait loop but it isn't. 
etr_sync_cpus @@ -706,39 +720,35 @@ static void clock_sync_cpu_start(void *dummy) fixup_clock_comparator(sync->fixup_cc); } -static void clock_sync_cpu_end(void *dummy) -{ -} - /* * Sync the TOD clock using the port refered to by aibp. This port * has to be enabled and the other port has to be disabled. The * last eacr update has to be more than 1.6 seconds in the past. */ -static int etr_sync_clock(struct etr_aib *aib, int port) +static int etr_sync_clock(void *data) { - struct etr_aib *sync_port; - struct clock_sync_data etr_sync; + static int first; unsigned long long clock, old_clock, delay, delta; - int follows; + struct clock_sync_data *etr_sync; + struct etr_aib *sync_port, *aib; + int port; int rc; - /* Check if the current aib is adjacent to the sync port aib. */ - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - follows = etr_aib_follows(sync_port, aib, port); - memcpy(sync_port, aib, sizeof(*aib)); - if (!follows) - return -EAGAIN; + etr_sync = data; - /* - * Catch all other cpus and make them wait until we have - * successfully synced the clock. smp_call_function will - * return after all other cpus are in etr_sync_cpu_start. - */ - memset(&etr_sync, 0, sizeof(etr_sync)); - preempt_disable(); - smp_call_function(clock_sync_cpu_start, &etr_sync, 0); - local_irq_disable(); + if (xchg(&first, 1) == 1) { + /* Slave */ + clock_sync_cpu(etr_sync); + return 0; + } + + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&etr_sync->cpus) != 0) + cpu_relax(); + + port = etr_sync->etr_port; + aib = etr_sync->etr_aib; + sync_port = (port == 0) ? &etr_port0 : &etr_port1; enable_sync_clock(); /* Set clock to next OTE. 
*/ @@ -755,16 +765,16 @@ static int etr_sync_clock(struct etr_aib *aib, int port) delay = (unsigned long long) (aib->edf2.etv - sync_port->edf2.etv) << 32; delta = adjust_time(old_clock, clock, delay); - etr_sync.fixup_cc = delta; + etr_sync->fixup_cc = delta; fixup_clock_comparator(delta); /* Verify that the clock is properly set. */ if (!etr_aib_follows(sync_port, aib, port)) { /* Didn't work. */ disable_sync_clock(NULL); - etr_sync.in_sync = -EAGAIN; + etr_sync->in_sync = -EAGAIN; rc = -EAGAIN; } else { - etr_sync.in_sync = 1; + etr_sync->in_sync = 1; rc = 0; } } else { @@ -772,12 +782,33 @@ static int etr_sync_clock(struct etr_aib *aib, int port) __ctl_clear_bit(0, 29); __ctl_clear_bit(14, 21); disable_sync_clock(NULL); - etr_sync.in_sync = -EAGAIN; + etr_sync->in_sync = -EAGAIN; rc = -EAGAIN; } - local_irq_enable(); - smp_call_function(clock_sync_cpu_end, NULL, 0); - preempt_enable(); + xchg(&first, 0); + return rc; +} + +static int etr_sync_clock_stop(struct etr_aib *aib, int port) +{ + struct clock_sync_data etr_sync; + struct etr_aib *sync_port; + int follows; + int rc; + + /* Check if the current aib is adjacent to the sync port aib. */ + sync_port = (port == 0) ? &etr_port0 : &etr_port1; + follows = etr_aib_follows(sync_port, aib, port); + memcpy(sync_port, aib, sizeof(*aib)); + if (!follows) + return -EAGAIN; + memset(&etr_sync, 0, sizeof(etr_sync)); + etr_sync.etr_aib = aib; + etr_sync.etr_port = port; + get_online_cpus(); + atomic_set(&etr_sync.cpus, num_online_cpus() - 1); + rc = stop_machine(etr_sync_clock, &etr_sync, &cpu_online_map); + put_online_cpus(); return rc; } @@ -934,7 +965,7 @@ static void etr_update_eacr(struct etr_eacr eacr) } /* - * ETR tasklet. In this function you'll find the main logic. In + * ETR work. In this function you'll find the main logic. In * particular this is the only function that calls etr_update_eacr(), * it "controls" the etr control register. 
*/ @@ -1067,7 +1098,7 @@ static void etr_work_fn(struct work_struct *work) etr_update_eacr(eacr); set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); if (now < etr_tolec + (1600000 << 12) || - etr_sync_clock(&aib, sync_port) != 0) { + etr_sync_clock_stop(&aib, sync_port) != 0) { /* Sync failed. Try again in 1/2 second. */ eacr.es = 0; etr_update_eacr(eacr); @@ -1156,13 +1187,13 @@ static ssize_t etr_online_store(struct sys_device *dev, return count; /* Nothing to do. */ etr_port0_online = value; set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } else { if (etr_port1_online == value) return count; /* Nothing to do. */ etr_port1_online = value; set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } return count; } @@ -1396,8 +1427,12 @@ static void __init stp_reset(void) static int __init stp_init(void) { - if (test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online) - schedule_work(&stp_work); + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return 0; + time_init_wq(); + if (!stp_online) + return 0; + queue_work(time_sync_wq, &stp_work); return 0; } @@ -1414,7 +1449,7 @@ arch_initcall(stp_init); static void stp_timing_alert(struct stp_irq_parm *intparm) { if (intparm->tsc || intparm->lac || intparm->tcpc) - schedule_work(&stp_work); + queue_work(time_sync_wq, &stp_work); } /* @@ -1428,7 +1463,7 @@ void stp_sync_check(void) if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) return; disable_sync_clock(NULL); - schedule_work(&stp_work); + queue_work(time_sync_wq, &stp_work); } /* @@ -1442,46 +1477,34 @@ void stp_island_check(void) if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) return; disable_sync_clock(NULL); - schedule_work(&stp_work); + queue_work(time_sync_wq, &stp_work); } -/* - * STP tasklet. Check for the STP state and take over the clock - * synchronization if the STP clock source is usable. 
- */ -static void stp_work_fn(struct work_struct *work) + +static int stp_sync_clock(void *data) { - struct clock_sync_data stp_sync; + static int first; unsigned long long old_clock, delta; + struct clock_sync_data *stp_sync; int rc; - if (!stp_online) { - chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); - return; - } + stp_sync = data; - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); - if (rc) - return; + if (xchg(&first, 1) == 1) { + /* Slave */ + clock_sync_cpu(stp_sync); + return 0; + } - rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); - if (rc || stp_info.c == 0) - return; + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&stp_sync->cpus) != 0) + cpu_relax(); - /* - * Catch all other cpus and make them wait until we have - * successfully synced the clock. smp_call_function will - * return after all other cpus are in clock_sync_cpu_start. - */ - memset(&stp_sync, 0, sizeof(stp_sync)); - preempt_disable(); - smp_call_function(clock_sync_cpu_start, &stp_sync, 0); - local_irq_disable(); enable_sync_clock(); set_bit(CLOCK_SYNC_STP, &clock_sync_flags); if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); rc = 0; if (stp_info.todoff[0] || stp_info.todoff[1] || @@ -1500,16 +1523,43 @@ static void stp_work_fn(struct work_struct *work) } if (rc) { disable_sync_clock(NULL); - stp_sync.in_sync = -EAGAIN; + stp_sync->in_sync = -EAGAIN; clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); if (etr_port0_online || etr_port1_online) - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } else - stp_sync.in_sync = 1; + stp_sync->in_sync = 1; + xchg(&first, 0); + return 0; +} - local_irq_enable(); - smp_call_function(clock_sync_cpu_end, NULL, 0); - preempt_enable(); +/* + * STP work. Check for the STP state and take over the clock + * synchronization if the STP clock source is usable. 
+ */ +static void stp_work_fn(struct work_struct *work) +{ + struct clock_sync_data stp_sync; + int rc; + + if (!stp_online) { + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + return; + } + + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + if (rc) + return; + + rc = chsc_sstpi(stp_pa