Diffstat (limited to 'arch/tile/kernel')
-rw-r--r--   arch/tile/kernel/backtrace.c     |  103
-rw-r--r--   arch/tile/kernel/compat.c        |   13
-rw-r--r--   arch/tile/kernel/compat_signal.c |    4
-rw-r--r--   arch/tile/kernel/futex_64.S      |   55
-rw-r--r--   arch/tile/kernel/hardwall.c      |    6
-rw-r--r--   arch/tile/kernel/head_64.S       |  269
-rw-r--r--   arch/tile/kernel/intvec_32.S     |  175
-rw-r--r--   arch/tile/kernel/intvec_64.S     | 1231
-rw-r--r--   arch/tile/kernel/module.c        |    8
-rw-r--r--   arch/tile/kernel/pci-dma.c       |    2
-rw-r--r--   arch/tile/kernel/pci.c           |  206
-rw-r--r--   arch/tile/kernel/process.c       |   52
-rw-r--r--   arch/tile/kernel/regs_64.S       |  145
-rw-r--r--   arch/tile/kernel/setup.c         |    6
-rw-r--r--   arch/tile/kernel/signal.c        |  128
-rw-r--r--   arch/tile/kernel/single_step.c   |   12
-rw-r--r--   arch/tile/kernel/stack.c         |   14
-rw-r--r--   arch/tile/kernel/sys.c           |    9
-rw-r--r--   arch/tile/kernel/tile-desc_32.c  |   11
-rw-r--r--   arch/tile/kernel/tile-desc_64.c  | 2200
-rw-r--r--   arch/tile/kernel/time.c          |    2
-rw-r--r--   arch/tile/kernel/tlb.c           |   12
-rw-r--r--   arch/tile/kernel/traps.c         |    1
23 files changed, 4329 insertions, 335 deletions
diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c
index 55a6a74974b4..1dc71eabfc5a 100644
--- a/arch/tile/kernel/backtrace.c
+++ b/arch/tile/kernel/backtrace.c
@@ -14,19 +14,11 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
-
 #include <asm/backtrace.h>
-
-#include <arch/chip.h>
-
 #include <asm/opcode-tile.h>
+#include <arch/abi.h>
-
-#define TREG_SP 54
-#define TREG_LR 55
-
-
-#if TILE_CHIP >= 10
+#ifdef __tilegx__
 #define tile_bundle_bits tilegx_bundle_bits
 #define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE
 #define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES
@@ -47,7 +39,7 @@ typedef long long bt_int_reg_t;
 typedef int bt_int_reg_t;
 #endif
 
-/** A decoded bundle used for backtracer analysis. */
+/* A decoded bundle used for backtracer analysis. */
 struct BacktraceBundle {
 	tile_bundle_bits bits;
 	int num_insns;
@@ -56,23 +48,7 @@ struct BacktraceBundle {
 };
 
-
-/* This implementation only makes sense for native tools. */
-/** Default function to read memory. */
-static bool bt_read_memory(void *result, VirtualAddress addr,
-			   unsigned int size, void *extra)
-{
-	/* FIXME: this should do some horrible signal stuff to catch
-	 * SEGV cleanly and fail.
-	 *
-	 * Or else the caller should do the setjmp for efficiency.
-	 */
-
-	memcpy(result, (const void *)addr, size);
-	return true;
-}
-
-
-/** Locates an instruction inside the given bundle that
+/* Locates an instruction inside the given bundle that
  * has the specified mnemonic, and whose first 'num_operands_to_match'
  * operands exactly match those in 'operand_values'.
  */
@@ -107,13 +83,13 @@ static const struct tile_decoded_instruction *find_matching_insn(
 	return NULL;
 }
 
-/** Does this bundle contain an 'iret' instruction? */
+/* Does this bundle contain an 'iret' instruction? */
 static inline bool bt_has_iret(const struct BacktraceBundle *bundle)
 {
 	return find_matching_insn(bundle, TILE_OPC_IRET, NULL, 0) != NULL;
 }
 
-/** Does this bundle contain an 'addi sp, sp, OFFSET' or
+/* Does this bundle contain an 'addi sp, sp, OFFSET' or
  * 'addli sp, sp, OFFSET' instruction, and if so, what is OFFSET?
  */
 static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust)
@@ -124,7 +100,7 @@ static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust)
 		find_matching_insn(bundle, TILE_OPC_ADDI, vals, 2);
 	if (insn == NULL)
 		insn = find_matching_insn(bundle, TILE_OPC_ADDLI, vals, 2);
-#if TILE_CHIP >= 10
+#ifdef __tilegx__
 	if (insn == NULL)
 		insn = find_matching_insn(bundle, TILEGX_OPC_ADDXLI, vals, 2);
 	if (insn == NULL)
@@ -137,7 +113,7 @@ static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust)
 	return true;
 }
 
-/** Does this bundle contain any 'info OP' or 'infol OP'
+/* Does this bundle contain any 'info OP' or 'infol OP'
  * instruction, and if so, what are their OP?  Note that OP is interpreted
  * as an unsigned value by this code since that's what the caller wants.
  * Returns the number of info ops found.
@@ -161,7 +137,7 @@ static int bt_get_info_ops(const struct BacktraceBundle *bundle,
 	return num_ops;
 }
 
-/** Does this bundle contain a jrp instruction, and if so, to which
+/* Does this bundle contain a jrp instruction, and if so, to which
  * register is it jumping?
  */
 static bool bt_has_jrp(const struct BacktraceBundle *bundle, int *target_reg)
@@ -175,7 +151,7 @@ static bool bt_has_jrp(const struct BacktraceBundle *bundle, int *target_reg)
 	return true;
 }
 
-/** Does this bundle modify the specified register in any way? */
+/* Does this bundle modify the specified register in any way? */
 static bool bt_modifies_reg(const struct BacktraceBundle *bundle, int reg)
 {
 	int i, j;
@@ -195,34 +171,34 @@ static bool bt_modifies_reg(const struct BacktraceBundle *bundle, int reg)
 	return false;
 }
 
-/** Does this bundle modify sp? */
+/* Does this bundle modify sp? */
 static inline bool bt_modifies_sp(const struct BacktraceBundle *bundle)
 {
 	return bt_modifies_reg(bundle, TREG_SP);
 }
 
-/** Does this bundle modify lr? */
+/* Does this bundle modify lr? */
 static inline bool bt_modifies_lr(const struct BacktraceBundle *bundle)
 {
 	return bt_modifies_reg(bundle, TREG_LR);
 }
 
-/** Does this bundle contain the instruction 'move fp, sp'? */
+/* Does this bundle contain the instruction 'move fp, sp'? */
 static inline bool bt_has_move_r52_sp(const struct BacktraceBundle *bundle)
 {
 	static const int vals[2] = { 52, TREG_SP };
 	return find_matching_insn(bundle, TILE_OPC_MOVE, vals, 2) != NULL;
 }
 
-/** Does this bundle contain a store of lr to sp? */
+/* Does this bundle contain a store of lr to sp? */
 static inline bool bt_has_sw_sp_lr(const struct BacktraceBundle *bundle)
 {
 	static const int vals[2] = { TREG_SP, TREG_LR };
 	return find_matching_insn(bundle, OPCODE_STORE, vals, 2) != NULL;
 }
 
-#if TILE_CHIP >= 10
-/** Track moveli values placed into registers. */
+#ifdef __tilegx__
+/* Track moveli values placed into registers. */
 static inline void bt_update_moveli(const struct BacktraceBundle *bundle,
 				    int moveli_args[])
 {
@@ -238,7 +214,7 @@ static inline void bt_update_moveli(const struct BacktraceBundle *bundle,
 	}
 }
 
-/** Does this bundle contain an 'add sp, sp, reg' instruction
+/* Does this bundle contain an 'add sp, sp, reg' instruction
  * from a register that we saw a moveli into, and if so, what
  * is the value in the register?
  */
@@ -260,11 +236,11 @@ static bool bt_has_add_sp(const struct BacktraceBundle *bundle, int *adjust,
 }
 #endif
 
-/** Locates the caller's PC and SP for a program starting at the
+/* Locates the caller's PC and SP for a program starting at the
  * given address.
  */
 static void find_caller_pc_and_caller_sp(CallerLocation *location,
-					 const VirtualAddress start_pc,
+					 const unsigned long start_pc,
 					 BacktraceMemoryReader read_memory_func,
 					 void *read_memory_func_extra)
 {
@@ -288,9 +264,9 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location,
 	tile_bundle_bits prefetched_bundles[32];
 	int num_bundles_prefetched = 0;
 	int next_bundle = 0;
-	VirtualAddress pc;
+	unsigned long pc;
 
-#if TILE_CHIP >= 10
+#ifdef __tilegx__
 	/* Naively try to track moveli values to support addx for -m32. */
 	int moveli_args[TILEGX_NUM_REGISTERS] = { 0 };
 #endif
@@ -369,10 +345,6 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location,
 				/* Weird; reserved value, ignore it. */
 				continue;
 			}
-			if (info_operand & ENTRY_POINT_INFO_OP) {
-				/* This info op is ignored by the backtracer. */
-				continue;
-			}
 
 			/* Skip info ops which are not in the
 			 * "one_ago" mode we want right now.
@@ -453,7 +425,7 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location,
 		if (!sp_determined) {
 			int adjust;
 			if (bt_has_addi_sp(&bundle, &adjust)
-#if TILE_CHIP >= 10
+#ifdef __tilegx__
 			    || bt_has_add_sp(&bundle, &adjust, moveli_args)
 #endif
 				) {
@@ -504,7 +476,7 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location,
 			}
 		}
 
-#if TILE_CHIP >= 10
+#ifdef __tilegx__
 		/* Track moveli arguments for -m32 mode. */
 		bt_update_moveli(&bundle, moveli_args);
 #endif
@@ -546,18 +518,26 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location,
 	}
 }
 
+/* Initializes a backtracer to start from the given location.
+ *
+ * If the frame pointer cannot be determined it is set to -1.
+ *
+ * state: The state to be filled in.
+ * read_memory_func: A callback that reads memory.
+ * read_memory_func_extra: An arbitrary argument to read_memory_func.
+ * pc: The current PC.
+ * lr: The current value of the 'lr' register.
+ * sp: The current value of the 'sp' register.
+ * r52: The current value of the 'r52' register.
+ */
 void backtrace_init(BacktraceIterator *state,
 		    BacktraceMemoryReader read_memory_func,
 		    void *read_memory_func_extra,
-		    VirtualAddress pc, VirtualAddress lr,
-		    VirtualAddress sp, VirtualAddress r52)
+		    unsigned long pc, unsigned long lr,
+		    unsigned long sp, unsigned long r52)
 {
 	CallerLocation location;
-	VirtualAddress fp, initial_frame_caller_pc;
-
-	if (read_memory_func == NULL) {
-		read_memory_func = bt_read_memory;
-	}
+	unsigned long fp, initial_frame_caller_pc;
 
 	/* Find out where we are in the initial frame. */
 	find_caller_pc_and_caller_sp(&location, pc,
@@ -630,12 +610,15 @@ void backtrace_init(BacktraceIterator *state,
 /* Handle the case where the register holds more bits than the VA. */
 static bool valid_addr_reg(bt_int_reg_t reg)
 {
-	return ((VirtualAddress)reg == reg);
+	return ((unsigned long)reg == reg);
}
 
+/* Advances the backtracing state to the calling frame, returning
+ * true iff successful.
+ */
 bool backtrace_next(BacktraceIterator *state)
 {
-	VirtualAddress next_fp, next_pc;
+	unsigned long next_fp, next_pc;
 	bt_int_reg_t next_frame[2];
 
 	if (state->fp == -1) {
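Note: the reworked backtracer API above now takes plain unsigned long addresses, and since the default bt_read_memory() is removed, the read callback is mandatory. As a minimal sketch of a caller, assuming the iterator still exposes pc/sp fields as in prior releases (the demo_* names are illustrative, not part of this patch):

	#include <linux/kernel.h>
	#include <linux/ptrace.h>
	#include <linux/string.h>
	#include <asm/backtrace.h>

	/* Toy reader: a real caller must validate 'addr' before copying. */
	static bool demo_read_memory(void *result, unsigned long addr,
				     unsigned int size, void *extra)
	{
		memcpy(result, (const void *)addr, size);
		return true;
	}

	static void demo_dump_stack(struct pt_regs *regs)
	{
		BacktraceIterator it;

		/* r52 may hold the frame pointer; pass what we have. */
		backtrace_init(&it, demo_read_memory, NULL,
			       regs->pc, regs->lr, regs->sp, regs->regs[52]);
		do
			pr_info("  frame: pc %#lx sp %#lx\n", it.pc, it.sp);
		while (backtrace_next(&it));
	}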
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index dbc213adf5e1..bf5e9d70266c 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -135,26 +135,15 @@ long tile_compat_sys_msgrcv(int msqid,
 
 /* Provide the compat syscall number to call mapping. */
 #undef __SYSCALL
-#define __SYSCALL(nr, call) [nr] = (compat_##call),
+#define __SYSCALL(nr, call) [nr] = (call),
 
 /* The generic versions of these don't work for Tile. */
 #define compat_sys_msgrcv tile_compat_sys_msgrcv
 #define compat_sys_msgsnd tile_compat_sys_msgsnd
 
 /* See comments in sys.c */
-#define compat_sys_fadvise64 sys32_fadvise64
 #define compat_sys_fadvise64_64 sys32_fadvise64_64
 #define compat_sys_readahead sys32_readahead
-#define compat_sys_sync_file_range compat_sys_sync_file_range2
-
-/* We leverage the "struct stat64" type for 32-bit time_t/nsec. */
-#define compat_sys_stat64 sys_stat64
-#define compat_sys_lstat64 sys_lstat64
-#define compat_sys_fstat64 sys_fstat64
-#define compat_sys_fstatat64 sys_fstatat64
-
-/* The native sys_ptrace dynamically handles compat binaries. */
-#define compat_sys_ptrace sys_ptrace
 
 /* Call the trampolines to manage pt_regs where necessary. */
 #define compat_sys_execve _compat_sys_execve
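For orientation, the __SYSCALL change above stops forcing a compat_ prefix onto every entry; the #defines that follow then rewrite only the handlers that need Tile-specific versions. The macro ultimately expands inside a designated-initializer table. A sketch of that shape, with the table name and the sys_ni_syscall default assumed from the usual kernel pattern rather than shown in this hunk:

	/* Sketch: how the redefined __SYSCALL builds the compat table. */
	#define __SYSCALL(nr, call) [nr] = (call),

	void *compat_sys_call_table[__NR_syscalls] = {
		[0 ... __NR_syscalls - 1] = sys_ni_syscall,
	#include <asm/unistd.h>
	};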
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index dbb0dfc7bece..a7869ad62776 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -317,7 +317,7 @@ long compat_sys_rt_sigreturn(struct pt_regs *regs)
 	return 0;
 
 badframe:
-	force_sig(SIGSEGV, current);
+	signal_fault("bad sigreturn frame", regs, frame, 0);
 	return 0;
 }
 
@@ -431,6 +431,6 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	return 0;
 
 give_sigsegv:
-	force_sigsegv(sig, current);
+	signal_fault("bad setup frame", regs, frame, sig);
 	return -EFAULT;
 }
diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S
new file mode 100644
index 000000000000..f465d1eda20f
--- /dev/null
+++ b/arch/tile/kernel/futex_64.S
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for
+ * more details.
+ *
+ * Atomically access user memory, but use MMU to avoid propagating
+ * kernel exceptions.
+ */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/futex.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+/*
+ * Provide a set of atomic memory operations supporting <asm/futex.h>.
+ *
+ * r0: user address to manipulate
+ * r1: new value to write, or for cmpxchg, old value to compare against
+ * r2: (cmpxchg only) new value to write
+ *
+ * Return __get_user struct, r0 with value, r1 with error.
+ */
+#define FUTEX_OP(name, ...) \
+STD_ENTRY(futex_##name) \
+	__VA_ARGS__; \
+	{ \
+	 move   r1, zero; \
+	 jrp    lr \
+	}; \
+	STD_ENDPROC(futex_##name); \
+	.pushsection __ex_table,"a"; \
+	.quad 1b, get_user_fault; \
+	.popsection
+
+	.pushsection .fixup,"ax"
+get_user_fault:
+	{ movei r1, -EFAULT; jrp lr }
+	ENDPROC(get_user_fault)
+	.popsection
+
+FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2)
+FUTEX_OP(set, 1: exch4 r0, r0, r1)
+FUTEX_OP(add, 1: fetchadd4 r0, r0, r1)
+FUTEX_OP(or, 1: fetchor4 r0, r0, r1)
+FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1)
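The register contract in the header comment (r0 returns the value, r1 the error) is a two-word return that the C side can unpack. A hedged sketch of one wrapper: 'struct __get_user' follows the convention the comment describes, but the exact prototypes and the demo_* wrapper name are illustrative, not taken from this patch:

	/* Value/error pair returned in r0/r1 by the routines above. */
	struct __get_user {
		unsigned long val;	/* r0: value loaded from the futex word */
		int err;		/* r1: 0, or -EFAULT via the fixup path */
	};

	extern struct __get_user futex_add(u32 __user *uaddr, int n);

	/* Illustrative wrapper in the style of futex_atomic_op_inuser(). */
	static int demo_futex_add(u32 __user *uaddr, int n, int *oldval)
	{
		struct __get_user ret = futex_add(uaddr, n);

		if (ret.err)
			return ret.err;
		*oldval = ret.val;	/* fetchadd4 returns the prior value */
		return 0;
	}

Note the faulting instruction in each op is labeled "1:" so the __ex_table entry can redirect a bad user address to get_user_fault instead of taking a kernel exception.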
diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
index e910530436e6..3bddef710de4 100644
--- a/arch/tile/kernel/hardwall.c
+++ b/arch/tile/kernel/hardwall.c
@@ -268,12 +268,10 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
 	found_processes = 0;
 	list_for_each_entry(p, &rect->task_head, thread.hardwall_list) {
 		BUG_ON(p->thread.hardwall != rect);
-		if (p->sighand) {
+		if (!(p->flags & PF_EXITING)) {
 			found_processes = 1;
 			pr_notice("hardwall: killing %d\n", p->pid);
-			spin_lock(&p->sighand->siglock);
-			__group_send_sig_info(info.si_signo, &info, p);
-			spin_unlock(&p->sighand->siglock);
+			do_send_sig_info(info.si_signo, &info, p, false);
 		}
 	}
 	if (!found_processes)
diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S
new file mode 100644
index 000000000000..6bc3a932fe45
--- /dev/null
+++ b/arch/tile/kernel/head_64.S
@@ -0,0 +1,269 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for
+ * more details.
+ *
+ * TILE startup code.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/thread_info.h>
+#include <asm/processor.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+
+/*
+ * This module contains the entry code for kernel images. It performs the
+ * minimal setup needed to call the generic C routines.
+ */
+
+	__HEAD
+ENTRY(_start)
+	/* Notify the hypervisor of what version of the API we want */
+	{
+	 movei  r1, TILE_CHIP
+	 movei  r2, TILE_CHIP_REV
+	}
+	{
+	 moveli r0, _HV_VERSION
+	 jal    hv_init
+	}
+	/* Get a reasonable default ASID in r0 */
+	{
+	 move   r0, zero
+	 jal    hv_inquire_asid
+	}
+
+	/*
+	 * Install the default page table.  The relocation required to
+	 * statically define the table is a bit too complex, so we have
+	 * to plug in the pointer from the L0 to the L1 table by hand.
+	 * We only do this on the first cpu to boot, though, since the
+	 * other CPUs should see a properly-constructed page table.
+	 */
+	{
+	 v4int_l r2, zero, r0	/* ASID for hv_install_context */
+	 moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET)
+	}
+	{
+	 shl16insli r4, r4, hw0(swapper_pgprot - PAGE_OFFSET)
+	}
+	{
+	 ld     r1, r4		/* access_pte for hv_install_context */
+	}
+	{
+	 moveli r0, hw1_last(.Lsv_data_pmd - PAGE_OFFSET)
+	 moveli r6, hw1_last(temp_data_pmd - PAGE_OFFSET)
+	}
+	{
+	 /* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */
+	 bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL
+	 inv    r4
+	}
+	bnez   r7, .Lno_write
+	{
+	 shl16insli r0, r0, hw0(.Lsv_data_pmd - PAGE_OFFSET)
+	 shl16insli r6, r6, hw0(temp_data_pmd - PAGE_OFFSET)
+	}
+	{
+	 /* Cut off the low bits of the PT address. */
+	 shrui  r6, r6, HV_LOG2_PAGE_TABLE_ALIGN
+	 /* Start with our access pte. */
+	 move   r5, r1
+	}
+	{
+	 /* Stuff the address into the page table pointer slot of the PTE. */
+	 bfins  r5, r6, HV_PTE_INDEX_PTFN, \
+		HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1
+	}
+	{
+	 /* Store the L0 data PTE. */
+	 st     r0, r5
+	 addli  r6, r6, (temp_code_pmd - temp_data_pmd) >> \
+		HV_LOG2_PAGE_TABLE_ALIGN
+	}
+	{
+	 addli  r0, r0, .Lsv_code_pmd - .Lsv_data_pmd
+	 bfins  r5, r6, HV_PTE_INDEX_PTFN, \
+		HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1
+	}
+	/* Store the L0 code PTE. */
+	st     r0, r5
+
+.Lno_write:
+	moveli lr, hw2_last(1f)
+	{
+	 shl16insli lr, lr, hw1(1f)
+	 moveli r0, hw1_last(swapper_pg_dir - PAGE_OFFSET)
+	}
+	{
+	 shl16insli lr, lr, hw0(1f)
+	 shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET)
+	}
+	{
+	 move   r3, zero
+	 j      hv_install_context
+	}
+1:
+
+	/* Install the interrupt base. */
+	moveli r0, hw2_last(MEM_SV_START)
+	shl16insli r0, r0, hw1(MEM_SV_START)
+	shl16insli r0, r0, hw0(MEM_SV_START)
+	mtspr  SPR_INTERRUPT_VECTOR_BASE_K, r0
+
+	/*
+	 * Get our processor number and save it away in SAVE_K_0.
+	 * Extract stuff from the topology structure: r4 = y, r6 = x,
+	 * r5 = width.  FIXME: consider whether we want to just make these
+	 * 64-bit values (and if so fix smp_topology write below, too).
+	 */
+	jal    hv_inquire_topology
+	{
+	 v4int_l r5, zero, r1	/* r5 = width */
+	 shrui  r4, r0, 32	/* r4 = y */
+	}
+	{
+	 v4int_l r6, zero, r0	/* r6 = x */
+	 mul_lu_lu r4, r4, r5
+	}
+	{
+	 add    r4, r4, r6	/* r4 == cpu == y*width + x */
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * Load up our per-cpu offset.  When the first (master) tile
+	 * boots, this value is still zero, so we will load boot_pc
+	 * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+	 * The master tile initializes the per-cpu offset array, so that
+	 * when subsequent (secondary) tiles boot, they will instead load
+	 * from their per-cpu versions of boot_sp and boot_pc.
+	 */
+	moveli r5, hw2_last(__per_cpu_offset)
+	shl16insli r5, r5, hw1(__per_cpu_offset)
+	shl16insli r5, r5, hw0(__per_cpu_offset)
+	shl3add r5, r4, r5
+	ld     r5, r5
+	bnez   r5, 1f
+
+	/*
+	 * Save the width and height to the smp_topology variable
+	 * for later use.
+	 */
+	moveli r0, hw2_last(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+	shl16insli r0, r0, hw1(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+	shl16insli r0, r0, hw0(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+	st     r0, r1
+1:
+#else
+	move   r5, zero
+#endif
+
+	/* Load and go with the correct pc and sp. */
+	{
+	 moveli r1, hw2_last(boot_sp)
+	 moveli r0, hw2_last(boot_pc)
+	}
+	{
+	 shl16insli r1, r1, hw1(boot_sp)
+	 shl16insli r0, r0, hw1(boot_pc)
+	}
+	{
+	 shl16insli r1, r1, hw0(boot_sp)
+	 shl16insli r0, r0, hw0(boot_pc)
+	}
+	{
+	 add    r1, r1, r5
+	 add    r0, r0, r5
+	}
+	ld     r0, r0
+	ld     sp, r1
+	or     r4, sp, r4
+	mtspr  SPR_SYSTEM_SAVE_K_0, r4	/* save ksp0 + cpu */
+	addi   sp, sp, -STACK_TOP_DELTA
+	{
+	 move   lr, zero	/* stop backtraces in the called function */
+	 jr     r0
+	}
+	ENDPROC(_start)
+
+__PAGE_ALIGNED_BSS
+	.align PAGE_SIZE
+ENTRY(empty_zero_page)
+	.fill PAGE_SIZE,1,0
+	END(empty_zero_page)
+
+	.macro PTE cpa, bits1
+	.quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\
+	      HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\
+	      (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN)
+	.endm
+
+__PAGE_ALIGNED_DATA
+	.align PAGE_SIZE
+ENTRY(swapper_pg_dir)
+	.org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE
+.Lsv_data_pmd:
+	.quad 0		/* PTE temp_data_pmd - PAGE_OFFSET, 0 */
+	.org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE
+.Lsv_code_pmd:
+	.quad 0		/* PTE temp_code_pmd - PAGE_OFFSET, 0 */
+	.org swapper_pg_dir + HV_L0_SIZE
+	END(swapper_pg_dir)
+
+	.align HV_PAGE_TABLE_ALIGN
+ENTRY(temp_data_pmd)
+	/*
+	 * We fill the PAGE_OFFSET pmd with huge pages with
+	 * VA = PA + PAGE_OFFSET.  We remap things with more precise access
+	 * permissions later.
+	 */
+	.set addr, 0
+	.rept HV_L1_ENTRIES
+	PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE
+	.set addr, addr + HV_PAGE_SIZE_LARGE
+	.endr
+	.org temp_data_pmd + HV_L1_SIZE
+	END(temp_data_pmd)
+
+	.align HV_PAGE_TABLE_ALIGN
+ENTRY(temp_code_pmd)
+	/*
+	 * We fill the MEM_SV_START pmd with huge pages with
+	 * VA = PA + PAGE_OFFSET.  We remap things with more precise access
+	 * permissions later.
+	 */
+	.set addr, 0
+	.rept HV_L1_ENTRIES
+	PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE
+	.set addr, addr + HV_PAGE_SIZE_LARGE
+	.endr
+	.org temp_code_pmd + HV_L1_SIZE
+	END(temp_code_pmd)
+
+	/*
+	 * Isolate swapper_pgprot to its own cache line, since each cpu
+	 * starting up will read it using VA-is-PA and local homing.
+	 * This would otherwise likely conflict with other data on the cache
+	 * line, once we have set its permanent home in the page tables.
+	 */
+	__INITDATA
+	.align CHIP_L2_LINE_SIZE()
+ENTRY(swapper_pgprot)
+	.quad HV_PTE_PRESENT | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE)
+	.align CHIP_L2_LINE_SIZE()
+	END(swapper_pgprot)
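The topology arithmetic in _start ("r4 == cpu == y*width + x", then an indexed load from __per_cpu_offset to pick boot_pc/boot_sp) is easier to follow in C. This rendering is purely illustrative of what the bundles compute; the demo_* helper is not kernel code:

	extern unsigned long __per_cpu_offset[];
	extern unsigned long boot_pc, boot_sp;

	static unsigned long demo_boot_value(unsigned long *base,
					     unsigned int x, unsigned int y,
					     unsigned int width)
	{
		unsigned int cpu = y * width + x;		/* r4 = y*width + x */
		unsigned long off = __per_cpu_offset[cpu];	/* shl3add + ld */

		/*
		 * The master tile sees off == 0 and uses the static
		 * boot_pc/boot_sp; secondaries read their per-cpu copies.
		 */
		return *(unsigned long *)((char *)base + off);
	}

A caller would use demo_boot_value(&boot_pc, ...) for the entry point and demo_boot_value(&boot_sp, ...) for the stack, which is exactly the pair of "add rN, rN, r5; ld" sequences above.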
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index fffcfa6b3a62..72ade79b621b 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -851,14 +851,27 @@ STD_ENTRY(interrupt_return)
 	/* Check to see if there is any work to do before returning to user. */
 	{
 	 addi   r29, r32, THREAD_INFO_FLAGS_OFFSET
-	 moveli r28, lo16(_TIF_ALLWORK_MASK)
+	 moveli r1, lo16(_TIF_ALLWORK_MASK)
 	}
 	{
 	 lw     r29, r29
-	 auli   r28, r28, ha16(_TIF_ALLWORK_MASK)
+	 auli   r1, r1, ha16(_TIF_ALLWORK_MASK)
 	}
-	and     r28, r29, r28
-	bnz     r28, .Lwork_pending
+	and     r1, r29, r1
+	bzt     r1, .Lrestore_all
+
+	/*
+	 * Make sure we have all the registers saved for signal
+	 * handling or single-step.  Call out to C code to figure out
+	 * exactly what we need to do for each flag bit, then if
+	 * necessary, reload the flags and recheck.
+	 */
+	push_extra_callee_saves r0
+	{
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	 jal    do_work_pending
+	}
+	bnz    r0, .Lresume_userspace
 
 	/*
 	 * In the NMI case we
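The new fast path above falls through to .Lrestore_all when no work bits are set, and otherwise makes a single call into C. The helper itself is added elsewhere in this series (process.c is touched by this diffstat); the sketch below is a simplified rendering of the flag-by-flag dispatch that replaces the deleted assembly in the next hunk, not a copy of the real function:

	/* Sketch: returns nonzero if the caller should reload the
	 * flags and recheck, matching "bnz r0, .Lresume_userspace". */
	int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
	{
		if (thread_info_flags & _TIF_NEED_RESCHED) {
			schedule();
			return 1;	/* recheck: schedule() may set new flags */
		}
		if (thread_info_flags & _TIF_ASYNC_TLB) {
			do_async_page_fault(regs);
			return 1;	/* may have queued a signal */
		}
		if (thread_info_flags & _TIF_SIGPENDING) {
			do_signal(regs);
			return 1;
		}
		if (thread_info_flags & _TIF_SINGLESTEP) {
			if (user_mode(regs))	/* don't step privileged code */
				single_step_once(regs);
			return 0;
		}
		panic("work_pending: bad flags %#x\n", thread_info_flags);
	}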
@@ -1099,99 +1112,6 @@ STD_ENTRY(interrupt_return)
 	pop_reg r50
 	pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51)
 	j .Lcontinue_restore_regs
-
-.Lwork_pending:
-	/* Mask the reschedule flag */
-	andi   r28, r29, _TIF_NEED_RESCHED
-
-	{
-	 /*
-	  * If the NEED_RESCHED flag is called, we call schedule(), which
-	  * may drop this context right here and go do something else.
-	  * On return, jump back to .Lresume_userspace and recheck.
-	  */
-	 bz     r28, .Lasync_tlb
-
-	 /* Mask the async-tlb flag */
-	 andi   r28, r29, _TIF_ASYNC_TLB
-	}
-
-	jal    schedule
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Reload the flags and check again */
-	j      .Lresume_userspace
-
-.Lasync_tlb:
-	{
-	 bz     r28, .Lneed_sigpending
-
-	 /* Mask the sigpending flag */
-	 andi   r28, r29, _TIF_SIGPENDING
-	}
-
-	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	jal    do_async_page_fault
-	FEEDBACK_REENTER(interrupt_return)
-
-	/*
-	 * Go restart the "resume userspace" process.  We may have
-	 * fired a signal, and we need to disable interrupts again.
-	 */
-	j      .Lresume_userspace
-
-.Lneed_sigpending:
-	/*
-	 * At this point we are either doing signal handling or single-step,
-	 * so either way make sure we have all the registers saved.
-	 */
-	push_extra_callee_saves r0
-
-	{
-	 /* If no signal pending, skip to singlestep check */
-	 bz     r28, .Lneed_singlestep
-
-	 /* Mask the singlestep flag */
-	 andi   r28, r29, _TIF_SINGLESTEP
-	}
-
-	jal    do_signal
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Reload the flags and check again */
-	j      .Lresume_userspace
-
-.Lneed_singlestep:
-	{
-	 /* Get a pointer to the EX1 field */
-	 PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
-
-	 /* If we get here, our bit must be set. */
-	 bz     r28, .Lwork_confusion
-	}
-	/* If we are in priv mode, don't single step */
-	lw     r28, r29
-	andi   r28, r28, SPR_EX_CONTEXT_1_1__PL_MASK	/* mask off ICS */
-	bnz    r28, .Lrestore_all
-
-	/* Allow interrupts within the single step code */
-	TRACE_IRQS_ON	/* Note: clobbers registers r0-r29 */
-	IRQ_ENABLE(r20, r21)
-
-	/* try to single-step the current instruction */
-	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	jal    single_step_once
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Re-disable interrupts.  TRACE_IRQS_OFF in .Lrestore_all. */
-	IRQ_DISABLE(r20,r21)
-
-	j      .Lrestore_all
-
-.Lwork_confusion:
-	move   r0, r28
-	panic  "thread_info allwork flags unhandled on userspace resume: %#x"
-
 	STD_ENDPROC(interrupt_return)
 
 	/*
@@ -1550,7 +1470,10 @@ STD_ENTRY(_sys_clone)
 	 * We place it in the __HEAD section to ensure it is relatively
 	 * near to the intvec_SWINT_1 code (reachable by a conditional branch).
 	 *
-	 * Must match register usage in do_page_fault().
+	 * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics().
+	 *
+	 * As we do in lib/atomic_asm_32.S, we bypass a store if the value we
+	 * would store is the same as the value we just loaded.
 	 */
 	__HEAD
 	.align 64
@@ -1611,17 +1534,7 @@ ENTRY(sys_cmpxchg)
 	{
	 shri	r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
 	 slt_u  r23, r0, r23
-
-	 /*
-	  * Ensure that the TLB is loaded before we take out the lock.
-	  * On TILEPro, this will start fetching the value all the way
-	  * into our L1 as well (and if it gets modified before we
-	  * grab the lock, it will be invalidated from our cache
-	  * before we reload it).  On tile64, we'll start fetching it
-	  * into our L1 if we're the home, and if we're not, we'll
-	  * still at least start fetching it into the home's L2.
-	  */
-	 lw	r26, r0
+	 lw	r26, r0	/* see comment in the "#else" for the "lw r26". */
 	}
 	{
 	 s2a    r21, r20, r21
@@ -1637,18 +1550,9 @@ ENTRY(sys_cmpxchg)
 	 bbs    r23, .Lcmpxchg64
 	 andi   r23, r0, 7	/* Precompute alignment for cmpxchg64. */
 	}
 	{
-	 /*
-	  * We very carefully align the code that actually runs with
-	  * the lock held (nine bundles) so that we know it is all in
-	  * the icache when we start.  This instruction (the jump) is
-	  * at the start of the first cache line, address zero mod 64;
-	  * we jump to somewhere in the second cache line to issue the
-	  * tns, then jump back to finish up.
-	  */
 	 s2a    ATOMIC_LOCK_REG_NAME, r25, r21
-	 j      .Lcmpxchg32_tns
+	 j      .Lcmpxchg32_tns	/* see comment in the #else for the jump. */
 	}
 
 #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 
@@ -1713,24 +1617,25 @@ ENTRY(sys_cmpxchg)
 	{
 	 /*
 	  * We very carefully align the code that actually runs with
-	  * the lock held (nine bundles) so that we know it is all in
+	  * the lock held (twelve bundles) so that we know it is all in
 	  * the icache when we start.  This instruction (the jump) is
 	  * at the start of the first cache line, address zero mod 64;
-	  * we jump to somewhere in the second cache line to issue the
-	  * tns, then jump back to finish up.
+	  * we jump to the very end of the second cache line to get that
+	  * line loaded in the icache, then fall through to issue the tns
+	  * in the third cache line, at which point it's all cached.
+	  * Note that is for performance, not correctness.
 	  */
 	 j      .Lcmpxchg32_tns
 	}
 
 #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 
-	ENTRY(__sys_cmpxchg_grab_lock)
+/* Symbol for do_page_fault_ics() to use to compare against the PC. */
+.global __sys_cmpxchg_grab_lock
+__sys_cmpxchg_grab_lock:
 
 	/*
 	 * Perform the actual cmpxchg or atomic_update.
-	 * Note that the system <arch/atomic.h> header relies on
-	 * atomic_update() to always perform an "mf", so don't make
-	 * it optional or conditional without modifying that code.
 	 */
 .Ldo_cmpxchg32:
 	{
@@ -1748,10 +1653,13 @@ ENTRY(sys_cmpxchg)
 	}
 	{
 	 mvnz   r24, r23, r25	/* Use atomic_update value if appropriate. */
-	 bbns   r22, .Lcmpxchg32_mismatch
+	 bbns   r22, .Lcmpxchg32_nostore
 	}
+	seq    r22, r24, r21	/* Are we storing the value we loaded? */
+	bbs    r22, .Lcmpxchg32_nostore
 	sw     r0, r24
 
+	/* The following instruction is the start of the second cache line. */
 	/* Do slow mtspr here so the following "mf" waits less. */
 	{
 	 move   sp, r27
@@ -1759,7 +1667,6 @@
 	}
 	mf
 
-	/* The following instruction is the start of the
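The seq/bbs pair added in the .Ldo_cmpxchg32 hunk implements the store-bypass described in the new header comment: with the hashed lock held, the sw is skipped entirely when it would rewrite the value just loaded, so clean cache lines stay clean. In C terms (an illustrative sketch of the logic only; the real path is the assembly above):

	/* What .Ldo_cmpxchg32 now does, sketched in C (lock held). */
	static int demo_cmpxchg32(int *p, int oldval, int newval)
	{
		int val = *p;			/* lw r21, r0 */

		if (val == oldval &&		/* cmpxchg comparison */
		    newval != val)		/* seq r22, r24, r21: bypass */
			*p = newval;		/* sw r0, r24 */
		return val;			/* prior value, as before */
	}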