From 4d178f94ebe123d462a51169b53854cb7f198888 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 12 Apr 2015 09:14:45 -0400 Subject: x86/asm: Merge common 32-bit values in asm-offsets.c Merge common values for 32-bit native and compat. Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Denys Vlasenko Link: http://lkml.kernel.org/r/1428844486-6638-1-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets.c | 19 +++++++++++++++++++ arch/x86/kernel/asm-offsets_32.c | 15 --------------- arch/x86/kernel/asm-offsets_64.c | 21 --------------------- 3 files changed, 19 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 9f6b9341950f..b27f6ec90caa 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -41,6 +41,25 @@ void common(void) { OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) + BLANK(); + OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax); + OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx); + OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx); + OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx); + OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si); + OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di); + OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp); + OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp); + OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip); + + BLANK(); + OFFSET(TI_sysenter_return, thread_info, sysenter_return); + + BLANK(); + OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); +#endif + #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 47703aed74cf..628bfd4c06bb 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -17,17 +17,6 @@ void foo(void); void foo(void) { - OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); - OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); - OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); - OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); - OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); - OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); - OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); - OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); - OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); - BLANK(); - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); @@ -37,7 +26,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_sysenter_return, thread_info, sysenter_return); OFFSET(TI_cpu, thread_info, cpu); BLANK(); @@ -60,9 +48,6 @@ void foo(void) OFFSET(PT_OLDSS, pt_regs, ss); BLANK(); - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); - BLANK(); - OFFSET(saved_context_gdt_desc, saved_context, gdt_desc); BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 5ce6f2da8763..dcaab87da629 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -29,27 +29,6 @@ int main(void) BLANK(); #endif -#ifdef CONFIG_IA32_EMULATION - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - BLANK(); - -#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry) - ENTRY(ax); - ENTRY(bx); - ENTRY(cx); - ENTRY(dx); - ENTRY(si); - ENTRY(di); - ENTRY(bp); - ENTRY(sp); - ENTRY(ip); - BLANK(); -#undef ENTRY - - OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); - BLANK(); -#endif - #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) ENTRY(bx); ENTRY(cx); -- cgit v1.2.3 From 14434052ffb3b7fe8f491e9d0a7793376fb79155 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 12 Apr 2015 09:14:46 -0400 Subject: x86/asm: Remove unused TI_cpu Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Denys Vlasenko Link: http://lkml.kernel.org/r/1428844486-6638-2-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_32.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 628bfd4c06bb..6ce39025f467 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -26,9 +26,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_cpu, thread_info, cpu); - BLANK(); - OFFSET(PT_EBX, pt_regs, bx); OFFSET(PT_ECX, pt_regs, cx); OFFSET(PT_EDX, pt_regs, dx); -- cgit v1.2.3 From ff22e2010144b6aa050da35851f1fa79087cca06 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 12 Apr 2015 21:45:06 +0200 Subject: x86/asm, x86/power/hibernate: Use local labels in asm ... so that they don't appear in the object file and thus in objdump output. They're local anyway and have a meaning only within that file. No functionality change. Signed-off-by: Borislav Petkov Acked-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: H. Peter Anvin Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: linux-pm@vger.kernel.org Link: http://lkml.kernel.org/r/1428867906-12016-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/power/hibernate_asm_64.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 3c4469a7a929..e2386cb4e0c3 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -78,9 +78,9 @@ ENTRY(restore_image) /* code below has been relocated to a safe page */ ENTRY(core_restore_code) -loop: +.Lloop: testq %rdx, %rdx - jz done + jz .Ldone /* get addresses from the pbe and copy the page */ movq pbe_address(%rdx), %rsi @@ -91,8 +91,8 @@ loop: /* progress to the next pbe */ movq pbe_next(%rdx), %rdx - jmp loop -done: + jmp .Lloop +.Ldone: /* jump to the restore_registers address from the image header */ jmpq *%rax /* -- cgit v1.2.3 From c0f6feba784e1087b905ad097d2d9ac0aaf744a5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 15 Apr 2015 08:50:14 +0200 Subject: x86/asm, x86/acpi/wakeup_64.S: Make global label a local one Make it a local symbol so that it doesn't appear in objdump output. No functionality change - code remains the same, just the global label disappears: ffffffff81039dbe: bf 03 00 00 00 mov $0x3,%edi ffffffff81039dc3: 31 c0 xor %eax,%eax ffffffff81039dc5: e8 b6 fd ff ff callq ffffffff81039b80 -ffffffff81039dca: eb 00 jmp ffffffff81039dcc - -ffffffff81039dcc : +ffffffff81039dca: eb 00 jmp ffffffff81039dcc ffffffff81039dcc: 48 c7 c0 80 1a ca 82 mov $0xffffffff82ca1a80,%rax ffffffff81039dd3: 48 8b 98 e2 00 00 00 mov 0xe2(%rax),%rbx ffffffff81039dda: 0f 22 e3 mov %rbx,%cr4 Signed-off-by: Borislav Petkov Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Len Brown Cc: Linus Torvalds Cc: Pavel Machek Cc: Rafael J. Wysocki Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429080614-22610-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/wakeup_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index ae693b51ed8e..8c35df468104 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -62,7 +62,7 @@ ENTRY(do_suspend_lowlevel) pushfq popq pt_regs_flags(%rax) - movq $resume_point, saved_rip(%rip) + movq $.Lresume_point, saved_rip(%rip) movq %rsp, saved_rsp movq %rbp, saved_rbp @@ -75,10 +75,10 @@ ENTRY(do_suspend_lowlevel) xorl %eax, %eax call x86_acpi_enter_sleep_state /* in case something went wrong, restore the machine status and go on */ - jmp resume_point + jmp .Lresume_point .align 4 -resume_point: +.Lresume_point: /* We don't restore %rax, it must be 0 anyway */ movq $saved_context, %rax movq saved_context_cr4(%rax), %rbx -- cgit v1.2.3 From 6a907738ab9840ca3d71c22cd28fba4cbae7f7ce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Apr 2015 10:51:26 +0100 Subject: x86/asm: Enable fast 32-bit put_user_64() for copy_to_user() For fixed sized copies, copy_to_user() will utilize __put_user_size() fastpaths. However, it is missing the translation for 64-bit copies on x86/32. Testing on a Pinetrail Atom, the 64 bit put_user() fastpath is substantially faster than the generic copy_to_user() fallback. Signed-off-by: Chris Wilson Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: intel-gfx@lists.freedesktop.org Link: http://lkml.kernel.org/r/1429091486-11443-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 3c03a5de64d3..0ed5504c6060 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -59,6 +59,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4); return ret; + case 8: + __put_user_size(*(u64 *)from, (u64 __user *)to, + 8, ret, 8); + return ret; } } return __copy_to_user_ll(to, from, n); -- cgit v1.2.3 From aac82d319148c6a84e1bf90b86d3e0ec8bf0ee38 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Apr 2015 15:51:54 -0700 Subject: x86, paravirt, xen: Remove the 64-bit ->irq_enable_sysexit() pvop We don't use irq_enable_sysexit on 64-bit kernels any more. Remove all the paravirt and Xen machinery to support it on 64-bit kernels. Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8a03355698fe5b94194e9e7360f19f91c1b2cf1f.1428100853.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 6 ------ arch/x86/include/asm/paravirt_types.h | 7 ++++--- arch/x86/kernel/asm-offsets.c | 2 ++ arch/x86/kernel/paravirt.c | 4 +++- arch/x86/kernel/paravirt_patch_64.c | 1 - arch/x86/xen/enlighten.c | 3 ++- arch/x86/xen/xen-asm_64.S | 16 ---------------- arch/x86/xen/xen-ops.h | 2 ++ 8 files changed, 13 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a821b1cd4fa7..3cdb9eafbf8c 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -77,12 +77,6 @@ ENTRY(native_usergs_sysret32) swapgs sysretl ENDPROC(native_usergs_sysret32) - -ENTRY(native_irq_enable_sysexit) - swapgs - sti - sysexit -ENDPROC(native_irq_enable_sysexit) #endif /* diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7549b8b369e4..38a0ff9ef06e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -160,13 +160,14 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); unsigned long long (*read_tscp)(unsigned int *aux); +#ifdef CONFIG_X86_32 /* * Atomically enable interrupts and return to userspace. This - * is only ever used to return to 32-bit processes; in a - * 64-bit kernel, it's used for 32-on-64 compat processes, but - * never native 64-bit processes. (Jump, not call.) + * is only used in 32-bit kernels. 64-bit kernels use + * usergs_sysret32 instead. */ void (*irq_enable_sysexit)(void); +#endif /* * Switch to usermode gs and return to 64-bit usermode using diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index b27f6ec90caa..8e3d22a1af94 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -68,7 +68,9 @@ void common(void) { OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); +#ifdef CONFIG_X86_32 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); +#endif OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 548d25f00c90..7563114d9c3a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -154,7 +154,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || +#ifdef CONFIG_X86_32 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || +#endif type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ @@ -371,7 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .load_sp0 = native_load_sp0, -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#if defined(CONFIG_X86_32) .irq_enable_sysexit = native_irq_enable_sysexit, #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index a1da6737ba5b..0de21c62c348 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -49,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, swapgs); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 81665c9f2132..3797b6b31f95 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1267,10 +1267,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tscp = native_read_tscp, .iret = xen_iret, - .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 .usergs_sysret32 = xen_sysret32, .usergs_sysret64 = xen_sysret64, +#else + .irq_enable_sysexit = xen_sysexit, #endif .load_tr_desc = paravirt_nop, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 985fc3ee0973..a2cabb8bd6bf 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -47,22 +47,6 @@ ENTRY(xen_iret) ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) -/* - * sysexit is not used for 64-bit processes, so it's only ever used to - * return to 32-bit compat userspace. - */ -ENTRY(xen_sysexit) - pushq $__USER32_DS - pushq %rcx - pushq $X86_EFLAGS_IF - pushq $__USER32_CS - pushq %rdx - - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_sysexit) -RELOC(xen_sysexit, 1b+1) - ENTRY(xen_sysret64) /* * We're already on the usermode stack at this point, but diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549..c20fe29e65f4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -134,7 +134,9 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); +#ifdef CONFIG_X86_32 __visible void xen_sysexit(void); +#endif __visible void xen_sysret32(void); __visible void xen_sysret64(void); __visible void xen_adjust_exception_frame(void); -- cgit v1.2.3 From 3462bd2adeadc49d9e126bca3b5536a3437a902d Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Mon, 20 Apr 2015 23:27:11 +0200 Subject: x86/asm: Always inline atomics During some code analysis I realized that atomic_add(), atomic_sub() and friends are not necessarily inlined AND that each function is defined multiple times: atomic_inc: 544 duplicates atomic_dec: 215 duplicates atomic_dec_and_test: 107 duplicates atomic64_inc: 38 duplicates [...] Each definition is exact equally, e.g.: ffffffff813171b8 : 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 01 3e lock add %edi,(%rsi) 5d pop %rbp c3 retq In turn each definition has one or more callsites (sure): ffffffff81317c78: e8 3b f5 ff ff callq ffffffff813171b8 [...] ffffffff8131a062: e8 51 d1 ff ff callq ffffffff813171b8 [...] ffffffff8131a190: e8 23 d0 ff ff callq ffffffff813171b8 [...] The other way around would be to remove the static linkage - but I prefer an enforced inlining here. Before: text data bss dec hex filename 81467393 19874720 20168704 121510817 73e1ba1 vmlinux.orig After: text data bss dec hex filename 81461323 19874720 20168704 121504747 73e03eb vmlinux.inlined Yes, the inlining here makes the kernel even smaller! ;) Linus further observed: "I have this memory of having seen that before - the size heuristics for gcc getting confused by inlining. [...] It might be a good idea to mark things that are basically just wrappers around a single (or a couple of) asm instruction to be always_inline." Signed-off-by: Hagen Paul Pfeifer Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429565231-4609-1-git-send-email-hagen@jauu.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 16 ++++++++-------- arch/x86/include/asm/atomic64_64.h | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 5e5cd123fdfb..75a9ee8529f3 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i) * * Atomically adds @i to @v. */ -static inline void atomic_add(int i, atomic_t *v) +static __always_inline void atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) @@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v) * * Atomically subtracts @i from @v. */ -static inline void atomic_sub(int i, atomic_t *v) +static __always_inline void atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) @@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline int atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } @@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) * * Atomically increments @v by 1. */ -static inline void atomic_inc(atomic_t *v) +static __always_inline void atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); @@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic_dec(atomic_t *v) +static __always_inline void atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); @@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic_dec_and_test(atomic_t *v) +static __always_inline int atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } @@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline int atomic_add_return(int i, atomic_t *v) +static __always_inline int atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } @@ -191,7 +191,7 @@ static inline int atomic_xchg(atomic_t *v, int new) * Atomically adds @a to @v, so long as @v was not already @u. * Returns the old value of @v. */ -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index f8d273e18516..b965f9e03f2a 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -40,7 +40,7 @@ static inline void atomic64_set(atomic64_t *v, long i) * * Atomically adds @i to @v. */ -static inline void atomic64_add(long i, atomic64_t *v) +static __always_inline void atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) @@ -81,7 +81,7 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) * * Atomically increments @v by 1. */ -static inline void atomic64_inc(atomic64_t *v) +static __always_inline void atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) @@ -94,7 +94,7 @@ static inline void atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic64_dec(atomic64_t *v) +static __always_inline void atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) @@ -148,7 +148,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline long atomic64_add_return(long i, atomic64_t *v) +static __always_inline long atomic64_add_return(long i, atomic64_t *v) { return i + xadd(&v->counter, i); } -- cgit v1.2.3 From 17be0aec74fb036eb4eb32c2268f3420a034762b Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 21 Apr 2015 18:27:29 +0200 Subject: x86/asm/entry/64: Implement better check for canonical addresses This change makes the check exact (no more false positives on "negative" addresses). Andy explains: "Canonical addresses either start with 17 zeros or 17 ones. In the old code, we checked that the top (64-47) = 17 bits were all zero. We did this by shifting right by 47 bits and making sure that nothing was left. In the new code, we're shifting left by (64 - 48) = 16 bits and then signed shifting right by the same amount, this propagating the 17th highest bit to all positions to its left. If we get the same value we started with, then we're good to go." While it isn't really important to be fully correct here - almost all addresses we'll ever see will be userspace ones, but OTOH it looks to be cheap enough: the new code uses two more ALU ops but preserves %rcx, allowing to not reload it from pt_regs->cx again. On disassembly level, the changes are: cmp %rcx,0x80(%rsp) -> mov 0x80(%rsp),%r11; cmp %rcx,%r11 shr $0x2f,%rcx -> shl $0x10,%rcx; sar $0x10,%rcx; cmp %rcx,%r11 mov 0x58(%rsp),%rcx -> (eliminated) Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429633649-20169-1-git-send-email-dvlasenk@redhat.com [ Changelog massage. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c7b238494b31..3c78a15a537d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -410,26 +410,27 @@ syscall_return: * a completely clean 64-bit userspace context. */ movq RCX(%rsp),%rcx - cmpq %rcx,RIP(%rsp) /* RCX == RIP */ + movq RIP(%rsp),%r11 + cmpq %rcx,%r11 /* RCX == RIP */ jne opportunistic_sysret_failed /* * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP * in kernel space. This essentially lets the user take over - * the kernel, since userspace controls RSP. It's not worth - * testing for canonicalness exactly -- this check detects any - * of the 17 high bits set, which is true for non-canonical - * or kernel addresses. (This will pessimize vsyscall=native. - * Big deal.) + * the kernel, since userspace controls RSP. * - * If virtual addresses ever become wider, this will need + * If width of "canonical tail" ever becomes variable, this will need * to be updated to remain correct on both old and new CPUs. */ .ifne __VIRTUAL_MASK_SHIFT - 47 .error "virtual address width changed -- SYSRET checks need update" .endif - shr $__VIRTUAL_MASK_SHIFT, %rcx - jnz opportunistic_sysret_failed + /* Change top 16 bits to be the sign-extension of 47th bit */ + shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx + /* If this changed %rcx, it was not canonical */ + cmpq %rcx, %r11 + jne opportunistic_sysret_failed cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ jne opportunistic_sysret_failed @@ -466,8 +467,8 @@ syscall_return: */ syscall_return_via_sysret: CFI_REMEMBER_STATE - /* r11 is already restored (see code above) */ - RESTORE_C_REGS_EXCEPT_R11 + /* rcx and r11 are already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp),%rsp USERGS_SYSRET64 CFI_RESTORE_STATE -- cgit v1.2.3 From ac7f5dfb0348a33b2ea92a0c477103c4db45ad4e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 21 Apr 2015 18:03:13 +0200 Subject: x86/asm/entry/64: Merge 32-bit execve stubs with x32 ones, as they are identical Run-tested. Suggested-by: Brian Gerst Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429632194-13445-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3c78a15a537d..e952f6bf1d6d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -525,40 +525,27 @@ GLOBAL(stub_execveat) CFI_ENDPROC END(stub_execveat) -#ifdef CONFIG_X86_X32_ABI +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) .align 8 GLOBAL(stub_x32_execve) +GLOBAL(stub32_execve) CFI_STARTPROC DEFAULT_FRAME 0, 8 call compat_sys_execve jmp return_from_execve CFI_ENDPROC +END(stub32_execve) END(stub_x32_execve) .align 8 GLOBAL(stub_x32_execveat) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 - call compat_sys_execveat - jmp return_from_execve - CFI_ENDPROC -END(stub_x32_execveat) -#endif - -#ifdef CONFIG_IA32_EMULATION - .align 8 -GLOBAL(stub32_execve) - CFI_STARTPROC - call compat_sys_execve - jmp return_from_execve - CFI_ENDPROC -END(stub32_execve) - .align 8 GLOBAL(stub32_execveat) CFI_STARTPROC + DEFAULT_FRAME 0, 8 call compat_sys_execveat jmp return_from_execve CFI_ENDPROC END(stub32_execveat) +END(stub_x32_execveat) #endif /* -- cgit v1.2.3 From 3f5159a9221f19b08275b0a6388ab14392ae4eec Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 21 Apr 2015 18:03:14 +0200 Subject: x86/asm/entry/32: Update -ENOSYS handling to match the 64-bit logic Recently Andy changed the 64-bit syscall logic so that pt_regs->ax is initially set to -ENOSYS, and on syscall exit, it is updated with the actual return value. This simplified the logic there. This patch does the same for 32-bit syscall entry points. The check for %rax being too big is moved to be just before the call instruction which dispatches execution through the syscall table. There is no way to accidentally skip this check now by jumping to a label after it. This allows us to remove redundant checks after ptrace et al. If %rax is too big, we just skip over the (call, write %rax to pt_regs->ax) instruction pair. pt_regs->ax remains set to -ENOSYS, and it gets returned to userspace. Similar to 64-bit code, this eliminates the "ia32_badsys" code path. Run-tested. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Will Drewry Link: http://lkml.kernel.org/r/1429632194-13445-2-git-send-email-dvlasenk@redhat.com [ Changelog massage. ] Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 3cdb9eafbf8c..56fd6dd2e342 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -136,7 +136,7 @@ ENTRY(ia32_sysenter_target) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -163,8 +163,6 @@ sysenter_flags_fixed: testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz sysenter_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys sysenter_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -173,8 +171,11 @@ sysenter_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ sysenter_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -241,9 +242,7 @@ sysexit_from_sys_call: movl %ebx,%esi /* 2nd arg: 1st syscall arg */ movl %eax,%edi /* 1st arg: syscall number */ call __audit_syscall_entry - movl RAX(%rsp),%eax /* reload syscall number */ - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys + movl ORIG_RAX(%rsp),%eax /* reload syscall number */ movl %ebx,%edi /* reload 1st syscall arg */ movl RCX(%rsp),%esi /* reload 2nd syscall arg */ movl RDX(%rsp),%edx /* reload 3rd syscall arg */ @@ -294,13 +293,10 @@ sysenter_tracesys: #endif SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -370,7 +366,7 @@ ENTRY(ia32_cstar_target) pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rbp /* pt_regs->cx */ movl %ebp,%ecx - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -386,8 +382,6 @@ ENTRY(ia32_cstar_target) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz cstar_tracesys - cmpq $IA32_NR_syscalls-1,%rax - ja ia32_badsys cstar_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -396,8 +390,11 @@ cstar_do_call: movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ cstar_dispatch: + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) movq %rax,RAX(%rsp) +1: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -444,14 +441,11 @@ cstar_tracesys: xchgl %r9d,%ebp SAVE_EXTRA_REGS CLEAR_RREGS r9 - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS xchgl %ebp,%r9d - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ jmp cstar_do_call END(ia32_cstar_target) @@ -510,7 +504,7 @@ ENTRY(ia32_syscall) pushq_cfi_reg rsi /* pt_regs->si */ pushq_cfi_reg rdx /* pt_regs->dx */ pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi_reg rax /* pt_regs->ax */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ CFI_ADJUST_CFA_OFFSET 10*8 @@ -518,8 +512,6 @@ ENTRY(ia32_syscall) orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz ia32_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys ia32_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -527,9 +519,12 @@ ia32_do_call: xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ movl %ebx,%edi /* arg1 */ movl %edx,%edx /* arg3 (zero extension) */ + cmpq $(IA32_NR_syscalls-1),%rax + ja 1f call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX(%rsp) +1: ia32_ret_from_sys_call: CLEAR_RREGS jmp int_ret_from_sys_call @@ -537,23 +532,14 @@ ia32_ret_from_sys_call: ia32_tracesys: SAVE_EXTRA_REGS CLEAR_RREGS - movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call + CFI_ENDPROC END(ia32_syscall) -ia32_badsys: - movq $0,ORIG_RAX(%rsp) - movq $-ENOSYS,%rax - jmp ia32_sysret - - CFI_ENDPROC - .macro PTREGSCALL label, func ALIGN GLOBAL(\label) -- cgit v1.2.3 From 5f0052f9522b84269e1b3b435a806f873d992702 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:23 +0800 Subject: x86/irq: Save destination CPU ID in irq_cfg Cache destination CPU APIC ID into struct irq_cfg when assigning vector for interrupt. Upper layer just needs to read the cached APIC ID instead of calling apic->cpu_mask_to_apicid_and(), it helps to hide APIC driver details from IOAPIC/HPET/MSI drivers.. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428905519-23704-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 1 + arch/x86/kernel/apic/vector.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e9571ddabc4f..cda96954cbbf 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -116,6 +116,7 @@ struct irq_data; struct irq_cfg { cpumask_var_t domain; cpumask_var_t old_domain; + unsigned int dest_apicid; u8 vector; u8 move_in_progress : 1; #ifdef CONFIG_IRQ_REMAP diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6cedd7914581..c724ef6b218c 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -188,6 +188,12 @@ next: } free_cpumask_var(tmp_mask); + if (!err) { + /* cache destination APIC IDs into cfg->dest_apicid */ + err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, + &cfg->dest_apicid); + } + return err; } -- cgit v1.2.3 From b5dc8e6c21e7ffba0246bf39cea97805c142bf85 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:24 +0800 Subject: x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors Abstract CPU local APIC as an interrupt controller and create an irqdomain for it to manage CPU interrupt vectors. It's the base to enable hierarchical irqdomains on x86 systems. The final irqdomain hierarchy will look like this: IOAPIC domain ----| MSI/MSI-x domain ----> [Interrupt Remapping domain] -> CPU vector domain HPET_IRQ domain ----| ^ | DMAR domain ----------------------------------------------| HT_IRQ domain ----------------------------------------------| Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Prarit Bhargava Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Grant Likely Link: http://lkml.kernel.org/r/1428905519-23704-3-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 3 +- arch/x86/include/asm/hw_irq.h | 17 +++++ arch/x86/kernel/apic/io_apic.c | 3 - arch/x86/kernel/apic/vector.c | 155 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 160 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6049d587599e..e75a96c38c58 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -465,7 +465,6 @@ config X86_INTEL_CE select X86_REBOOTFIXUPS select OF select OF_EARLY_FLATTREE - select IRQ_DOMAIN ---help--- Select for the Intel CE media processor (CE4100) SOC. This option compiles in support for the CE4100 SOC for settop @@ -914,11 +913,11 @@ config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ + select IRQ_DOMAIN_HIERARCHY config X86_IO_APIC def_bool y depends on X86_LOCAL_APIC || X86_UP_IOAPIC - select IRQ_DOMAIN config X86_REROUTE_FOR_BROKEN_BOOT_IRQS bool "Reroute for broken boot IRQs" diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index cda96954cbbf..5b951ac56aa1 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -112,6 +112,17 @@ struct irq_2_irte { #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; +struct irq_domain; + +struct irq_alloc_info { + u32 flags; + const struct cpumask *mask; /* CPU mask for vector allocation */ +}; + +enum { + /* Allocate contiguous CPU vectors */ + X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, +}; struct irq_cfg { cpumask_var_t domain; @@ -135,6 +146,12 @@ struct irq_cfg { }; }; +extern struct irq_domain *x86_vector_domain; + +extern void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask); +extern void copy_irq_alloc_info(struct irq_alloc_info *dst, + struct irq_alloc_info *src); extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f4dc2462a1ac..56d532106ef3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2356,9 +2356,6 @@ static int mp_irqdomain_create(int ioapic) ioapic_dynirq_base = max(ioapic_dynirq_base, gsi_cfg->gsi_end + 1); - if (gsi_cfg->gsi_base == 0) - irq_set_default_host(ip->irqdomain); - return 0; } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index c724ef6b218c..6358d8d351f5 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu + * Enable support of hierarchical irqdomains * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,7 +21,9 @@ #include #include +struct irq_domain *x86_vector_domain; static DEFINE_RAW_SPINLOCK(vector_lock); +static struct irq_chip lapic_controller; void lock_vector_lock(void) { @@ -36,15 +40,21 @@ void unlock_vector_lock(void) struct irq_cfg *irq_cfg(unsigned int irq) { - return irq_get_chip_data(irq); + return irqd_cfg(irq_get_irq_data(irq)); } struct irq_cfg *irqd_cfg(struct irq_data *irq_data) { + if (!irq_data) + return NULL; + + while (irq_data->parent_data) + irq_data = irq_data->parent_data; + return irq_data->chip_data; } -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) +static struct irq_cfg *alloc_irq_cfg(int node) { struct irq_cfg *cfg; @@ -79,7 +89,7 @@ struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) return cfg; } - cfg = alloc_irq_cfg(at, node); + cfg = alloc_irq_cfg(node); if (cfg) irq_set_chip_data(at, cfg); else @@ -87,14 +97,13 @@ struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) return cfg; } -static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) +static void free_irq_cfg(struct irq_cfg *cfg) { - if (!cfg) - return; - irq_set_chip_data(at, NULL); - free_cpumask_var(cfg->domain); - free_cpumask_var(cfg->old_domain); - kfree(cfg); + if (cfg) { + free_cpumask_var(cfg->domain); + free_cpumask_var(cfg->old_domain); + kfree(cfg); + } } static int @@ -241,6 +250,90 @@ void clear_irq_vector(int irq, struct irq_cfg *cfg) raw_spin_unlock_irqrestore(&vector_lock, flags); } +void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask) +{ + memset(info, 0, sizeof(*info)); + info->mask = mask; +} + +void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) +{ + if (src) + *dst = *src; + else + memset(dst, 0, sizeof(*dst)); +} + +static inline const struct cpumask * +irq_alloc_info_get_mask(struct irq_alloc_info *info) +{ + return (!info || !info->mask) ? apic->target_cpus() : info->mask; +} + +static void x86_vector_free_irqs(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *irq_data; + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); + if (irq_data && irq_data->chip_data) { + free_remapped_irq(virq); + clear_irq_vector(virq + i, irq_data->chip_data); + free_irq_cfg(irq_data->chip_data); + irq_domain_reset_irq_data(irq_data); + } + } +} + +static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_alloc_info *info = arg; + const struct cpumask *mask; + struct irq_data *irq_data; + struct irq_cfg *cfg; + int i, err; + + if (disable_apic) + return -ENXIO; + + /* Currently vector allocator can't guarantee contiguous allocations */ + if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1) + return -ENOSYS; + + mask = irq_alloc_info_get_mask(info); + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + BUG_ON(!irq_data); + cfg = alloc_irq_cfg(irq_data->node); + if (!cfg) { + err = -ENOMEM; + goto error; + } + + irq_data->chip = &lapic_controller; + irq_data->chip_data = cfg; + irq_data->hwirq = virq + i; + err = assign_irq_vector(virq, cfg, mask); + if (err) + goto error; + } + + return 0; + +error: + x86_vector_free_irqs(domain, virq, i + 1); + return err; +} + +static struct irq_domain_ops x86_vector_domain_ops = { + .alloc = x86_vector_alloc_irqs, + .free = x86_vector_free_irqs, +}; + int __init arch_probe_nr_irqs(void) { int nr; @@ -266,6 +359,11 @@ int __init arch_probe_nr_irqs(void) int __init arch_early_irq_init(void) { + x86_vector_domain = irq_domain_add_tree(NULL, &x86_vector_domain_ops, + NULL); + BUG_ON(x86_vector_domain == NULL); + irq_set_default_host(x86_vector_domain); + return arch_early_ioapic_init(); } @@ -380,6 +478,36 @@ int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, return 0; } +static int vector_set_affinity(struct irq_data *irq_data, + const struct cpumask *dest, bool force) +{ + struct irq_cfg *cfg = irq_data->chip_data; + int err, irq = irq_data->irq; + + if (!config_enabled(CONFIG_SMP)) + return -EPERM; + + if (!cpumask_intersects(dest, cpu_online_mask)) + return -EINVAL; + + err = assign_irq_vector(irq, cfg, dest); + if (err) { + struct irq_data *top = irq_get_irq_data(irq); + + if (assign_irq_vector(irq, cfg, top->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } + + return IRQ_SET_MASK_OK; +} + +static struct irq_chip lapic_controller = { + .irq_ack = apic_ack_edge, + .irq_set_affinity = vector_set_affinity, + .irq_retrigger = apic_retrigger_irq, +}; + #ifdef CONFIG_SMP void send_cleanup_vector(struct irq_cfg *cfg) { @@ -497,7 +625,7 @@ int arch_setup_hwirq(unsigned int irq, int node) unsigned long flags; int ret; - cfg = alloc_irq_cfg(irq, node); + cfg = alloc_irq_cfg(node); if (!cfg) return -ENOMEM; @@ -508,7 +636,7 @@ int arch_setup_hwirq(unsigned int irq, int node) if (!ret) irq_set_chip_data(irq, cfg); else - free_irq_cfg(irq, cfg); + free_irq_cfg(cfg); return ret; } @@ -518,7 +646,8 @@ void arch_teardown_hwirq(unsigned int irq) free_remapped_irq(irq); clear_irq_vector(irq, cfg); - free_irq_cfg(irq, cfg); + irq_set_chip_data(irq, NULL); + free_irq_cfg(cfg); } static void __init print_APIC_field(int base) -- cgit v1.2.3 From bd8eb63f8a3907bb477992145cb6ce0064a1e43f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:25 +0800 Subject: x86/hpet: Use new irqdomain interfaces to allocate/free IRQ Use new irqdomain interfaces to allocate/free IRQ for HPET, so we can remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ later. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Srivatsa S. Bhat Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/1416894816-23245-4-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 3acbff4716b0..ae29554f57ea 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -476,7 +477,7 @@ static int hpet_msi_next_event(unsigned long delta, static int hpet_setup_msi_irq(unsigned int irq) { if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) { - irq_free_hwirq(irq); + irq_domain_free_irqs(irq, 1); return -EINVAL; } return 0; @@ -484,9 +485,10 @@ static int hpet_setup_msi_irq(unsigned int irq) static int hpet_assign_irq(struct hpet_dev *dev) { - unsigned int irq = irq_alloc_hwirq(-1); + int irq; - if (!irq) + irq = irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL); + if (irq <= 0) return -EINVAL; irq_set_handler_data(irq, dev); -- cgit v1.2.3 From 4c8f9960ee497020d0858362c81ece984bc89aa5 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:26 +0800 Subject: x86/MSI: Use new irqdomain interfaces to allocate/free IRQ Use new irqdomain interfaces to allocate/free IRQ for PCI MSI, so we can remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ later. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1416894816-23245-5-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/msi.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index d6ba2d660dc5..76cc2c902176 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -146,23 +147,20 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct msi_desc *msidesc; - unsigned int irq; - int node, ret; + int irq, ret; /* Multiple MSI vectors only supported with interrupt remapping */ if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; - node = dev_to_node(&dev->dev); - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = irq_alloc_hwirq(node); - if (!irq) + irq = irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL); + if (irq <= 0) return -ENOSPC; ret = setup_msi_irq(dev, msidesc, irq, 0); if (ret < 0) { - irq_free_hwirq(irq); + irq_domain_free_irqs(irq, 1); return ret; } @@ -172,7 +170,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) void native_teardown_msi_irq(unsigned int irq) { - irq_free_hwirq(irq); + irq_domain_free_irqs(irq, 1); } #ifdef CONFIG_DMAR_TABLE -- cgit v1.2.3 From 331dd19eee243e1b7e670c5993609121817afeaa Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:27 +0800 Subject: x86/uv: Use new irqdomain interfaces to allocate/free IRQ Use new irqdomain interfaces to allocate/free IRQ, so we can remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ later. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428905519-23704-6-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/uv/uv_irq.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 0ce673645432..474912d03f40 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -130,24 +131,14 @@ static int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { - const struct cpumask *eligible_cpu = cpumask_of(cpu); struct irq_cfg *cfg = irq_cfg(irq); unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; - int mmr_pnode, err; - unsigned int dest; + int mmr_pnode; BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest); - if (err != 0) - return err; - if (limit == UV_AFFINITY_CPU) irq_set_status_flags(irq, IRQ_NO_BALANCING); else @@ -164,7 +155,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = dest; + entry->dest = cfg->dest_apicid; mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -238,9 +229,13 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { - int ret, irq = irq_alloc_hwirq(uv_blade_to_memory_nid(mmr_blade)); + int ret, irq; + struct irq_alloc_info info; - if (!irq) + init_irq_alloc_info(&info, cpumask_of(cpu)); + irq = irq_domain_alloc_irqs(NULL, 1, uv_blade_to_memory_nid(mmr_blade), + &info); + if (irq <= 0) return -EBUSY; ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, @@ -248,7 +243,7 @@ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, if (ret == irq) uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); else - irq_free_hwirq(irq); + irq_domain_free_irqs(irq, 1); return ret; } @@ -283,6 +278,6 @@ void uv_teardown_irq(unsigned int irq) n = n->rb_right; } spin_unlock_irqrestore(&uv_irq_lock, irqflags); - irq_free_hwirq(irq); + irq_domain_free_irqs(irq, 1); } EXPORT_SYMBOL_GPL(uv_teardown_irq); -- cgit v1.2.3 From af87baedf2c23b1181f51323339210a26a64f7fc Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:28 +0800 Subject: x86/htirq: Use new irqdomain interfaces to allocate/free IRQ Use new irqdomain interfaces to allocate/free IRQ for HTIRQ, so we can remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ later. This patch changes the interfaces between arch independent PCI driver and arch specific code. Currently HT_IRQ is only enabled on x86, so it does not affect other architectures. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428905519-23704-7-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/htirq.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index 816f36e979ad..b307ee7a7148 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -61,31 +62,30 @@ static struct irq_chip ht_irq_chip = { .flags = IRQCHIP_SKIP_SET_WAKE, }; +int arch_alloc_ht_irq(struct pci_dev *dev) +{ + return irq_domain_alloc_irqs(NULL, 1, dev_to_node(&dev->dev), NULL); +} + +void arch_free_ht_irq(int irq) +{ + irq_domain_free_irqs(irq, 1); +} + int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; struct ht_irq_msg msg; - unsigned dest; - int err; if (disable_apic) return -ENXIO; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); msg.address_lo = HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) | HT_IRQ_LOW_VECTOR(cfg->vector) | ((apic->irq_dest_mode == 0) ? HT_IRQ_LOW_DM_PHYSICAL : -- cgit v1.2.3 From a62b32cdd0a6324c959f40b3c9b928b275297066 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:29 +0800 Subject: x86/dmar: Use new irqdomain interfaces to allocate/free IRQ Use new irqdomain interfaces to allocate/free IRQ for DMAR and interrupt remapping, so we can remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ later. The private definitions of irq_alloc_hwirqs()/irq_free_hwirqs() are a temporary solution, they will be removed once we have converted the interrupt remapping driver to use irqdomain framework. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Joerg Roedel Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428905519-23704-8-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_remapping.h | 4 ++-- arch/x86/kernel/apic/msi.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 6224d316c405..86d897bc15dd 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -99,7 +99,7 @@ static inline bool setup_remapped_irq(int irq, } #endif /* CONFIG_IRQ_REMAP */ -#define dmar_alloc_hwirq() irq_alloc_hwirq(-1) -#define dmar_free_hwirq irq_free_hwirq +extern int dmar_alloc_hwirq(void); +extern void dmar_free_hwirq(int irq); #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 76cc2c902176..9be7d6d8a579 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -223,6 +223,16 @@ int arch_setup_dmar_msi(unsigned int irq) "edge"); return 0; } + +int dmar_alloc_hwirq(void) +{ + return irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL); +} + +void dmar_free_hwirq(int irq) +{ + irq_domain_free_irqs(irq, 1); +} #endif /* -- cgit v1.2.3 From 947045a2aac1157c85a24984c9a8128846ae7266 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:30 +0800 Subject: irq_remapping: Introduce new interfaces to support hierarchical irqdomains Introduce new interfaces for interrupt remapping drivers to support hierarchical irqdomains: 1) irq_remapping_get_ir_irq_domain(): get irqdomain associated with an interrupt remapping unit. IOAPIC/HPET drivers use this interface to get parent interrupt remapping irqdomain. 2) irq_remapping_get_irq_domain(): get irqdomain for an IRQ allocation. This is mainly used to support MSI irqdomain. We must build one MSI irqdomain for each interrupt remapping unit. MSI driver calls this interface to get MSI irqdomain associated with an IR irqdomain which manages the PCI devices. In a further step we will store the irqdomain pointer in the device struct to avoid this call in the irq allocation path. Architecture specific hooks: 1) arch_get_ir_parent_domain(): get parent irqdomain for IR irqdomain, which is x86_vector_domain on x86 platforms. 2) arch_create_msi_irq_domain(): create an MSI irqdomain associated with the interrupt remapping unit. We also add following callbacks into struct irq_remap_ops: struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *); struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *); Once all clients of IR have been converted to the new hierarchical irqdomain interfaces, we will: 1) Remove set_ioapic_entry, set_affinity, free_