diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-12 13:58:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-12 13:58:15 -0700 |
commit | dd502a81077a5f3b3e19fa9a1accffdcab5ad5bc (patch) | |
tree | e993e8d13deadff82ec22d34e68860c71e5443f7 /arch/x86/kernel | |
parent | 34eb62d868d729e9a252aa497277081fb652eeed (diff) | |
parent | 69e0ad37c9f32d5aa1beb02aab4ec0cd055be013 (diff) |
Merge tag 'core-static_call-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull static call support from Ingo Molnar:
"This introduces static_call(), which is the idea of static_branch()
applied to indirect function calls. Remove a data load (indirection)
by modifying the text.
They give the flexibility of function pointers, but with better
performance. (This is especially important for cases where retpolines
would otherwise be used, as retpolines can be pretty slow.)
API overview:
DECLARE_STATIC_CALL(name, func);
DEFINE_STATIC_CALL(name, func);
DEFINE_STATIC_CALL_NULL(name, typename);
static_call(name)(args...);
static_call_cond(name)(args...);
static_call_update(name, func);
x86 is supported via text patching, otherwise basic indirect calls are
used, with function pointers.
There's a second variant using inline code patching, inspired by
jump-labels, implemented on x86 as well.
The new APIs are utilized in the x86 perf code, a heavy user of
function pointers, where static calls speed up the PMU handler by
4.2% (!).
The generic implementation is not really excercised on other
architectures, outside of the trivial test_static_call_init()
self-test"
* tag 'core-static_call-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
static_call: Fix return type of static_call_init
tracepoint: Fix out of sync data passing by static caller
tracepoint: Fix overly long tracepoint names
x86/perf, static_call: Optimize x86_pmu methods
tracepoint: Optimize using static_call()
static_call: Allow early init
static_call: Add some validation
static_call: Handle tail-calls
static_call: Add static_call_cond()
x86/alternatives: Teach text_poke_bp() to emulate RET
static_call: Add simple self-test for static calls
x86/static_call: Add inline static call implementation for x86-64
x86/static_call: Add out-of-line static call implementation
static_call: Avoid kprobes on inline static_call()s
static_call: Add inline static call infrastructure
static_call: Add basic static call infrastructure
compiler.h: Make __ADDRESSABLE() symbol truly unique
jump_label,module: Fix module lifetime for __jump_label_mod_text_reserved()
module: Properly propagate MODULE_STATE_COMING failure
module: Fix up module_notifier return values
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/alternative.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/opt.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/static_call.c | 98 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 1 |
6 files changed, 110 insertions, 1 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e77261db2391..de09af019e23 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -68,6 +68,7 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o obj-y += irqflags.o +obj-y += static_call.o obj-y += process.o obj-y += fpu/ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index cdaab30880b9..4adbe65afe23 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1103,6 +1103,10 @@ noinstr int poke_int3_handler(struct pt_regs *regs) */ goto out_put; + case RET_INSN_OPCODE: + int3_emulate_ret(regs); + break; + case CALL_INSN_OPCODE: int3_emulate_call(regs, (long)ip + tp->rel32); break; @@ -1277,6 +1281,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, switch (tp->opcode) { case INT3_INSN_OPCODE: + case RET_INSN_OPCODE: break; case CALL_INSN_OPCODE: diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 40f380461e6d..c068e21c2c40 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -18,6 +18,7 @@ #include <linux/ftrace.h> #include <linux/frame.h> #include <linux/pgtable.h> +#include <linux/static_call.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -210,7 +211,8 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) /* Check whether the address range is reserved */ if (ftrace_text_reserved(src, src + len - 1) || alternatives_text_reserved(src, src + len - 1) || - jump_label_text_reserved(src, src + len - 1)) + jump_label_text_reserved(src, src + len - 1) || + static_call_text_reserved(src, src + len - 1)) return -EBUSY; return len; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d41be0df72f8..fa16b906ea3f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -19,6 +19,7 @@ #include <linux/hugetlb.h> #include <linux/tboot.h> #include <linux/usb/xhci-dbgp.h> +#include <linux/static_call.h> #include <uapi/linux/mount.h> @@ -849,6 +850,7 @@ void __init setup_arch(char **cmdline_p) early_cpu_init(); arch_init_ideal_nops(); jump_label_init(); + static_call_init(); early_ioremap_init(); setup_olpc_ofw_pgd(); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c new file mode 100644 index 000000000000..ca9a380d9c0b --- /dev/null +++ b/arch/x86/kernel/static_call.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/static_call.h> +#include <linux/memory.h> +#include <linux/bug.h> +#include <asm/text-patching.h> + +enum insn_type { + CALL = 0, /* site call */ + NOP = 1, /* site cond-call */ + JMP = 2, /* tramp / site tail-call */ + RET = 3, /* tramp / site cond-tail-call */ +}; + +static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) +{ + int size = CALL_INSN_SIZE; + const void *code; + + switch (type) { + case CALL: + code = text_gen_insn(CALL_INSN_OPCODE, insn, func); + break; + + case NOP: + code = ideal_nops[NOP_ATOMIC5]; + break; + + case JMP: + code = text_gen_insn(JMP32_INSN_OPCODE, insn, func); + break; + + case RET: + code = text_gen_insn(RET_INSN_OPCODE, insn, func); + size = RET_INSN_SIZE; + break; + } + + if (memcmp(insn, code, size) == 0) + return; + + if (unlikely(system_state == SYSTEM_BOOTING)) + return text_poke_early(insn, code, size); + + text_poke_bp(insn, code, size, NULL); +} + +static void __static_call_validate(void *insn, bool tail) +{ + u8 opcode = *(u8 *)insn; + + if (tail) { + if (opcode == JMP32_INSN_OPCODE || + opcode == RET_INSN_OPCODE) + return; + } else { + if (opcode == CALL_INSN_OPCODE || + !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5)) + return; + } + + /* + * If we ever trigger this, our text is corrupt, we'll probably not live long. + */ + WARN_ONCE(1, "unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn); +} + +static inline enum insn_type __sc_insn(bool null, bool tail) +{ + /* + * Encode the following table without branches: + * + * tail null insn + * -----+-------+------ + * 0 | 0 | CALL + * 0 | 1 | NOP + * 1 | 0 | JMP + * 1 | 1 | RET + */ + return 2*tail + null; +} + +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) +{ + mutex_lock(&text_mutex); + + if (tramp) { + __static_call_validate(tramp, true); + __static_call_transform(tramp, __sc_insn(!func, true), func); + } + + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { + __static_call_validate(site, tail); + __static_call_transform(site, __sc_insn(!func, tail), func); + } + + mutex_unlock(&text_mutex); +} +EXPORT_SYMBOL_GPL(arch_static_call_transform); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 45d72447df84..bf9e0adb5b7e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -136,6 +136,7 @@ SECTIONS ENTRY_TEXT ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT + STATIC_CALL_TEXT *(.fixup) *(.gnu.warning) |