summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/entry/common.c 23
-rw-r--r-- arch/x86/entry/entry_64_compat.S 42
2 files changed, 60 insertions, 5 deletions
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 1b2606edc621..88dc5ba14d47 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -363,7 +363,8 @@ __visible void do_int80_syscall_32(struct pt_regs *regs)
syscall_return_slowpath(regs);
}
-__visible void do_fast_syscall_32(struct pt_regs *regs)
+/* Returns 0 to return using IRET or 1 to return using SYSRETL. */
+__visible long do_fast_syscall_32(struct pt_regs *regs)
{
/*
* Called using the internal vDSO SYSENTER/SYSCALL32 calling
@@ -395,12 +396,28 @@ __visible void do_fast_syscall_32(struct pt_regs *regs)
enter_from_user_mode();
#endif
prepare_exit_to_usermode(regs);
- return;
+ return 0; /* Keep it simple: use IRET. */
}
local_irq_disable();
/* Now this is just like a normal syscall. */
do_int80_syscall_32(regs);
- return;
+
+#ifdef CONFIG_X86_64
+ /*
+ * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
+ * SYSRETL is available on all 64-bit CPUs, so we don't need to
+ * bother with SYSEXIT.
+ *
+ * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
+ * because the ECX fixup above will ensure that this is essentially
+ * never the case.
+ */
+ return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
+ regs->ip == landing_pad &&
+ (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
+#else
+ return 0;
+#endif
}
#endif
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 8f109de51d03..cf9641cd4796 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -115,7 +115,9 @@ sysenter_flags_fixed:
movq %rsp, %rdi
call do_fast_syscall_32
- jmp .Lsyscall_32_done
+ testl %eax, %eax
+ jz .Lsyscall_32_done
+ jmp sysret32_from_system_call
sysenter_fix_flags:
pushq $X86_EFLAGS_FIXED
@@ -192,7 +194,43 @@ ENTRY(entry_SYSCALL_compat)
movq %rsp, %rdi
call do_fast_syscall_32
- jmp .Lsyscall_32_done
+ testl %eax, %eax
+ jz .Lsyscall_32_done
+
+ /* Opportunistic SYSRET */
+sysret32_from_system_call:
+ TRACE_IRQS_ON /* User mode traces as IRQs on. */
+ movq RBX(%rsp), %rbx /* pt_regs->rbx */
+ movq RBP(%rsp), %rbp /* pt_regs->rbp */
+ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
+ movq RIP(%rsp), %rcx /* pt_regs->ip (in rcx) */
+ addq $RAX, %rsp /* Skip r8-r15 */
+ popq %rax /* pt_regs->rax */
+ popq %rdx /* Skip pt_regs->cx */
+ popq %rdx /* pt_regs->dx */
+ popq %rsi /* pt_regs->si */
+ popq %rdi /* pt_regs->di */
+
+ /*
+ * USERGS_SYSRET32 does:
+ * GSBASE = user's GS base
+ * EIP = ECX
+ * RFLAGS = R11
+ * CS = __USER32_CS
+ * SS = __USER_DS
+ *
+ * ECX will not match pt_regs->cx, but we're returning to a vDSO
+ * trampoline that will fix up RCX, so this is okay.
+ *
+ * R12-R15 are callee-saved, so they contain whatever was in them
+ * when the system call started, which is already known to user
+ * code. We zero R8-R10 to avoid info leaks.
+ */
+ xorq %r8, %r8
+ xorq %r9, %r9
+ xorq %r10, %r10
+ movq RSP-ORIG_RAX(%rsp), %rsp
+ USERGS_SYSRET32
END(entry_SYSCALL_compat)
/*