summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-livepatch8
-rw-r--r--Documentation/filesystems/proc.txt18
-rw-r--r--Documentation/livepatch/livepatch.txt214
-rw-r--r--arch/Kconfig6
-rw-r--r--arch/powerpc/include/asm/thread_info.h4
-rw-r--r--arch/powerpc/kernel/signal.c4
-rw-r--r--arch/s390/include/asm/thread_info.h24
-rw-r--r--arch/s390/kernel/entry.S30
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/entry/common.c9
-rw-r--r--arch/x86/include/asm/thread_info.h13
-rw-r--r--arch/x86/include/asm/unwind.h6
-rw-r--r--arch/x86/kernel/stacktrace.c96
-rw-r--r--arch/x86/kernel/unwind_frame.c2
-rw-r--r--fs/proc/base.c15
-rw-r--r--include/linux/init_task.h9
-rw-r--r--include/linux/livepatch.h68
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/stacktrace.h9
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/livepatch/Makefile2
-rw-r--r--kernel/livepatch/core.c450
-rw-r--r--kernel/livepatch/core.h6
-rw-r--r--kernel/livepatch/patch.c272
-rw-r--r--kernel/livepatch/patch.h33
-rw-r--r--kernel/livepatch/transition.c553
-rw-r--r--kernel/livepatch/transition.h14
-rw-r--r--kernel/sched/idle.c4
-rw-r--r--kernel/stacktrace.c12
-rw-r--r--samples/livepatch/livepatch-sample.c18
30 files changed, 1547 insertions, 359 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-livepatch b/Documentation/ABI/testing/sysfs-kernel-livepatch
index da87f43aec58..d5d39748382f 100644
--- a/Documentation/ABI/testing/sysfs-kernel-livepatch
+++ b/Documentation/ABI/testing/sysfs-kernel-livepatch
@@ -25,6 +25,14 @@ Description:
code is currently applied. Writing 0 will disable the patch
while writing 1 will re-enable the patch.
+What: /sys/kernel/livepatch/<patch>/transition
+Date: Feb 2017
+KernelVersion: 4.12.0
+Contact: live-patching@vger.kernel.org
+Description:
+ An attribute which indicates whether the patch is currently in
+ transition.
+
What: /sys/kernel/livepatch/<patch>/<object>
Date: Nov 2014
KernelVersion: 3.19.0
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index c94b4675d021..9036dbf16156 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -44,6 +44,7 @@ Table of Contents
3.8 /proc/<pid>/fdinfo/<fd> - Information about opened file
3.9 /proc/<pid>/map_files - Information about memory mapped files
3.10 /proc/<pid>/timerslack_ns - Task timerslack value
+ 3.11 /proc/<pid>/patch_state - Livepatch patch operation state
4 Configuring procfs
4.1 Mount options
@@ -1887,6 +1888,23 @@ Valid values are from 0 - ULLONG_MAX
An application setting the value must have PTRACE_MODE_ATTACH_FSCREDS level
permissions on the task specified to change its timerslack_ns value.
+3.11 /proc/<pid>/patch_state - Livepatch patch operation state
+-----------------------------------------------------------------
+When CONFIG_LIVEPATCH is enabled, this file displays the value of the
+patch state for the task.
+
+A value of '-1' indicates that no patch is in transition.
+
+A value of '0' indicates that a patch is in transition and the task is
+unpatched. If the patch is being enabled, then the task hasn't been
+patched yet. If the patch is being disabled, then the task has already
+been unpatched.
+
+A value of '1' indicates that a patch is in transition and the task is
+patched. If the patch is being enabled, then the task has already been
+patched. If the patch is being disabled, then the task hasn't been
+unpatched yet.
+
------------------------------------------------------------------------------
Configuring procfs
diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt
index 9d2096c7160d..ecdb18104ab0 100644
--- a/Documentation/livepatch/livepatch.txt
+++ b/Documentation/livepatch/livepatch.txt
@@ -72,7 +72,8 @@ example, they add a NULL pointer or a boundary check, fix a race by adding
a missing memory barrier, or add some locking around a critical section.
Most of these changes are self contained and the function presents itself
the same way to the rest of the system. In this case, the functions might
-be updated independently one by one.
+be updated independently one by one. (This can be done by setting the
+'immediate' flag in the klp_patch struct.)
But there are more complex fixes. For example, a patch might change
ordering of locking in multiple functions at the same time. Or a patch
@@ -86,20 +87,141 @@ or no data are stored in the modified structures at the moment.
The theory about how to apply functions a safe way is rather complex.
The aim is to define a so-called consistency model. It attempts to define
conditions when the new implementation could be used so that the system
-stays consistent. The theory is not yet finished. See the discussion at
-https://lkml.kernel.org/r/20141107140458.GA21774@suse.cz
-
-The current consistency model is very simple. It guarantees that either
-the old or the new function is called. But various functions get redirected
-one by one without any synchronization.
-
-In other words, the current implementation _never_ modifies the behavior
-in the middle of the call. It is because it does _not_ rewrite the entire
-function in the memory. Instead, the function gets redirected at the
-very beginning. But this redirection is used immediately even when
-some other functions from the same patch have not been redirected yet.
-
-See also the section "Limitations" below.
+stays consistent.
+
+Livepatch has a consistency model which is a hybrid of kGraft and
+kpatch: it uses kGraft's per-task consistency and syscall barrier
+switching combined with kpatch's stack trace switching. There are also
+a number of fallback options which make it quite flexible.
+
+Patches are applied on a per-task basis, when the task is deemed safe to
+switch over. When a patch is enabled, livepatch enters into a
+transition state where tasks are converging to the patched state.
+Usually this transition state can complete in a few seconds. The same
+sequence occurs when a patch is disabled, except the tasks converge from
+the patched state to the unpatched state.
+
+An interrupt handler inherits the patched state of the task it
+interrupts. The same is true for forked tasks: the child inherits the
+patched state of the parent.
+
+Livepatch uses several complementary approaches to determine when it's
+safe to patch tasks:
+
+1. The first and most effective approach is stack checking of sleeping
+ tasks. If no affected functions are on the stack of a given task,
+ the task is patched. In most cases this will patch most or all of
+ the tasks on the first try. Otherwise it'll keep trying
+ periodically. This option is only available if the architecture has
+ reliable stacks (HAVE_RELIABLE_STACKTRACE).
+
+2. The second approach, if needed, is kernel exit switching. A
+ task is switched when it returns to user space from a system call, a
+ user space IRQ, or a signal. It's useful in the following cases:
+
+ a) Patching I/O-bound user tasks which are sleeping on an affected
+ function. In this case you have to send SIGSTOP and SIGCONT to
+ force it to exit the kernel and be patched.
+ b) Patching CPU-bound user tasks. If the task is highly CPU-bound
+ then it will get patched the next time it gets interrupted by an
+ IRQ.
+ c) In the future it could be useful for applying patches for
+ architectures which don't yet have HAVE_RELIABLE_STACKTRACE. In
+ this case you would have to signal most of the tasks on the
+ system. However this isn't supported yet because there's
+ currently no way to patch kthreads without
+ HAVE_RELIABLE_STACKTRACE.
+
+3. For idle "swapper" tasks, since they don't ever exit the kernel, they
+ instead have a klp_update_patch_state() call in the idle loop which
+ allows them to be patched before the CPU enters the idle state.
+
+ (Note there's not yet such an approach for kthreads.)
+
+All the above approaches may be skipped by setting the 'immediate' flag
+in the 'klp_patch' struct, which will disable per-task consistency and
+patch all tasks immediately. This can be useful if the patch doesn't
+change any function or data semantics. Note that, even with this flag
+set, it's possible that some tasks may still be running with an old
+version of the function, until that function returns.
+
+There's also an 'immediate' flag in the 'klp_func' struct which allows
+you to specify that certain functions in the patch can be applied
+without per-task consistency. This might be useful if you want to patch
+a common function like schedule(), and the function change doesn't need
+consistency but the rest of the patch does.
+
+For architectures which don't have HAVE_RELIABLE_STACKTRACE, the user
+must set patch->immediate which causes all tasks to be patched
+immediately. This option should be used with care, only when the patch
+doesn't change any function or data semantics.
+
+In the future, architectures which don't have HAVE_RELIABLE_STACKTRACE
+may be allowed to use per-task consistency if we can come up with
+another way to patch kthreads.
+
+The /sys/kernel/livepatch/<patch>/transition file shows whether a patch
+is in transition. Only a single patch (the topmost patch on the stack)
+can be in transition at a given time. A patch can remain in transition
+indefinitely, if any of the tasks are stuck in the initial patch state.
+
+A transition can be reversed and effectively canceled by writing the
+opposite value to the /sys/kernel/livepatch/<patch>/enabled file while
+the transition is in progress. Then all the tasks will attempt to
+converge back to the original patch state.
+
+There's also a /proc/<pid>/patch_state file which can be used to
+determine which tasks are blocking completion of a patching operation.
+If a patch is in transition, this file shows 0 to indicate the task is
+unpatched and 1 to indicate it's patched. Otherwise, if no patch is in
+transition, it shows -1. Any tasks which are blocking the transition
+can be signaled with SIGSTOP and SIGCONT to force them to change their
+patched state.
+
+
+3.1 Adding consistency model support to new architectures
+---------------------------------------------------------
+
+For adding consistency model support to new architectures, there are a
+few options:
+
+1) Add CONFIG_HAVE_RELIABLE_STACKTRACE. This means porting objtool, and
+ for non-DWARF unwinders, also making sure there's a way for the stack
+ tracing code to detect interrupts on the stack.
+
+2) Alternatively, ensure that every kthread has a call to
+ klp_update_patch_state() in a safe location. Kthreads are typically
+ in an infinite loop which does some action repeatedly. The safe
+ location to switch the kthread's patch state would be at a designated
+ point in the loop where there are no locks taken and all data
+ structures are in a well-defined state.
+
+ The location is clear when using workqueues or the kthread worker
+ API. These kthreads process independent actions in a generic loop.
+
+ It's much more complicated with kthreads which have a custom loop.
+ There the safe location must be carefully selected on a case-by-case
+ basis.
+
+ In that case, arches without HAVE_RELIABLE_STACKTRACE would still be
+ able to use the non-stack-checking parts of the consistency model:
+
+ a) patching user tasks when they cross the kernel/user space
+ boundary; and
+
+ b) patching kthreads and idle tasks at their designated patch points.
+
+ This option isn't as good as option 1 because it requires signaling
+ user tasks and waking kthreads to patch them. But it could still be
+ a good backup option for those architectures which don't have
+ reliable stack traces yet.
+
+In the meantime, patches for such architectures can bypass the
+consistency model by setting klp_patch.immediate to true. This option
+is perfectly fine for patches which don't change the semantics of the
+patched functions. In practice, this is usable for ~90% of security
+fixes. Use of this option also means the patch can't be unloaded after
+it has been disabled.
4. Livepatch module
@@ -134,7 +256,7 @@ Documentation/livepatch/module-elf-format.txt for more details.
4.2. Metadata
-------------
+-------------
The patch is described by several structures that split the information
into three levels:
@@ -156,6 +278,9 @@ into three levels:
only for a particular object ( vmlinux or a kernel module ). Note that
kallsyms allows for searching symbols according to the object name.
+ There's also an 'immediate' flag which, when set, patches the
+ function immediately, bypassing the consistency model safety checks.
+
+ struct klp_object defines an array of patched functions (struct
klp_func) in the same object. Where the object is either vmlinux
(NULL) or a module name.
@@ -172,10 +297,13 @@ into three levels:
This structure handles all patched functions consistently and eventually,
synchronously. The whole patch is applied only when all patched
symbols are found. The only exception are symbols from objects
- (kernel modules) that have not been loaded yet. Also if a more complex
- consistency model is supported then a selected unit (thread,
- kernel as a whole) will see the new code from the entire patch
- only when it is in a safe state.
+ (kernel modules) that have not been loaded yet.
+
+ Setting the 'immediate' flag applies the patch to all tasks
+ immediately, bypassing the consistency model safety checks.
+
+ For more details on how the patch is applied on a per-task basis,
+ see the "Consistency model" section.
4.3. Livepatch module handling
@@ -188,8 +316,15 @@ section "Livepatch life-cycle" below for more details about these
two operations.
Module removal is only safe when there are no users of the underlying
-functions. The immediate consistency model is not able to detect this;
-therefore livepatch modules cannot be removed. See "Limitations" below.
+functions. The immediate consistency model is not able to detect this. The
+code just redirects the functions at the very beginning and it does not
+check if the functions are in use. In other words, it knows when the
+functions get called but it does not know when the functions return.
+Therefore it cannot be decided when the livepatch module can be safely
+removed. This is solved by a hybrid consistency model. When the system is
+transitioned to a new patch state (patched/unpatched) it is guaranteed that
+no task sleeps or runs in the old code.
+
5. Livepatch life-cycle
=======================
@@ -239,9 +374,15 @@ Registered patches might be enabled either by calling klp_enable_patch() or
by writing '1' to /sys/kernel/livepatch/<name>/enabled. The system will
start using the new implementation of the patched functions at this stage.
-In particular, if an original function is patched for the first time, a
-function specific struct klp_ops is created and an universal ftrace handler
-is registered.
+When a patch is enabled, livepatch enters into a transition state where
+tasks are converging to the patched state. This is indicated by a value
+of '1' in /sys/kernel/livepatch/<name>/transition. Once all tasks have
+been patched, the 'transition' value changes to '0'. For more
+information about this process, see the "Consistency model" section.
+
+If an original function is patched for the first time, a function
+specific struct klp_ops is created and an universal ftrace handler is
+registered.
Functions might be patched multiple times. The ftrace handler is registered
only once for the given function. Further patches just add an entry to the
@@ -261,6 +402,12 @@ by writing '0' to /sys/kernel/livepatch/<name>/enabled. At this stage
either the code from the previously enabled patch or even the original
code gets used.
+When a patch is disabled, livepatch enters into a transition state where
+tasks are converging to the unpatched state. This is indicated by a
+value of '1' in /sys/kernel/livepatch/<name>/transition. Once all tasks
+have been unpatched, the 'transition' value changes to '0'. For more
+information about this process, see the "Consistency model" section.
+
Here all the functions (struct klp_func) associated with the to-be-disabled
patch are removed from the corresponding struct klp_ops. The ftrace handler
is unregistered and the struct klp_ops is freed when the func_stack list
@@ -329,23 +476,6 @@ The current Livepatch implementation has several limitations:
by "notrace".
- + Livepatch modules can not be removed.
-
- The current implementation just redirects the functions at the very
- beginning. It does not check if the functions are in use. In other
- words, it knows when the functions get called but it does not
- know when the functions return. Therefore it can not decide when
- the livepatch module can be safely removed.
-
- This will get most likely solved once a more complex consistency model
- is supported. The idea is that a safe state for patching should also
- mean a safe state for removing the patch.
-
- Note that the patch itself might get disabled by writing zero
- to /sys/kernel/livepatch/<patch>/enabled. It causes that the new
- code will not longer get called. But it does not guarantee
- that anyone is not sleeping anywhere in the new code.
-
+ Livepatch works reliably only when the dynamic ftrace is located at
the very beginning of the function.
diff --git a/arch/Kconfig b/arch/Kconfig
index c4d6833aacd9..640999412d11 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -720,6 +720,12 @@ config HAVE_STACK_VALIDATION
Architecture supports the 'objtool check' host tool command, which
performs compile-time stack metadata validation.
+config HAVE_RELIABLE_STACKTRACE
+ bool
+ help
+ Architecture has a save_stack_trace_tsk_reliable() function which
+ only returns a stack trace if it can guarantee the trace is reliable.
+
config HAVE_ARCH_HASH
bool
default n
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 87e4b2d8dcd4..6fc6464f7421 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -92,6 +92,7 @@ static inline struct thread_info *current_thread_info(void)
TIF_NEED_RESCHED */
#define TIF_32BIT 4 /* 32 bit binary */
#define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
+#define TIF_PATCH_PENDING 6 /* pending live patching update */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SINGLESTEP 8 /* singlestepping active */
#define TIF_NOHZ 9 /* in adaptive nohz mode */
@@ -115,6 +116,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_32BIT (1<<TIF_32BIT)
#define _TIF_RESTORE_TM (1<<TIF_RESTORE_TM)
+#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
@@ -131,7 +133,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_RESTORE_TM)
+ _TIF_RESTORE_TM | _TIF_PATCH_PENDING)
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
/* Bits in local_flags */
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 3a3671172436..e9436c5e1e09 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -14,6 +14,7 @@
#include <linux/uprobes.h>
#include <linux/key.h>
#include <linux/context_tracking.h>
+#include <linux/livepatch.h>
#include <asm/hw_breakpoint.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -162,6 +163,9 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
tracehook_notify_resume(regs);
}
+ if (thread_info_flags & _TIF_PATCH_PENDING)
+ klp_update_patch_state(current);
+
user_enter();
}
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index f36e6e2b73f0..0b3ee083a665 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -51,15 +51,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
/*
* thread information flags bit numbers
*/
+/* _TIF_WORK bits */
#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
#define TIF_SIGPENDING 1 /* signal pending */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
#define TIF_UPROBE 3 /* breakpointed or single-stepping */
#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
-#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */
-#define TIF_SECCOMP 10 /* secure computing */
-#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
+#define TIF_PATCH_PENDING 5 /* pending live patching update */
+
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
@@ -67,16 +66,25 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_BLOCK_STEP 20 /* This task is block stepped */
#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
+/* _TIF_TRACE bits */
+#define TIF_SYSCALL_TRACE 24 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */
+#define TIF_SECCOMP 26 /* secure computing */
+#define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */
+
#define _TIF_NOTIFY_RESUME _BITUL(TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING _BITUL(TIF_SIGPENDING)
#define _TIF_NEED_RESCHED _BITUL(TIF_NEED_RESCHED)
+#define _TIF_UPROBE _BITUL(TIF_UPROBE)
+#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
+#define _TIF_PATCH_PENDING _BITUL(TIF_PATCH_PENDING)
+
+#define _TIF_31BIT _BITUL(TIF_31BIT)
+#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
+
#define _TIF_SYSCALL_TRACE _BITUL(TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT _BITUL(TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP _BITUL(TIF_SECCOMP)
#define _TIF_SYSCALL_TRACEPOINT _BITUL(TIF_SYSCALL_TRACEPOINT)
-#define _TIF_UPROBE _BITUL(TIF_UPROBE)
-#define _TIF_31BIT _BITUL(TIF_31BIT)
-#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
-#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index c6cf338c9327..a5f5d3bb3dbc 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT
STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_UPROBE | _TIF_GUARDED_STORAGE)
+ _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
@@ -334,6 +334,11 @@ ENTRY(system_call)
jo .Lsysc_guarded_storage
TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
jo .Lsysc_singlestep
+#ifdef CONFIG_LIVEPATCH
+ TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING
+ jo .Lsysc_patch_pending # handle live patching just before
+ # signals and possible syscall restart
+#endif
TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
jo .Lsysc_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
@@ -415,6 +420,15 @@ ENTRY(system_call)
lgr %r2,%r11 # pass pointer to pt_regs
larl %r14,.Lsysc_return
jg gs_load_bc_cb
+#
+# _TIF_PATCH_PENDING is set, call klp_update_patch_state
+#
+#ifdef CONFIG_LIVEPATCH
+.Lsysc_patch_pending:
+ lg %r2,__LC_CURRENT # pass pointer to task struct
+ larl %r14,.Lsysc_return
+ jg klp_update_patch_state
+#endif
#
# _PIF_PER_TRAP is set, call do_per_trap
@@ -667,6 +681,10 @@ ENTRY(io_int_handler)
jo .Lio_mcck_pending
TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lio_reschedule
+#ifdef CONFIG_LIVEPATCH
+ TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING
+ jo .Lio_patch_pending
+#endif
TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
jo .Lio_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
@@ -730,6 +748,16 @@ ENTRY(io_int_handler)
j .Lio_return
#
+# _TIF_PATCH_PENDING is set, call klp_update_patch_state
+#
+#ifdef CONFIG_LIVEPATCH
+.Lio_patch_pending:
+ lg %r2,__LC_CURRENT # pass pointer to task struct
+ larl %r14,.Lio_return
+ jg klp_update_patch_state
+#endif
+
+#
# _TIF_SIGPENDING or is set, call do_signal
#
.Lio_sigpending:
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8d4f87e5bba3..cd18994a9555 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -160,6 +160,7 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER && STACK_VALIDATION
select HAVE_STACK_VALIDATION if X86_64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 370c42c7f046..cdefcfdd9e63 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -22,6 +22,7 @@
#include <linux/context_tracking.h>
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
+#include <linux/livepatch.h>
#include <asm/desc.h>
#include <asm/traps.h>
@@ -130,14 +131,13 @@ static long syscall_trace_enter(struct pt_regs *regs)
#define EXIT_TO_USERMODE_LOOP_FLAGS \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
+ _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING)
static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
{
/*
* In order to return to user mode, we need to have IRQs off with
- * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY,
- * _TIF_UPROBE, or _TIF_NEED_RESCHED set. Several of these flags
+ * none of EXIT_TO_USERMODE_LOOP_FLAGS set. Several of these flags
* can be set at any time on preemptable kernels if we have IRQs on,
* so we need to loop. Disabling preemption wouldn't help: doing the
* work to clear some of the flags can sleep.
@@ -164,6 +164,9 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
if (cached_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
+ if (cached_flags & _TIF_PATCH_PENDING)
+ klp_update_patch_state(current);
+
/* Disable IRQs and retry */
local_irq_disable();
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f765a49103fb..e00e1bd6e7b3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -73,9 +73,6 @@ struct thread_info {
* thread information flags
* - these are process state flags that various assembly files
* may need to access
- * - pending work-to-be-done flags are in LSW
- * - other flags in MSW
- * Warning: layout of LSW is hardcoded in entry.S
*/
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
@@ -87,6 +84,7 @@ struct thread_info {
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
+#define TIF_PATCH_PENDING 13 /* pending live patching update */
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
@@ -104,13 +102,14 @@ struct thread_info {
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
@@ -135,8 +134,10 @@ struct thread_info {
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
- ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+ (_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
+ _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \
+ _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \
+ _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 9b10dcd51716..e6676495b125 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -11,6 +11,7 @@ struct unwind_state {
unsigned long stack_mask;
struct task_struct *task;
int graph_idx;
+ bool error;
#ifdef CONFIG_FRAME_POINTER
bool got_irq;
unsigned long *bp, *orig_sp;
@@ -42,6 +43,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
__unwind_start(state, task, regs, first_frame);
}
+static inline bool unwind_error(struct unwind_state *state)
+{
+ return state->error;
+}
+
#ifdef CONFIG_FRAME_POINTER
static inline
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 8e2b79b88e51..8dabd7bf1673 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -76,6 +76,101 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
+
+#define STACKTRACE_DUMP_ONCE(task) ({ \
+ static bool __section(.data.unlikely) __dumped; \
+ \
+ if (!__dumped) { \
+ __dumped = true; \
+ WARN_ON(1); \
+ show_stack(task, NULL); \
+ } \
+})
+
+static int __save_stack_trace_reliable(struct stack_trace *trace,
+ struct task_struct *task)
+{
+ struct unwind_state state;
+ struct pt_regs *regs;
+ unsigned long addr;
+
+ for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
+ unwind_next_frame(&state)) {
+
+ regs = unwind_get_entry_regs(&state);
+ if (regs) {
+ /*
+ * Kernel mode registers on the stack indicate an
+ * in-kernel interrupt or exception (e.g., preemption
+ * or a page fault), which can make frame pointers
+ * unreliable.
+ */
+ if (!user_mode(regs))
+ return -EINVAL;
+
+ /*
+ * The last frame contains the user mode syscall
+ * pt_regs. Skip it and finish the unwind.
+ */
+ unwind_next_frame(&state);
+ if (!unwind_done(&state)) {
+ STACKTRACE_DUMP_ONCE(task);
+ return -EINVAL;
+ }
+ break;
+ }
+
+ addr = unwind_get_return_address(&state);
+
+ /*
+ * A NULL or invalid return address probably means there's some
+ * generated code which __kernel_text_address() doesn't know
+ * about.
+ */
+ if (!addr) {
+ STACKTRACE_DUMP_ONCE(task);
+ return -EINVAL;
+ }
+
+ if (save_stack_address(trace, addr, false))
+ return -EINVAL;
+ }
+
+ /* Check for stack corruption */
+ if (unwind_error(&state)) {
+ STACKTRACE_DUMP_ONCE(task);
+ return -EINVAL;
+ }
+
+ if (trace->nr_entries < trace->