summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- arch/x86/include/asm/atomic.h | 2
-rw-r--r-- arch/x86/include/asm/bug.h | 1
-rw-r--r-- arch/x86/include/asm/cpufeatures.h | 1
-rw-r--r-- arch/x86/include/asm/div64.h | 14
-rw-r--r-- arch/x86/include/asm/fpu/internal.h | 47
-rw-r--r-- arch/x86/include/asm/fpu/types.h | 27
-rw-r--r-- arch/x86/include/asm/fpu/xstate.h | 36
-rw-r--r-- arch/x86/include/asm/idtentry.h | 31
-rw-r--r-- arch/x86/include/asm/inst.h | 163
-rw-r--r-- arch/x86/include/asm/kprobes.h | 2
-rw-r--r-- arch/x86/include/asm/msr-index.h | 16
-rw-r--r-- arch/x86/include/asm/percpu.h | 510
-rw-r--r-- arch/x86/include/asm/perf_event.h | 82
-rw-r--r-- arch/x86/include/asm/topology.h | 2
-rw-r--r-- arch/x86/include/asm/uaccess.h | 5
15 files changed, 371 insertions, 568 deletions
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index bf35e476a776..b6cac6e9bb70 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -14,8 +14,6 @@
* resource counting etc..
*/
-#define ATOMIC_INIT(i) { (i) }
-
/**
* arch_atomic_read - read atomic variable
* @v: pointer of type atomic_t
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 028189575560..297fa12e7e27 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -3,6 +3,7 @@
#define _ASM_X86_BUG_H
#include <linux/stringify.h>
+#include <linux/instrumentation.h>
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 02dabc9e77b0..72ba4c59ad05 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -366,6 +366,7 @@
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9b8cb50768c2..b8f1dc0761e4 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -74,16 +74,26 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
#else
# include <asm-generic/div64.h>
-static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
+/*
+ * Will generate an #DE when the result doesn't fit u64, could fix with an
+ * __ex_table[] entry when it becomes an issue.
+ */
+static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
{
u64 q;
asm ("mulq %2; divq %3" : "=a" (q)
- : "a" (a), "rm" ((u64)mul), "rm" ((u64)div)
+ : "a" (a), "rm" (mul), "rm" (div)
: "rdx");
return q;
}
+#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
+
+static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
+{
+ return mul_u64_u64_div_u64(a, mul, div);
+}
#define mul_u64_u32_div mul_u64_u32_div
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 845e7481ab77..6b10cdaa7c96 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -274,7 +274,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
*/
static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
{
- u64 mask = -1;
+ u64 mask = xfeatures_mask_all;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
@@ -320,7 +320,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
*/
static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
{
- u64 mask = -1;
+ u64 mask = xfeatures_mask_all;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
@@ -356,6 +356,9 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
*/
static inline int copy_xregs_to_user(struct xregs_state __user *buf)
{
+ u64 mask = xfeatures_mask_user();
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
int err;
/*
@@ -367,7 +370,7 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf)
return -EFAULT;
stac();
- XSTATE_OP(XSAVE, buf, -1, -1, err);
+ XSTATE_OP(XSAVE, buf, lmask, hmask, err);
clac();
return err;
@@ -408,43 +411,7 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
return err;
}
-/*
- * These must be called with preempt disabled. Returns
- * 'true' if the FPU state is still intact and we can
- * keep registers active.
- *
- * The legacy FNSAVE instruction cleared all FPU state
- * unconditionally, so registers are essentially destroyed.
- * Modern FPU state can be kept in registers, if there are
- * no pending FP exceptions.
- */
-static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
-{
- if (likely(use_xsave())) {
- copy_xregs_to_kernel(&fpu->state.xsave);
-
- /*
- * AVX512 state is tracked here because its use is
- * known to slow the max clock speed of the core.
- */
- if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
- fpu->avx512_timestamp = jiffies;
- return 1;
- }
-
- if (likely(use_fxsr())) {
- copy_fxregs_to_kernel(fpu);
- return 1;
- }
-
- /*
- * Legacy FPU register saving, FNSAVE always clears FPU registers,
- * so we have to mark them inactive:
- */
- asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
-
- return 0;
-}
+extern int copy_fpregs_to_fpstate(struct fpu *fpu);
static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
{
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index f098f6cab94b..c87364ea6446 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -114,6 +114,12 @@ enum xfeature {
XFEATURE_Hi16_ZMM,
XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
XFEATURE_PKRU,
+ XFEATURE_RSRVD_COMP_10,
+ XFEATURE_RSRVD_COMP_11,
+ XFEATURE_RSRVD_COMP_12,
+ XFEATURE_RSRVD_COMP_13,
+ XFEATURE_RSRVD_COMP_14,
+ XFEATURE_LBR,
XFEATURE_MAX,
};
@@ -128,6 +134,7 @@ enum xfeature {
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
+#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
@@ -229,6 +236,26 @@ struct pkru_state {
u32 pad;
} __packed;
+/*
+ * State component 15: Architectural LBR configuration state.
+ * The size of Arch LBR state depends on the number of LBRs (lbr_depth).
+ */
+
+struct lbr_entry {
+ u64 from;
+ u64 to;
+ u64 info;
+};
+
+struct arch_lbr_state {
+ u64 lbr_ctl;
+ u64 lbr_depth;
+ u64 ler_from;
+ u64 ler_to;
+ u64 ler_info;
+ struct lbr_entry entries[];
+} __packed;
+
struct xstate_header {
u64 xfeatures;
u64 xcomp_bv;
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 422d8369012a..1559554af931 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,6 +21,8 @@
#define XSAVE_YMM_SIZE 256
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
+#define XSAVE_ALIGNMENT 64
+
/* All currently supported user features */
#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
@@ -36,6 +38,27 @@
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
/*
+ * A supervisor state component may not always contain valuable information,
+ * and its size may be huge. Saving/restoring such supervisor state components
+ * at each context switch can cause high CPU and space overhead, which should
+ * be avoided. Such supervisor state components should only be saved/restored
+ * on demand. The on-demand dynamic supervisor features are set in this mask.
+ *
+ * Unlike the existing supported supervisor features, a dynamic supervisor
+ * feature does not allocate a buffer in task->fpu, and the corresponding
+ * supervisor state component cannot be saved/restored at each context switch.
+ *
+ * To support a dynamic supervisor feature, a developer should follow the
+ * dos and don'ts as below:
+ * - Do dynamically allocate a buffer for the supervisor state component.
+ * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the
+ * state component to/from the buffer.
+ * - Don't set the bit corresponding to the dynamic supervisor feature in
+ * IA32_XSS at run time, since it has been set at boot time.
+ */
+#define XFEATURE_MASK_DYNAMIC (XFEATURE_MASK_LBR)
+
+/*
* Unsupported supervisor features. When a supervisor feature in this mask is
* supported in the future, move it to the supported supervisor feature mask.
*/
@@ -43,6 +66,7 @@
/* All supervisor states including supported and unsupported states. */
#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
+ XFEATURE_MASK_DYNAMIC | \
XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
#ifdef CONFIG_X86_64
@@ -63,6 +87,14 @@ static inline u64 xfeatures_mask_user(void)
return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
}
+static inline u64 xfeatures_mask_dynamic(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_ARCH_LBR))
+ return XFEATURE_MASK_DYNAMIC & ~XFEATURE_MASK_LBR;
+
+ return XFEATURE_MASK_DYNAMIC;
+}
+
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
@@ -71,11 +103,15 @@ extern void __init update_regset_xstate_info(unsigned int size,
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
const void *get_xsave_field_ptr(int xfeature_nr);
int using_compacted_format(void);
+int xfeature_size(int xfeature_nr);
int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
void copy_supervisor_to_kernel(struct xregs_state *xsave);
+void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask);
+void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask);
+
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
int validate_user_xstate_header(const struct xstate_header *hdr);
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 80d3b30d3ee3..d74128c964f8 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -13,8 +13,15 @@
void idtentry_enter_user(struct pt_regs *regs);
void idtentry_exit_user(struct pt_regs *regs);
-bool idtentry_enter_cond_rcu(struct pt_regs *regs);
-void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit);
+typedef struct idtentry_state {
+ bool exit_rcu;
+} idtentry_state_t;
+
+idtentry_state_t idtentry_enter(struct pt_regs *regs);
+void idtentry_exit(struct pt_regs *regs, idtentry_state_t state);
+
+bool idtentry_enter_nmi(struct pt_regs *regs);
+void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
/**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points
@@ -54,12 +61,12 @@ static __always_inline void __##func(struct pt_regs *regs); \
\
__visible noinstr void func(struct pt_regs *regs) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
__##func (regs); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static __always_inline void __##func(struct pt_regs *regs)
@@ -101,12 +108,12 @@ static __always_inline void __##func(struct pt_regs *regs, \
__visible noinstr void func(struct pt_regs *regs, \
unsigned long error_code) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
__##func (regs, error_code); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static __always_inline void __##func(struct pt_regs *regs, \
@@ -199,7 +206,7 @@ static __always_inline void __##func(struct pt_regs *regs, u8 vector); \
__visible noinstr void func(struct pt_regs *regs, \
unsigned long error_code) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
irq_enter_rcu(); \
@@ -207,7 +214,7 @@ __visible noinstr void func(struct pt_regs *regs, \
__##func (regs, (u8)error_code); \
irq_exit_rcu(); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static __always_inline void __##func(struct pt_regs *regs, u8 vector)
@@ -241,7 +248,7 @@ static void __##func(struct pt_regs *regs); \
\
__visible noinstr void func(struct pt_regs *regs) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
irq_enter_rcu(); \
@@ -249,7 +256,7 @@ __visible noinstr void func(struct pt_regs *regs) \
run_on_irqstack_cond(__##func, regs, regs); \
irq_exit_rcu(); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static noinline void __##func(struct pt_regs *regs)
@@ -270,7 +277,7 @@ static __always_inline void __##func(struct pt_regs *regs); \
\
__visible noinstr void func(struct pt_regs *regs) \
{ \
- bool rcu_exit = idtentry_enter_cond_rcu(regs); \
+ idtentry_state_t state = idtentry_enter(regs); \
\
instrumentation_begin(); \
__irq_enter_raw(); \
@@ -278,7 +285,7 @@ __visible noinstr void func(struct pt_regs *regs) \
__##func (regs); \
__irq_exit_raw(); \
instrumentation_end(); \
- idtentry_exit_cond_rcu(regs, rcu_exit); \
+ idtentry_exit(regs, state); \
} \
\
static __always_inline void __##func(struct pt_regs *regs)
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index f5a796da07f8..438ccd4f3cc4 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -12,7 +12,6 @@
#define REG_TYPE_R32 0
#define REG_TYPE_R64 1
-#define REG_TYPE_XMM 2
#define REG_TYPE_INVALID 100
.macro R32_NUM opd r32
@@ -123,77 +122,18 @@
#endif
.endm
- .macro XMM_NUM opd xmm
- \opd = REG_NUM_INVALID
- .ifc \xmm,%xmm0
- \opd = 0
- .endif
- .ifc \xmm,%xmm1
- \opd = 1
- .endif
- .ifc \xmm,%xmm2
- \opd = 2
- .endif
- .ifc \xmm,%xmm3
- \opd = 3
- .endif
- .ifc \xmm,%xmm4
- \opd = 4
- .endif
- .ifc \xmm,%xmm5
- \opd = 5
- .endif
- .ifc \xmm,%xmm6
- \opd = 6
- .endif
- .ifc \xmm,%xmm7
- \opd = 7
- .endif
- .ifc \xmm,%xmm8
- \opd = 8
- .endif
- .ifc \xmm,%xmm9
- \opd = 9
- .endif
- .ifc \xmm,%xmm10
- \opd = 10
- .endif
- .ifc \xmm,%xmm11
- \opd = 11
- .endif
- .ifc \xmm,%xmm12
- \opd = 12
- .endif
- .ifc \xmm,%xmm13
- \opd = 13
- .endif
- .ifc \xmm,%xmm14
- \opd = 14
- .endif
- .ifc \xmm,%xmm15
- \opd = 15
- .endif
- .endm
-
.macro REG_TYPE type reg
R32_NUM reg_type_r32 \reg
R64_NUM reg_type_r64 \reg
- XMM_NUM reg_type_xmm \reg
.if reg_type_r64 <> REG_NUM_INVALID
\type = REG_TYPE_R64
.elseif reg_type_r32 <> REG_NUM_INVALID
\type = REG_TYPE_R32
- .elseif reg_type_xmm <> REG_NUM_INVALID
- \type = REG_TYPE_XMM
.else
\type = REG_TYPE_INVALID
.endif
.endm
- .macro PFX_OPD_SIZE
- .byte 0x66
- .endm
-
.macro PFX_REX opd1 opd2 W=0
.if ((\opd1 | \opd2) & 8) || \W
.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
@@ -203,109 +143,6 @@
.macro MODRM mod opd1 opd2
.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
.endm
-
- .macro PSHUFB_XMM xmm1 xmm2
- XMM_NUM pshufb_opd1 \xmm1
- XMM_NUM pshufb_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX pshufb_opd1 pshufb_opd2
- .byte 0x0f, 0x38, 0x00
- MODRM 0xc0 pshufb_opd1 pshufb_opd2
- .endm
-
- .macro PCLMULQDQ imm8 xmm1 xmm2
- XMM_NUM clmul_opd1 \xmm1
- XMM_NUM clmul_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX clmul_opd1 clmul_opd2
- .byte 0x0f, 0x3a, 0x44
- MODRM 0xc0 clmul_opd1 clmul_opd2
- .byte \imm8
- .endm
-
- .macro PEXTRD imm8 xmm gpr
- R32_NUM extrd_opd1 \gpr
- XMM_NUM extrd_opd2 \xmm
- PFX_OPD_SIZE
- PFX_REX extrd_opd1 extrd_opd2
- .byte 0x0f, 0x3a, 0x16
- MODRM 0xc0 extrd_opd1 extrd_opd2
- .byte \imm8
- .endm
-
- .macro AESKEYGENASSIST rcon xmm1 xmm2
- XMM_NUM aeskeygen_opd1 \xmm1
- XMM_NUM aeskeygen_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aeskeygen_opd1 aeskeygen_opd2
- .byte 0x0f, 0x3a, 0xdf
- MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2
- .byte \rcon
- .endm
-
- .macro AESIMC xmm1 xmm2
- XMM_NUM aesimc_opd1 \xmm1
- XMM_NUM aesimc_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesimc_opd1 aesimc_opd2
- .byte 0x0f, 0x38, 0xdb
- MODRM 0xc0 aesimc_opd1 aesimc_opd2
- .endm
-
- .macro AESENC xmm1 xmm2
- XMM_NUM aesenc_opd1 \xmm1
- XMM_NUM aesenc_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesenc_opd1 aesenc_opd2
- .byte 0x0f, 0x38, 0xdc
- MODRM 0xc0 aesenc_opd1 aesenc_opd2
- .endm
-
- .macro AESENCLAST xmm1 xmm2
- XMM_NUM aesenclast_opd1 \xmm1
- XMM_NUM aesenclast_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesenclast_opd1 aesenclast_opd2
- .byte 0x0f, 0x38, 0xdd
- MODRM 0xc0 aesenclast_opd1 aesenclast_opd2
- .endm
-
- .macro AESDEC xmm1 xmm2
- XMM_NUM aesdec_opd1 \xmm1
- XMM_NUM aesdec_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesdec_opd1 aesdec_opd2
- .byte 0x0f, 0x38, 0xde
- MODRM 0xc0 aesdec_opd1 aesdec_opd2
- .endm
-
- .macro AESDECLAST xmm1 xmm2
- XMM_NUM aesdeclast_opd1 \xmm1
- XMM_NUM aesdeclast_opd2 \xmm2
- PFX_OPD_SIZE
- PFX_REX aesdeclast_opd1 aesdeclast_opd2
- .byte 0x0f, 0x38, 0xdf
- MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2
- .endm
-
- .macro MOVQ_R64_XMM opd1 opd2
- REG_TYPE movq_r64_xmm_opd1_type \opd1
- .if movq_r64_xmm_opd1_type == REG_TYPE_XMM
- XMM_NUM movq_r64_xmm_opd1 \opd1
- R64_NUM movq_r64_xmm_opd2 \opd2
- .else
- R64_NUM movq_r64_xmm_opd1 \opd1
- XMM_NUM movq_r64_xmm_opd2 \opd2
- .endif
- PFX_OPD_SIZE
- PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1
- .if movq_r64_xmm_opd1_type == REG_TYPE_XMM
- .byte 0x0f, 0x7e
- .else
- .byte 0x0f, 0x6e
- .endif
- MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
- .endm
#endif
#endif
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 073eb7ad2f56..143bc9abe99c 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -66,6 +66,8 @@ struct arch_specific_insn {
*/
bool boostable;
bool if_modifier;
+ /* Number of bytes of text poked */
+ int tp_len;
};
struct arch_optimized_insn {
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e8370e64a155..bdc07fc6e517 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -158,7 +158,23 @@
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60)
#define LBR_INFO_CYCLES 0xffff
+#define LBR_INFO_BR_TYPE_OFFSET 56
+#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+
+#define MSR_ARCH_LBR_CTL 0x000014ce
+#define ARCH_LBR_CTL_LBREN BIT(0)
+#define ARCH_LBR_CTL_CPL_OFFSET 1
+#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
+#define ARCH_LBR_CTL_STACK_OFFSET 3
+#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET)
+#define ARCH_LBR_CTL_FILTER_OFFSET 16
+#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET)
+#define MSR_ARCH_LBR_DEPTH 0x000014cf
+#define MSR_ARCH_LBR_FROM_0 0x00001500
+#define MSR_ARCH_LBR_TO_0 0x00001600
+#define MSR_ARCH_LBR_INFO_0 0x00001200
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_PEBS_DATA_CFG 0x000003f2
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 2278797c769d..a3c33b79fb86 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -4,33 +4,15 @@
#ifdef CONFIG_X86_64
#define __percpu_seg gs
-#define __percpu_mov_op movq
#else
#define __percpu_seg fs
-#define __percpu_mov_op movl
#endif
#ifdef __ASSEMBLY__
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- * var - variable name
- * reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- * PER_CPU(cpu_gdt_descr, %ebx)
- */
#ifdef CONFIG_SMP
-#define PER_CPU(var, reg) \
- __percpu_mov_op %__percpu_seg:this_cpu_off, reg; \
- lea var(reg), reg
#define PER_CPU_VAR(var) %__percpu_seg:var
#else /* ! SMP */
-#define PER_CPU(var, reg) __percpu_mov_op $var, reg
#define PER_CPU_VAR(var) var
#endif /* SMP */
@@ -85,213 +67,108 @@
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(qual, op, var, val) \
-do { \
- typedef typeof(var) pto_T__; \
- if (0) { \
- pto_T__ pto_tmp__; \
- pto_tmp__ = (val); \
- (void)pto_tmp__; \
- } \
- switch (sizeof(var)) { \
- case 1: \
- asm qual (op "b %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "qi" ((pto_T__)(val))); \
- break; \
- case 2: \
- asm qual (op "w %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pto_T__)(val))); \
- break; \
- case 4: \
- asm qual (op "l %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pto_T__)(val))); \
- break; \
- case 8: \
- asm qual (op "q %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "re" ((pto_T__)(val))); \
- break; \
- default: __bad_percpu_size(); \
- } \
+
+#define __pcpu_type_1 u8
+#define __pcpu_type_2 u16
+#define __pcpu_type_4 u32
+#define __pcpu_type_8 u64
+
+#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
+#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
+#define __pcpu_cast_8(val) ((u64)(val))
+
+#define __pcpu_op1_1(op, dst) op "b " dst
+#define __pcpu_op1_2(op, dst) op "w " dst
+#define __pcpu_op1_4(op, dst) op "l " dst
+#define __pcpu_op1_8(op, dst) op "q " dst
+
+#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
+#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
+#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
+#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
+
+#define __pcpu_reg_1(mod, x) mod "q" (x)
+#define __pcpu_reg_2(mod, x) mod "r" (x)
+#define __pcpu_reg_4(mod, x) mod "r" (x)
+#define __pcpu_reg_8(mod, x) mod "r" (x)
+
+#define __pcpu_reg_imm_1(x) "qi" (x)
+#define __pcpu_reg_imm_2(x) "ri" (x)
+#define __pcpu_reg_imm_4(x) "ri" (x)
+#define __pcpu_reg_imm_8(x) "re" (x)
+
+#define percpu_to_op(size, qual, op, _var, _val) \
+do { \
+ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \
+ if (0) { \
+ typeof(_var) pto_tmp__; \
+ pto_tmp__ = (_val); \
+ (void)pto_tmp__; \
+ } \
+ asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \
+ : [var] "+m" (_var) \
+ : [val] __pcpu_reg_imm_##size(pto_val__)); \
} while (0)
+#define percpu_unary_op(size, qual, op, _var) \
+({ \
+ asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \
+ : [var] "+m" (_var)); \
+})
+
/*
* Generate a percpu add to memory instruction and optimize code
* if one is added or subtracted.
*/
-#define percpu_add_op(qual, var, val) \
+#define percpu_add_op(size, qual, var, val) \
do { \
- typedef typeof(var) pao_T__; \
const int pao_ID__ = (__builtin_constant_p(val) && \
((val) == 1 || (val) == -1)) ? \
(int)(val) : 0; \
if (0) { \
- pao_T__ pao_tmp__; \
+ typeof(var) pao_tmp__; \
pao_tmp__ = (val); \
(void)pao_tmp__; \
} \
- switch (sizeof(var)) { \
- case 1: \
- if (pao_ID__ == 1) \
- asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
- else \
- asm qual ("addb %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "qi" ((pao_T__)(val))); \
- break; \
- case 2: \
- if (pao_ID__ == 1) \
- asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
- else \
- asm qual ("addw %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pao_T__)(val))); \
- break; \
- case 4: \
- if (pao_ID__ == 1) \
- asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
- else \
- asm qual ("addl %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pao_T__)(val))); \
- break; \
- case 8: \
- if (pao_ID__ == 1) \
- asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm qual ("decq "__percpu_arg(0) : "+m" (var)); \
- else \
- asm qual ("addq %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "re" ((pao_T__)(val))); \
- break; \
- default: __bad_percpu_size(); \
- } \
+ if (pao_ID__ == 1) \
+ percpu_unary_op(size, qual, "inc", var); \
+ else if (pao_ID__ == -1) \
+ percpu_unary_op(size, qual, "dec", var); \
+ else \
+ percpu_to_op(size, qual, "add", var, val); \
} while (0)
-#define percpu_from_op(qual, op, var) \
-({ \
- typeof(var) pfo_ret__; \
- switch (sizeof(var)) { \
- case 1: \
- asm qual (op "b "__percpu_arg(1)",%0" \
- : "=q" (pfo_ret__) \
- : "m" (var)); \
- break; \
- case 2: \
- asm qual (op "w "__percpu_arg(1)",%0" \
- : "=r" (pfo_ret__) \
- : "m" (var)); \
- break; \
- case 4: \
- asm qual (op "l "__percpu_arg(1)",%0" \
- : "=r" (pfo_ret__) \
- : "m" (var)); \
- break; \
- case 8: \
- asm qual (op "q "__percpu_arg(1)",%0" \
- : "=r" (pfo_ret__) \
- : "m" (var)); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pfo_ret__; \
-})
-
-#define percpu_stable_op(op, var) \
-({ \
- typeof(var) pfo_ret__; \
- switch (sizeof(var)) { \
- case 1: \
- asm(op "b "__percpu_arg(P1)",%0" \
- : "=q" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- case 2: \
- asm(op "w "__percpu_arg(P1)",%0" \
- : "=r" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- case 4: \
- asm(op "l "__percpu_arg(P1)",%0" \
- : "=r" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- case 8: \
- asm(op "q "__percpu_arg(P1)",%0" \
- : "=r" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pfo_ret__; \
+#define percpu_from_op(size, qual, op, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]") \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "m" (_var)); \
+ (typeof(_var))(unsigned long) pfo_val__; \
})
-#define percpu_unary_op(qual, op, var) \
-({ \
- switch (sizeof(var)) { \
- case 1: \
- asm qual (op "b "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- case 2: \
- asm qual (op "w "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- case 4: \
- asm qual (op "l "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- case 8: \
- asm qual (op "q "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- default: __bad_percpu_size(); \
- } \
+#define percpu_stable_op(size, op, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "p" (&(_var))); \
+ (typeof(_var))(unsigned long) pfo_val__; \
})
/*
* Add return operation
*/
-#define percpu_add_return_op(qual, var, val) \
+#define percpu_add_return_op(size, qual, _var, _val) \
({ \
- typeof(var) paro_ret__ = val; \
- switch (sizeof(var)) { \
- case 1: \
- asm qual ("xaddb %0, "__percpu_arg(1) \
- : "+q" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 2: \
- asm qual ("xaddw %0, "__percpu_arg(1) \
- : "+r" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 4: \
- asm qual ("xaddl %0, "__percpu_arg(1) \
- : "+r" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 8: \
- asm qual ("xaddq %0, "__percpu_arg(1) \
- : "+re" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- default: __bad_percpu_size(); \
- } \
- paro_ret__ += val; \
- paro_ret__; \
+ __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \
+ asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \
+ __percpu_arg([var])) \
+ : [tmp] __pcpu_reg_##size("+", paro_tmp__), \
+ [var] "+m" (_var) \
+ : : "memory"); \
+ (typeof(_var))(unsigned long) (paro_tmp__ + _val); \
})
/*
@@ -299,85 +176,38 @@ do { \
* expensive due to the implied lock prefix. The processor cannot prefetch
* cachelines if xchg is used.
*/
-#define percpu_xchg_op(qual, var, nval) \
+#define percpu_xchg_op(size, qual, _var, _nval) \
({ \
- typeof(var) pxo_ret__; \
- typeof(var) pxo_new__ = (nval); \
- switch (sizeof(var)) { \
- case 1: \
- asm qual ("\n\tmov "__percpu_arg(1)",%%al" \
- "\n1:\tcmpxchgb %2, "__percpu_arg(1)