aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/include
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2025-09-30 13:37:14 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2025-09-30 13:37:14 -0400
commit12abeb81c87331bf53940947694cc51b7507aa38 (patch)
treeca91361e41e5842c4270ce34e70991729bdd2bde /arch/x86/include
parentMerge tag 'kvm-x86-misc-6.18' of https://github.com/kvm-x86/linux into HEAD (diff)
parentKVM: VMX: Make CR4.CET a guest owned bit (diff)
downloadlinux-12abeb81c87331bf53940947694cc51b7507aa38.tar.gz
linux-12abeb81c87331bf53940947694cc51b7507aa38.zip
Merge tag 'kvm-x86-cet-6.18' of https://github.com/kvm-x86/linux into HEAD
KVM x86 CET virtualization support for 6.18 Add support for virtualizing Control-flow Enforcement Technology (CET) on Intel (Shadow Stacks and Indirect Branch Tracking) and AMD (Shadow Stacks). CET is comprised of two distinct features, Shadow Stacks (SHSTK) and Indirect Branch Tracking (IBT), that can be utilized by software to help provide Control-flow integrity (CFI). SHSTK defends against backward-edge attacks (a.k.a. Return-oriented programming (ROP)), while IBT defends against forward-edge attacks (a.k.a. similarly CALL/JMP-oriented programming (COP/JOP)). Attackers commonly use ROP and COP/JOP methodologies to redirect the control- flow to unauthorized targets in order to execute small snippets of code, a.k.a. gadgets, of the attackers choice. By chaining together several gadgets, an attacker can perform arbitrary operations and circumvent the system's defenses. SHSTK defends against backward-edge attacks, which execute gadgets by modifying the stack to branch to the attacker's target via RET, by providing a second stack that is used exclusively to track control transfer operations. The shadow stack is separate from the data/normal stack, and can be enabled independently in user and kernel mode. When SHSTK is is enabled, CALL instructions push the return address on both the data and shadow stack. RET then pops the return address from both stacks and compares the addresses. If the return addresses from the two stacks do not match, the CPU generates a Control Protection (#CP) exception. IBT defends against backward-edge attacks, which branch to gadgets by executing indirect CALL and JMP instructions with attacker controlled register or memory state, by requiring the target of indirect branches to start with a special marker instruction, ENDBRANCH. If an indirect branch is executed and the next instruction is not an ENDBRANCH, the CPU generates a #CP. Note, ENDBRANCH behaves as a NOP if IBT is disabled or unsupported. From a virtualization perspective, CET presents several problems. While SHSTK and IBT have two layers of enabling, a global control in the form of a CR4 bit, and a per-feature control in user and kernel (supervisor) MSRs (U_CET and S_CET respectively), the {S,U}_CET MSRs can be context switched via XSAVES/XRSTORS. Practically speaking, intercepting and emulating XSAVES/XRSTORS is not a viable option due to complexity, and outright disallowing use of XSTATE to context switch SHSTK/IBT state would render the features unusable to most guests. To limit the overall complexity without sacrificing performance or usability, simply ignore the potential virtualization hole, but ensure that all paths in KVM treat SHSTK/IBT as usable by the guest if the feature is supported in hardware, and the guest has access to at least one of SHSTK or IBT. I.e. allow userspace to advertise one of SHSTK or IBT if both are supported in hardware, even though doing so would allow a misbehaving guest to use the unadvertised feature. Fully emulating SHSTK and IBT would also require significant complexity, e.g. to track and update branch state for IBT, and shadow stack state for SHSTK. Given that emulating large swaths of the guest code stream isn't necessary on modern CPUs, punt on emulating instructions that meaningful impact or consume SHSTK or IBT. However, instead of doing nothing, explicitly reject emulation of such instructions so that KVM's emulator can't be abused to circumvent CET. Disable support for SHSTK and IBT if KVM is configured such that emulation of arbitrary guest instructions may be required, specifically if Unrestricted Guest (Intel only) is disabled, or if KVM will emulate a guest.MAXPHYADDR that is smaller than host.MAXPHYADDR. Lastly disable SHSTK support if shadow paging is enabled, as the protections for the shadow stack are novel (shadow stacks require Writable=0,Dirty=1, so that they can't be directly modified by software), i.e. would require non-trivial support in the Shadow MMU. Note, AMD CPUs currently only support SHSTK. Explicitly disable IBT support so that KVM doesn't over-advertise if AMD CPUs add IBT, and virtualizing IBT in SVM requires KVM modifications.
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/kvm_host.h6
-rw-r--r--arch/x86/include/asm/vmx.h9
-rw-r--r--arch/x86/include/uapi/asm/kvm.h34
3 files changed, 47 insertions, 2 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9e611901d310..0eed9b430849 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -142,7 +142,7 @@
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
- | X86_CR4_LAM_SUP))
+ | X86_CR4_LAM_SUP | X86_CR4_CET))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -267,6 +267,7 @@ enum x86_intercept_stage;
#define PFERR_RSVD_MASK BIT(3)
#define PFERR_FETCH_MASK BIT(4)
#define PFERR_PK_MASK BIT(5)
+#define PFERR_SS_MASK BIT(6)
#define PFERR_SGX_MASK BIT(15)
#define PFERR_GUEST_RMP_MASK BIT_ULL(31)
#define PFERR_GUEST_FINAL_MASK BIT_ULL(32)
@@ -815,7 +816,6 @@ struct kvm_vcpu_arch {
bool at_instruction_boundary;
bool tpr_access_reporting;
bool xfd_no_write_intercept;
- u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
u64 perf_capabilities;
@@ -876,6 +876,8 @@ struct kvm_vcpu_arch {
u64 xcr0;
u64 guest_supported_xcr0;
+ u64 ia32_xss;
+ u64 guest_supported_xss;
struct kvm_pio_request pio;
void *pio_data;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cca7d6641287..c85c50019523 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -106,6 +106,7 @@
#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_PT_CONCEAL_PIP 0x01000000
#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000
+#define VM_EXIT_LOAD_CET_STATE 0x10000000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@@ -119,6 +120,7 @@
#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000
#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000
+#define VM_ENTRY_LOAD_CET_STATE 0x00100000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
@@ -132,6 +134,7 @@
#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49)
#define VMX_BASIC_INOUT BIT_ULL(54)
#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
+#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56)
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
@@ -369,6 +372,9 @@ enum vmcs_field {
GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
GUEST_SYSENTER_ESP = 0x00006824,
GUEST_SYSENTER_EIP = 0x00006826,
+ GUEST_S_CET = 0x00006828,
+ GUEST_SSP = 0x0000682a,
+ GUEST_INTR_SSP_TABLE = 0x0000682c,
HOST_CR0 = 0x00006c00,
HOST_CR3 = 0x00006c02,
HOST_CR4 = 0x00006c04,
@@ -381,6 +387,9 @@ enum vmcs_field {
HOST_IA32_SYSENTER_EIP = 0x00006c12,
HOST_RSP = 0x00006c14,
HOST_RIP = 0x00006c16,
+ HOST_S_CET = 0x00006c18,
+ HOST_SSP = 0x00006c1a,
+ HOST_INTR_SSP_TABLE = 0x00006c1c
};
/*
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 0f15d683817d..d420c9c066d4 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -35,6 +35,11 @@
#define MC_VECTOR 18
#define XM_VECTOR 19
#define VE_VECTOR 20
+#define CP_VECTOR 21
+
+#define HV_VECTOR 28
+#define VC_VECTOR 29
+#define SX_VECTOR 30
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
@@ -411,6 +416,35 @@ struct kvm_xcrs {
__u64 padding[16];
};
+#define KVM_X86_REG_TYPE_MSR 2
+#define KVM_X86_REG_TYPE_KVM 3
+
+#define KVM_X86_KVM_REG_SIZE(reg) \
+({ \
+ reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \
+})
+
+#define KVM_X86_REG_TYPE_SIZE(type, reg) \
+({ \
+ __u64 type_size = (__u64)type << 32; \
+ \
+ type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \
+ type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) : \
+ 0; \
+ type_size; \
+})
+
+#define KVM_X86_REG_ID(type, index) \
+ (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index)
+
+#define KVM_X86_REG_MSR(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index)
+#define KVM_X86_REG_KVM(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index)
+
+/* KVM-defined registers starting from 0 */
+#define KVM_REG_GUEST_SSP 0
+
#define KVM_SYNC_X86_REGS (1UL << 0)
#define KVM_SYNC_X86_SREGS (1UL << 1)
#define KVM_SYNC_X86_EVENTS (1UL << 2)