From 0910dd7c9ad45a2605c45fd2bf3d1bcac087687c Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Tue, 5 Aug 2025 12:05:09 -0700
Subject: KVM: SVM: Skip fastpath emulation on VM-Exit if next RIP isn't valid

Skip the WRMSR and HLT fastpaths in SVM's VM-Exit handler if the next RIP
isn't valid, e.g. because KVM is running with nrips=false.  SVM must decode
and emulate to skip the instruction if the CPU doesn't provide the next RIP,
and getting the instruction bytes to decode requires reading guest memory.
Reading guest memory through the emulator can fault, i.e. can sleep, which
is disallowed since the fastpath handlers run with IRQs disabled.

  BUG: sleeping function called from invalid context at ./include/linux/uaccess.h:106
  in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 32611, name: qemu
  preempt_count: 1, expected: 0
  INFO: lockdep is turned off.
  irq event stamp: 30580
  hardirqs last enabled at (30579): [] vcpu_run+0x1787/0x1db0 [kvm]
  hardirqs last disabled at (30580): [] __schedule+0x1e2/0xed0
  softirqs last enabled at (30570): [] fpu_swap_kvm_fpstate+0x44/0x210
  softirqs last disabled at (30568): [] fpu_swap_kvm_fpstate+0x44/0x210
  CPU: 298 UID: 0 PID: 32611 Comm: qemu Tainted: G U 6.16.0-smp--e6c618b51cfe-sleep #782 NONE
  Tainted: [U]=USER
  Hardware name: Google Astoria-Turin/astoria, BIOS 0.20241223.2-0 01/17/2025
  Call Trace:
   dump_stack_lvl+0x7d/0xb0
   __might_resched+0x271/0x290
   __might_fault+0x28/0x80
   kvm_vcpu_read_guest_page+0x8d/0xc0 [kvm]
   kvm_fetch_guest_virt+0x92/0xc0 [kvm]
   __do_insn_fetch_bytes+0xf3/0x1e0 [kvm]
   x86_decode_insn+0xd1/0x1010 [kvm]
   x86_emulate_instruction+0x105/0x810 [kvm]
   __svm_skip_emulated_instruction+0xc4/0x140 [kvm_amd]
   handle_fastpath_invd+0xc4/0x1a0 [kvm]
   vcpu_run+0x11a1/0x1db0 [kvm]
   kvm_arch_vcpu_ioctl_run+0x5cc/0x730 [kvm]
   kvm_vcpu_ioctl+0x578/0x6a0 [kvm]
   __se_sys_ioctl+0x6d/0xb0
   do_syscall_64+0x8a/0x2c0
   entry_SYSCALL_64_after_hwframe+0x4b/0x53
  RIP: 0033:0x7f479d57a94b

Note, this is essentially a reapply of commit 5c30e8101e8d ("KVM: SVM: Skip
WRMSR fastpath on VM-Exit if next RIP isn't valid"), but with different
justification (KVM now grabs SRCU when skipping the instruction for other
reasons).

Fixes: b439eb8ab578 ("Revert "KVM: SVM: Skip WRMSR fastpath on VM-Exit if next RIP isn't valid"")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250805190526.1453366-2-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/svm/svm.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kvm/svm/svm.c')

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d9931c6c4bc6..829d9d46718d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4181,13 +4181,21 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_control_area *control = &svm->vmcb->control;
+
+	/*
+	 * Next RIP must be provided as IRQs are disabled, and accessing guest
+	 * memory to decode the instruction might fault, i.e. might sleep.
+	 */
+	if (!nrips || !control->next_rip)
+		return EXIT_FASTPATH_NONE;
 
 	if (is_guest_mode(vcpu))
 		return EXIT_FASTPATH_NONE;
 
-	switch (svm->vmcb->control.exit_code) {
+	switch (control->exit_code) {
 	case SVM_EXIT_MSR:
-		if (!svm->vmcb->control.exit_info_1)
+		if (!control->exit_info_1)
 			break;
 		return handle_fastpath_set_msr_irqoff(vcpu);
 	case SVM_EXIT_HLT:
-- cgit v1.2.3

From 6c3d4b917995a17f515943ccd39ba11b81753b0d Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Tue, 5 Aug 2025 12:05:26 -0700
Subject: KVM: x86: Add a fastpath handler for INVD

Add a fastpath handler for INVD so that the common fastpath logic can be
trivially tested on both Intel and AMD.

Under KVM, INVD is always: (a) intercepted, (b) available to the guest, and
(c) emulated as a nop, with no side effects.  Combined with INVD not having
any inputs or outputs, i.e. no register constraints, INVD is the perfect
instruction for exercising KVM's fastpath as it can be inserted into
practically any guest-side code stream.

Link: https://lore.kernel.org/r/20250805190526.1453366-19-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/svm/svm.c | 2 ++
 arch/x86/kvm/vmx/vmx.c | 2 ++
 arch/x86/kvm/x86.c     | 9 +++++++++
 arch/x86/kvm/x86.h     | 1 +
 4 files changed, 14 insertions(+)

(limited to 'arch/x86/kvm/svm/svm.c')

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 829d9d46718d..f7e1e665a826 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4200,6 +4200,8 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 		return handle_fastpath_set_msr_irqoff(vcpu);
 	case SVM_EXIT_HLT:
 		return handle_fastpath_hlt(vcpu);
+	case SVM_EXIT_INVD:
+		return handle_fastpath_invd(vcpu);
 	default:
 		break;
 	}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index aa157fe5b7b3..95765db52992 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7175,6 +7175,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
 		return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
 	case EXIT_REASON_HLT:
 		return handle_fastpath_hlt(vcpu);
+	case EXIT_REASON_INVD:
+		return handle_fastpath_invd(vcpu);
 	default:
 		return EXIT_FASTPATH_NONE;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 093bfc8d00b3..6e56d5cff44d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2087,6 +2087,15 @@ int kvm_emulate_invd(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_invd);
 
+fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_emulate_invd(vcpu))
+		return EXIT_FASTPATH_EXIT_USERSPACE;
+
+	return EXIT_FASTPATH_REENTER_GUEST;
+}
+EXPORT_SYMBOL_GPL(handle_fastpath_invd);
+
 int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
 {
 	kvm_queue_exception(vcpu, UD_VECTOR);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index bcfd9b719ada..46220b04cdf2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -439,6 +439,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 			    int emulation_type, void *insn, int insn_len);
 fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
 fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu);
 
 extern struct kvm_caps kvm_caps;
 extern struct kvm_host_values kvm_host;
-- cgit v1.2.3

From 87a877de367d835b527d1086f75727123ef85fc4 Mon Sep 17 00:00:00 2001
From: Xin Li
Date: Tue, 5 Aug 2025 13:22:21 -0700
Subject: KVM: x86: Rename handle_fastpath_set_msr_irqoff() to handle_fastpath_wrmsr()

Rename the WRMSR fastpath API to drop
"irqoff", as that information is redundant (the fastpath always runs with IRQs disabled), and to prepare for adding a fastpath for the immediate variant of WRMSRNS. No functional change intended. Signed-off-by: Xin Li (Intel) [sean: split to separate patch, write changelog] Link: https://lore.kernel.org/r/20250805202224.1475590-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- arch/x86/kvm/x86.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kvm/svm/svm.c') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f7e1e665a826..ca550c4fa174 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4197,7 +4197,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) case SVM_EXIT_MSR: if (!control->exit_info_1) break; - return handle_fastpath_set_msr_irqoff(vcpu); + return handle_fastpath_wrmsr(vcpu); case SVM_EXIT_HLT: return handle_fastpath_hlt(vcpu); case SVM_EXIT_INVD: diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 95765db52992..ae2c8c10e5d2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7170,7 +7170,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu, switch (vmx_get_exit_reason(vcpu).basic) { case EXIT_REASON_MSR_WRITE: - return handle_fastpath_set_msr_irqoff(vcpu); + return handle_fastpath_wrmsr(vcpu); case EXIT_REASON_PREEMPTION_TIMER: return handle_fastpath_preemption_timer(vcpu, force_immediate_exit); case EXIT_REASON_HLT: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f7c5db3d2652..85e40d61d18b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2143,7 +2143,7 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); } -fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) +fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu) { u64 data = kvm_read_edx_eax(vcpu); u32 msr = kvm_rcx_read(vcpu); @@ -2168,7 +2168,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_REENTER_GUEST; } -EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); +EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr); /* * Adapt set_msr() to msr_io()'s calling convention diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 46220b04cdf2..2dab9c9d6199 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -437,7 +437,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, void *insn, int insn_len); int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); -fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); +fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu); fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu); fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From d90ebf5a06ecc6d4d7f72d12c5029519af8f838d Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 5 Aug 2025 13:22:24 -0700 Subject: KVM: x86: Advertise support for the immediate form of MSR instructions Advertise support for the immediate form of MSR instructions to userspace if the instructions are supported by the underlying CPU, and KVM is using VMX, i.e. is running on an Intel-compatible CPU. 
For SVM, explicitly clear X86_FEATURE_MSR_IMM to ensure KVM doesn't
over-report support if AMD-compatible CPUs ever implement the immediate
forms, as SVM will likely require explicit enablement in KVM.

Signed-off-by: Xin Li (Intel)
[sean: massage changelog]
Link: https://lore.kernel.org/r/20250805202224.1475590-7-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/cpuid.c            | 6 +++++-
 arch/x86/kvm/reverse_cpuid.h    | 5 +++++
 arch/x86/kvm/svm/svm.c          | 6 +++++-
 4 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kvm/svm/svm.c')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dbdec6025fde..735b5d1e62dd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -774,6 +774,7 @@ enum kvm_only_cpuid_leafs {
 	CPUID_7_2_EDX,
 	CPUID_24_0_EBX,
 	CPUID_8000_0021_ECX,
+	CPUID_7_1_ECX,
 	NR_KVM_CPU_CAPS,
 
 	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bee8c869259f..2705545f67fe 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -985,6 +985,10 @@ void kvm_set_cpu_caps(void)
 		F(LAM),
 	);
 
+	kvm_cpu_cap_init(CPUID_7_1_ECX,
+		SCATTERED_F(MSR_IMM),
+	);
+
 	kvm_cpu_cap_init(CPUID_7_1_EDX,
 		F(AVX_VNNI_INT8),
 		F(AVX_NE_CONVERT),
@@ -1411,9 +1415,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 				goto out;
 
 			cpuid_entry_override(entry, CPUID_7_1_EAX);
+			cpuid_entry_override(entry, CPUID_7_1_ECX);
 			cpuid_entry_override(entry, CPUID_7_1_EDX);
 			entry->ebx = 0;
-			entry->ecx = 0;
 		}
 		if (max_idx >= 2) {
 			entry = do_host_cpuid(array, function, 2);
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index c53b92379e6e..743ab25ba787 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -25,6 +25,9 @@
 #define KVM_X86_FEATURE_SGX2		KVM_X86_FEATURE(CPUID_12_EAX, 1)
 #define KVM_X86_FEATURE_SGX_EDECCSSA	KVM_X86_FEATURE(CPUID_12_EAX, 11)
 
+/* Intel-defined sub-features, CPUID level 0x00000007:1 (ECX) */
+#define KVM_X86_FEATURE_MSR_IMM		KVM_X86_FEATURE(CPUID_7_1_ECX, 5)
+
 /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
 #define X86_FEATURE_AVX_VNNI_INT8	KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
 #define X86_FEATURE_AVX_NE_CONVERT	KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
@@ -87,6 +90,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
 	[CPUID_7_2_EDX]       = {         7, 2, CPUID_EDX},
 	[CPUID_24_0_EBX]      = {      0x24, 0, CPUID_EBX},
 	[CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX},
+	[CPUID_7_1_ECX]       = {         7, 1, CPUID_ECX},
 };
 
 /*
@@ -128,6 +132,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
 	KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
 	KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO);
 	KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO);
+	KVM_X86_TRANSLATE_FEATURE(MSR_IMM);
 	default:
 		return x86_feature;
 	}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ca550c4fa174..7e7821ee8ee1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5311,8 +5311,12 @@ static __init void svm_set_cpu_caps(void)
 	/* CPUID 0x8000001F (SME/SEV features) */
 	sev_set_cpu_caps();
 
-	/* Don't advertise Bus Lock Detect to guest if SVM support is absent */
+	/*
+	 * Clear capabilities that are automatically configured by common code,
+	 * but that require explicit SVM support (that isn't yet implemented).
+	 */
 	kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
+	kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM);
 }
 
 static __init int svm_hardware_setup(void)
-- cgit v1.2.3

From 6057497336bbfabd3a2f632bba2cd2bfbcb7b304 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 6 Aug 2025 12:56:46 -0700
Subject: KVM: x86: Rework KVM_REQ_MSR_FILTER_CHANGED into a generic RECALC_INTERCEPTS

Rework the MSR_FILTER_CHANGED request into a more generic RECALC_INTERCEPTS
request, and expand the responsibilities of vendor code to recalculate all
intercepts that vary based on userspace input, e.g. instruction intercepts
that are tied to guest CPUID.

Providing a generic recalc request will allow the upcoming mediated PMU
support to trigger a recalc when PMU features, e.g. PERF_CAPABILITIES, are
set by userspace, without having to make multiple calls to/from PMU code.
As a bonus, using a request will effectively coalesce recalcs, e.g. will
reduce the number of recalcs for normal usage from 3+ to 1 (vCPU create,
set CPUID, set PERF_CAPABILITIES (Intel only), set filter).

The downside is that MSR filter changes that are done in isolation will do
a small amount of unnecessary work, but that's already a relatively slow
path, and the cost of recalculating instruction intercepts is negligible.

Tested-by: Xudong Hao
Link: https://lore.kernel.org/r/20250806195706.1650976-25-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/include/asm/kvm-x86-ops.h |  2 +-
 arch/x86/include/asm/kvm_host.h    |  4 ++--
 arch/x86/kvm/svm/svm.c             |  8 ++++----
 arch/x86/kvm/vmx/main.c            | 14 +++++++-------
 arch/x86/kvm/vmx/vmx.c             |  9 +++++++--
 arch/x86/kvm/vmx/x86_ops.h         |  2 +-
 arch/x86/kvm/x86.c                 | 15 +++++++--------
 7 files changed, 29 insertions(+), 25 deletions(-)

(limited to 'arch/x86/kvm/svm/svm.c')

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 18a5c3119e1a..7c240e23bd52 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -138,7 +138,7 @@ KVM_X86_OP(check_emulate_instruction)
 KVM_X86_OP(apic_init_signal_blocked)
 KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
 KVM_X86_OP_OPTIONAL(migrate_timers)
-KVM_X86_OP(recalc_msr_intercepts)
+KVM_X86_OP(recalc_intercepts)
 KVM_X86_OP(complete_emulated_msr)
 KVM_X86_OP(vcpu_deliver_sipi_vector)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 464a636b2dc6..7e1e41f04752 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -120,7 +120,7 @@
 #define KVM_REQ_TLB_FLUSH_GUEST \
 	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
-#define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
+#define KVM_REQ_RECALC_INTERCEPTS	KVM_ARCH_REQ(29)
 #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
 	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
@@ -1914,7 +1914,7 @@ struct kvm_x86_ops {
 	int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
 
-	void (*recalc_msr_intercepts)(struct kvm_vcpu *vcpu);
+	void (*recalc_intercepts)(struct kvm_vcpu *vcpu);
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
 	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7e7821ee8ee1..711f160113d1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1077,7 +1077,7 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void svm_recalc_intercepts_after_set_cpuid(struct kvm_vcpu *vcpu)
+static void svm_recalc_intercepts(struct kvm_vcpu *vcpu)
 {
 	svm_recalc_instruction_intercepts(vcpu);
 	svm_recalc_msr_intercepts(vcpu);
@@ -1225,7 +1225,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 
 	svm_hv_init_vmcb(vmcb);
 
-	svm_recalc_intercepts_after_set_cpuid(vcpu);
+	svm_recalc_intercepts(vcpu);
 
 	vmcb_mark_all_dirty(vmcb);
 
@@ -4479,7 +4479,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	if (sev_guest(vcpu->kvm))
 		sev_vcpu_after_set_cpuid(svm);
 
-	svm_recalc_intercepts_after_set_cpuid(vcpu);
+	svm_recalc_intercepts(vcpu);
 }
 
 static bool svm_has_wbinvd_exit(void)
@@ -5181,7 +5181,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
-	.recalc_msr_intercepts = svm_recalc_msr_intercepts,
+	.recalc_intercepts = svm_recalc_intercepts,
 	.complete_emulated_msr = svm_complete_emulated_msr,
 
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index dbab1c15b0cd..68dcafd177a8 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -188,18 +188,18 @@ static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return vmx_get_msr(vcpu, msr_info);
 }
 
-static void vt_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
+static void vt_recalc_intercepts(struct kvm_vcpu *vcpu)
 {
 	/*
-	 * TDX doesn't allow VMM to configure interception of MSR accesses.
-	 * TDX guest requests MSR accesses by calling TDVMCALL.  The MSR
-	 * filters will be applied when handling the TDVMCALL for RDMSR/WRMSR
-	 * if the userspace has set any.
+	 * TDX doesn't allow VMM to configure interception of instructions or
+	 * MSR accesses.  TDX guest requests MSR accesses by calling TDVMCALL.
+	 * The MSR filters will be applied when handling the TDVMCALL for
+	 * RDMSR/WRMSR if the userspace has set any.
 	 */
 	if (is_td_vcpu(vcpu))
 		return;
 
-	vmx_recalc_msr_intercepts(vcpu);
+	vmx_recalc_intercepts(vcpu);
 }
 
 static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
@@ -995,7 +995,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.apic_init_signal_blocked = vt_op(apic_init_signal_blocked),
 	.migrate_timers = vmx_migrate_timers,
 
-	.recalc_msr_intercepts = vt_op(recalc_msr_intercepts),
+	.recalc_intercepts = vt_op(recalc_intercepts),
 	.complete_emulated_msr = vt_op(complete_emulated_msr),
 
 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5d3a50547c3e..68bec421f3fc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4068,7 +4068,7 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
 	}
 }
 
-void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
+static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 {
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
@@ -4121,6 +4121,11 @@ void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 	 */
 }
 
+void vmx_recalc_intercepts(struct kvm_vcpu *vcpu)
+{
+	vmx_recalc_msr_intercepts(vcpu);
+}
+
 static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
 					       int vector)
 {
@@ -7802,7 +7807,7 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 			~FEAT_CTL_SGX_LC_ENABLED;
 
 	/* Recalc MSR interception to account for feature changes. */
-	vmx_recalc_msr_intercepts(vcpu);
+	vmx_recalc_intercepts(vcpu);
 
 	/* Refresh #PF interception to account for MAXPHYADDR changes. */
 	vmx_update_exception_bitmap(vcpu);
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 2b3424f638db..2c590ff44ced 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -52,7 +52,7 @@ void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
 			   int trig_mode, int vector);
 void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
 bool vmx_has_emulated_msr(struct kvm *kvm, u32 index);
-void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu);
+void vmx_recalc_intercepts(struct kvm_vcpu *vcpu);
 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
 void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
 int vmx_get_feature_msr(u32 msr, u64 *data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 99f2a150ca78..64e08148909b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6794,7 +6794,11 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
 
 	kvm_free_msr_filter(old_filter);
 
-	kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
+	/*
+	 * Recalc MSR intercepts as userspace may want to intercept accesses to
+	 * MSRs that KVM would otherwise pass through to the guest.
+	 */
+	kvm_make_all_cpus_request(kvm, KVM_REQ_RECALC_INTERCEPTS);
 
 	return 0;
 }
@@ -10827,13 +10831,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
 			kvm_check_async_pf_completion(vcpu);
 
-		/*
-		 * Recalc MSR intercepts as userspace may want to intercept
-		 * accesses to MSRs that KVM would otherwise pass through to
-		 * the guest.
-		 */
-		if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
-			kvm_x86_call(recalc_msr_intercepts)(vcpu);
+		if (kvm_check_request(KVM_REQ_RECALC_INTERCEPTS, vcpu))
+			kvm_x86_call(recalc_intercepts)(vcpu);
 
 		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
 			kvm_x86_call(update_cpu_dirty_logging)(vcpu);
-- cgit v1.2.3

From 5a1a726e68ff256f564cf51ad21a96bceb4dd954 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 6 Aug 2025 12:56:47 -0700
Subject: KVM: x86: Use KVM_REQ_RECALC_INTERCEPTS to react to CPUID updates

Defer recalculating MSR and instruction intercepts after a CPUID update by
using a RECALC_INTERCEPTS request, thereby converging on RECALC_INTERCEPTS
as the "official" mechanism for triggering recalcs.  As a bonus, because
KVM does a "recalc" during vCPU creation, and every functional VMM sets
CPUID at least once, for all intents and purposes this saves at least one
recalc.

Tested-by: Xudong Hao
Link: https://lore.kernel.org/r/20250806195706.1650976-26-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/cpuid.c   | 2 ++
 arch/x86/kvm/svm/svm.c | 4 +---
 arch/x86/kvm/vmx/vmx.c | 3 ---
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kvm/svm/svm.c')

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ad6cadf09930..efee08fad72e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -448,6 +448,8 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * adjustments to the reserved GPA bits.
 	 */
 	kvm_mmu_after_set_cpuid(vcpu);
+
+	kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu);
 }
 
 int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 711f160113d1..22c5e4ffa132 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1225,7 +1225,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 
 	svm_hv_init_vmcb(vmcb);
 
-	svm_recalc_intercepts(vcpu);
+	kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu);
 
 	vmcb_mark_all_dirty(vmcb);
 
@@ -4478,8 +4478,6 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	if (sev_guest(vcpu->kvm))
 		sev_vcpu_after_set_cpuid(svm);
-
-	svm_recalc_intercepts(vcpu);
 }
 
 static bool svm_has_wbinvd_exit(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 68bec421f3fc..6b9523ca082c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7806,9 +7806,6 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	vmx->msr_ia32_feature_control_valid_bits &=
 		~FEAT_CTL_SGX_LC_ENABLED;
 
-	/* Recalc MSR interception to account for feature changes. */
-	vmx_recalc_intercepts(vcpu);
-
 	/* Refresh #PF interception to account for MAXPHYADDR changes. */
 	vmx_update_exception_bitmap(vcpu);
 }
-- cgit v1.2.3
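
The coalescing behavior the last two changelogs rely on falls out of the
request API's test-and-clear semantics: producers may set the request bit
any number of times, and vcpu_enter_guest() consumes it once.  A standalone
userspace sketch of that pattern (illustrative names only, not kernel code;
the kernel uses per-vCPU request bitmaps and test_and_clear_bit()):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define REQ_RECALC_INTERCEPTS	(1u << 0)

	static atomic_uint vcpu_requests;

	/* Producer side: mirrors kvm_make_request(), minus wakeups. */
	static void make_request(unsigned int req)
	{
		atomic_fetch_or(&vcpu_requests, req);
	}

	/* Consumer side: atomically clear the bit, report if it was set. */
	static bool check_request(unsigned int req)
	{
		return atomic_fetch_and(&vcpu_requests, ~req) & req;
	}

	int main(void)
	{
		/* Three triggers: vCPU create, set CPUID, set MSR filter... */
		make_request(REQ_RECALC_INTERCEPTS);
		make_request(REQ_RECALC_INTERCEPTS);
		make_request(REQ_RECALC_INTERCEPTS);

		/* ...collapse into a single recalc at the next "entry". */
		if (check_request(REQ_RECALC_INTERCEPTS))
			printf("recalc intercepts (once)\n");

		return 0;
	}

However many times the request fires before the next VM-Entry, the vCPU does
exactly one recalc, which is why funneling CPUID, MSR-filter, and (future)
PMU updates through one request is essentially free.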