diff options
| author | Xin Li <xin@zytor.com> | 2025-08-05 13:22:22 -0700 |
|---|---|---|
| committer | Sean Christopherson <seanjc@google.com> | 2025-08-19 11:59:46 -0700 |
| commit | 885df2d2109a60f84d84639ce6d95a91045f6c45 (patch) | |
| tree | 98d78e2140b634999dc1b31c68258832d0daf24c /arch/x86/kvm/x86.c | |
| parent | KVM: x86: Rename handle_fastpath_set_msr_irqoff() to handle_fastpath_wrmsr() (diff) | |
| download | linux-885df2d2109a60f84d84639ce6d95a91045f6c45.tar.gz linux-885df2d2109a60f84d84639ce6d95a91045f6c45.zip | |
KVM: x86: Add support for RDMSR/WRMSRNS w/ immediate on Intel
Add support for the immediate forms of RDMSR and WRMSRNS (currently
Intel-only). The immediate variants are only valid in 64-bit mode, and
use a single general purpose register for the data (the register is also
encoded in the instruction, i.e. not implicit like regular RDMSR/WRMSR).
The immediate variants are primarily motivated by performance, not code
size: by having the MSR index in an immediate, it is available *much*
earlier in the CPU pipeline, which allows hardware much more leeway about
how a particular MSR is handled.
Intel VMX support for the immediate forms of MSR accesses communicates
exit information to the host as follows:
1) The immediate form of RDMSR uses VM-Exit Reason 84.
2) The immediate form of WRMSRNS uses VM-Exit Reason 85.
3) For both VM-Exit reasons 84 and 85, the Exit Qualification field is
set to the MSR index that triggered the VM-Exit.
4) Bits 3 ~ 6 of the VM-Exit Instruction Information field are set to
the register encoding used by the immediate form of the instruction,
i.e. the destination register for RDMSR, and the source for WRMSRNS.
5) The VM-Exit Instruction Length field records the size of the
immediate form of the MSR instruction.
To deal with userspace RDMSR exits, stash the destination register in a
new kvm_vcpu_arch field, similar to cui_linear_rip, pio, etc.
Alternatively, the register could be saved in kvm_run.msr or re-retrieved
from the VMCS, but the former would require sanitizing the value to ensure
userspace doesn't clobber the value to an out-of-bounds index, and the
latter would require a new one-off kvm_x86_ops hook.
Don't bother adding support for the instructions in KVM's emulator, as the
only way for RDMSR/WRMSR to be encountered is if KVM is emulating large
swaths of code due to invalid guest state, and a vCPU cannot have invalid
guest state while in 64-bit mode.
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
[sean: minor tweaks, massage and expand changelog]
Link: https://lore.kernel.org/r/20250805202224.1475590-5-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Diffstat (limited to 'arch/x86/kvm/x86.c')
| -rw-r--r-- | arch/x86/kvm/x86.c | 55 |
1 files changed, 45 insertions, 10 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 85e40d61d18b..efd45b2e8f45 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1991,6 +1991,15 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu) return complete_fast_msr_access(vcpu); } +static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu) +{ + if (!vcpu->run->msr.error) + kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg, + vcpu->run->msr.data); + + return complete_fast_msr_access(vcpu); +} + static u64 kvm_msr_reason(int r) { switch (r) { @@ -2025,39 +2034,53 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index, return 1; } -int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) +static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg, + int (*complete_rdmsr)(struct kvm_vcpu *)) { - u32 msr = kvm_rcx_read(vcpu); u64 data; int r; r = kvm_get_msr_with_filter(vcpu, msr, &data); - if (!r) { trace_kvm_msr_read(msr, data); - kvm_rax_write(vcpu, data & -1u); - kvm_rdx_write(vcpu, (data >> 32) & -1u); + if (reg < 0) { + kvm_rax_write(vcpu, data & -1u); + kvm_rdx_write(vcpu, (data >> 32) & -1u); + } else { + kvm_register_write(vcpu, reg, data); + } } else { /* MSR read failed? See if we should ask user space */ if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0, - complete_fast_rdmsr, r)) + complete_rdmsr, r)) return 0; trace_kvm_msr_read_ex(msr); } return kvm_x86_call(complete_emulated_msr)(vcpu, r); } + +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) +{ + return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1, + complete_fast_rdmsr); +} EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); -int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) +int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + vcpu->arch.cui_rdmsr_imm_reg = reg; + + return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm); +} +EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr_imm); + +static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { - u32 msr = kvm_rcx_read(vcpu); - u64 data = kvm_read_edx_eax(vcpu); int r; r = kvm_set_msr_with_filter(vcpu, msr, data); - if (!r) { trace_kvm_msr_write(msr, data); } else { @@ -2073,8 +2096,20 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) return kvm_x86_call(complete_emulated_msr)(vcpu, r); } + +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) +{ + return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu), + kvm_read_edx_eax(vcpu)); +} EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); +int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); +} +EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr_imm); + int kvm_emulate_as_nop(struct kvm_vcpu *vcpu) { return kvm_skip_emulated_instruction(vcpu); |
