diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-10 09:06:53 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-10 09:06:53 -0700 |
| commit | 73d7cf07109e79b093d1a1fb57a88d4048cd9b4b (patch) | |
| tree | b472565858ee1de9c06ddaf6475cf78a78bd0b4e /arch/x86/kvm | |
| parent | eventpoll: don't decrement ep refcount while still holding the ep mutex (diff) | |
| parent | KVM: x86: avoid underflow when scaling TSC frequency (diff) | |
| download | linux-73d7cf07109e79b093d1a1fb57a88d4048cd9b4b.tar.gz linux-73d7cf07109e79b093d1a1fb57a88d4048cd9b4b.zip | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
"Many patches, pretty much all of them small, that accumulated while I
was on vacation.
ARM:
- Remove the last leftovers of the ill-fated FPSIMD host state
mapping at EL2 stage-1
- Fix unexpected advertisement to the guest of unimplemented S2 base
granule sizes
- Gracefully fail initialising pKVM if the interrupt controller isn't
GICv3
- Also gracefully fail initialising pKVM if the carveout allocation
fails
- Fix the computing of the minimum MMIO range required for the host
on stage-2 fault
- Fix the generation of the GICv3 Maintenance Interrupt in nested
mode
x86:
- Reject SEV{-ES} intra-host migration if one or more vCPUs are
actively being created, so as not to create a non-SEV{-ES} vCPU in
an SEV{-ES} VM
- Use a pre-allocated, per-vCPU buffer for handling de-sparsification
of vCPU masks in Hyper-V hypercalls; fixes a "stack frame too
large" issue
- Allow out-of-range/invalid Xen event channel ports when configuring
IRQ routing, to avoid dictating a specific ioctl() ordering to
userspace
- Conditionally reschedule when setting memory attributes to avoid
soft lockups when userspace converts huge swaths of memory to/from
private
- Add back MWAIT as a required feature for the MONITOR/MWAIT selftest
- Add a missing field in struct sev_data_snp_launch_start that
resulted in the guest-visible workarounds field being filled at the
wrong offset
- Skip non-canonical address when processing Hyper-V PV TLB flushes
to avoid VM-Fail on INVVPID
- Advertise supported TDX TDVMCALLs to userspace
- Pass SetupEventNotifyInterrupt arguments to userspace
- Fix TSC frequency underflow"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: avoid underflow when scaling TSC frequency
KVM: arm64: Remove kvm_arch_vcpu_run_map_fp()
KVM: arm64: Fix handling of FEAT_GTG for unimplemented granule sizes
KVM: arm64: Don't free hyp pages with pKVM on GICv2
KVM: arm64: Fix error path in init_hyp_mode()
KVM: arm64: Adjust range correctly during host stage-2 faults
KVM: arm64: nv: Fix MI line level calculation in vgic_v3_nested_update_mi()
KVM: x86/hyper-v: Skip non-canonical addresses during PV TLB flush
KVM: SVM: Add missing member in SNP_LAUNCH_START command structure
Documentation: KVM: Fix unexpected unindent warnings
KVM: selftests: Add back the missing check of MONITOR/MWAIT availability
KVM: Allow CPU to reschedule while setting per-page memory attributes
KVM: x86/xen: Allow 'out of range' event channel ports in IRQ routing table.
KVM: x86/hyper-v: Use preallocated per-vCPU buffer for de-sparsified vCPU masks
KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is NULL
KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight
KVM: TDX: Report supported optional TDVMCALLs in TDX capabilities
KVM: TDX: Exit to userspace for SetupEventNotifyInterrupt
Diffstat (limited to 'arch/x86/kvm')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | arch/x86/kvm/hyperv.c | 5 |
| -rw-r--r-- | arch/x86/kvm/svm/sev.c | 12 |
| -rw-r--r-- | arch/x86/kvm/vmx/tdx.c | 30 |
| -rw-r--r-- | arch/x86/kvm/x86.c | 4 |
| -rw-r--r-- | arch/x86/kvm/xen.c | 15 |
5 files changed, 60 insertions, 6 deletions
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 24f0318c50d7..ee27064dd72f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; + if (is_noncanonical_invlpg_address(entries[i], vcpu)) + continue; + /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. @@ -2001,11 +2004,11 @@ out_flush_all: static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + unsigned long *vcpu_mask = hv_vcpu->vcpu_mask; u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; /* * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 459c3b791fd4..b201f77fcd49 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) struct kvm_vcpu *src_vcpu; unsigned long i; + if (src->created_vcpus != atomic_read(&src->online_vcpus) || + dst->created_vcpus != atomic_read(&dst->online_vcpus)) + return -EBUSY; + if (!sev_es_guest(src)) return 0; @@ -4445,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) * the VMSA will be NULL if this vCPU is the destination for intrahost * migration, and will be copied later. 
*/ - if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa) - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + if (!svm->sev_es.snp_has_guest_vmsa) { + if (svm->sev_es.vmsa) + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + else + svm->vmcb->control.vmsa_pa = INVALID_PAGE; + } if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES)) svm->vmcb->control.allowed_sev_features = sev->vmsa_features | diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 1ad20c273f3b..f31ccdeb905b 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -173,6 +173,9 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i tdx_clear_unsupported_cpuid(entry); } +#define TDVMCALLINFO_GET_QUOTE BIT(0) +#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1) + static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, struct kvm_tdx_capabilities *caps) { @@ -188,6 +191,10 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, caps->cpuid.nent = td_conf->num_cpuid_config; + caps->user_tdvmcallinfo_1_r11 = + TDVMCALLINFO_GET_QUOTE | + TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT; + for (i = 0; i < td_conf->num_cpuid_config; i++) td_init_cpuid_entry2(&caps->cpuid.entries[i], i); @@ -1530,6 +1537,27 @@ static int tdx_get_quote(struct kvm_vcpu *vcpu) return 0; } +static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 vector = tdx->vp_enter_args.r12; + + if (vector < 32 || vector > 255) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT; + vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.setup_event_notify.vector = vector; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct 
kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1541,6 +1569,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_get_td_vm_call_info(vcpu); case TDVMCALL_GET_QUOTE: return tdx_get_quote(vcpu); + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + return tdx_setup_event_notify_interrupt(vcpu); default: break; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a9d992d5652f..357b9e3a6cef 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ - if (kvm_caps.has_tsc_control) + if (kvm_caps.has_tsc_control) { tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz, v->arch.l1_tsc_scaling_ratio); + tgt_tsc_khz = tgt_tsc_khz ? : 1; + } if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9b029bb29a16..5fa2cca43653 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1971,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) - return -EINVAL; + /* + * Don't check for the port being within range of max_evtchn_port(). + * Userspace can configure what ever targets it likes; events just won't + * be delivered if/while the target is invalid, just like userspace can + * configure MSIs which target non-existent APICs. + * + * This allow on Live Migration and Live Update, the IRQ routing table + * can be restored *independently* of other things like creating vCPUs, + * without imposing an ordering dependency on userspace. In this + * particular case, the problematic ordering would be with setting the + * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096 + * instead of 1024 event channels. + */ /* We only support 2 level event channels for now */ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) |
