about summary refs log tree commit diff stats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/Kconfig 3
-rw-r--r-- arch/x86/Makefile 5
-rw-r--r-- arch/x86/entry/entry.S 16
-rw-r--r-- arch/x86/include/asm/amd_nb.h 5
-rw-r--r-- arch/x86/include/asm/asm-prototypes.h 3
-rw-r--r-- arch/x86/include/asm/topology.h 5
-rw-r--r-- arch/x86/kernel/acpi/cppc.c 7
-rw-r--r-- arch/x86/kernel/cpu/amd.c 11
-rw-r--r-- arch/x86/kernel/cpu/common.c 2
-rw-r--r-- arch/x86/kernel/traps.c 12
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S 3
-rw-r--r-- arch/x86/kvm/lapic.c 29
-rw-r--r-- arch/x86/kvm/svm/sev.c 15
-rw-r--r-- arch/x86/kvm/vmx/nested.c 30
-rw-r--r-- arch/x86/kvm/vmx/vmx.c 6
-rw-r--r-- arch/x86/mm/ioremap.c 6
16 files changed, 117 insertions, 41 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 16354dfa6d96..7b9a7e8f39ac 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2084,6 +2084,9 @@ config ARCH_SUPPORTS_KEXEC_JUMP
config ARCH_SUPPORTS_CRASH_DUMP
def_bool X86_64 || (X86_32 && HIGHMEM)
+config ARCH_DEFAULT_CRASH_DUMP
+ def_bool y
+
config ARCH_SUPPORTS_CRASH_HOTPLUG
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index cd75e78a06c1..5b773b34768d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -142,9 +142,10 @@ ifeq ($(CONFIG_X86_32),y)
ifeq ($(CONFIG_STACKPROTECTOR),y)
ifeq ($(CONFIG_SMP),y)
- KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
+ KBUILD_CFLAGS += -mstack-protector-guard-reg=fs \
+ -mstack-protector-guard-symbol=__ref_stack_chk_guard
else
- KBUILD_CFLAGS += -mstack-protector-guard=global
+ KBUILD_CFLAGS += -mstack-protector-guard=global
endif
endif
else
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 324686bca368..b7ea3e8e9ecc 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -51,3 +51,19 @@ EXPORT_SYMBOL_GPL(mds_verw_sel);
.popsection
THUNK warn_thunk_thunk, __warn_thunk
+
+#ifndef CONFIG_X86_64
+/*
+ * Clang's implementation of TLS stack cookies requires the variable in
+ * question to be a TLS variable. If the variable happens to be defined as an
+ * ordinary variable with external linkage in the same compilation unit (which
+ * amounts to the whole of vmlinux with LTO enabled), Clang will drop the
+ * segment register prefix from the references, resulting in broken code. Work
+ * around this by avoiding the symbol used in -mstack-protector-guard-symbol=
+ * entirely in the C code, and use an alias emitted by the linker script
+ * instead.
+ */
+#ifdef CONFIG_STACKPROTECTOR
+EXPORT_SYMBOL(__ref_stack_chk_guard);
+#endif
+#endif
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 6f3b6aef47ba..d0caac26533f 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -116,7 +116,10 @@ static inline bool amd_gart_present(void)
#define amd_nb_num(x) 0
#define amd_nb_has_feature(x) false
-#define node_to_amd_nb(x) NULL
+static inline struct amd_northbridge *node_to_amd_nb(int node)
+{
+ return NULL;
+}
#define amd_gart_present(x) false
#endif
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 25466c4d2134..3674006e3974 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -20,3 +20,6 @@
extern void cmpxchg8b_emu(void);
#endif
+#if defined(__GENKSYMS__) && defined(CONFIG_STACKPROTECTOR)
+extern unsigned long __ref_stack_chk_guard;
+#endif
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index aef70336d624..92f3664dd933 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -305,9 +305,4 @@ static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled
extern void arch_scale_freq_tick(void);
#define arch_scale_freq_tick arch_scale_freq_tick
-#ifdef CONFIG_ACPI_CPPC_LIB
-void init_freq_invariance_cppc(void);
-#define arch_init_invariance_cppc init_freq_invariance_cppc
-#endif
-
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index 956984054bf3..aab9d0570841 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -110,7 +110,7 @@ static void amd_set_max_freq_ratio(void)
static DEFINE_MUTEX(freq_invariance_lock);
-void init_freq_invariance_cppc(void)
+static inline void init_freq_invariance_cppc(void)
{
static bool init_done;
@@ -127,6 +127,11 @@ void init_freq_invariance_cppc(void)
mutex_unlock(&freq_invariance_lock);
}
+void acpi_processor_init_invariance_cppc(void)
+{
+ init_freq_invariance_cppc();
+}
+
/*
* Get the highest performance register value.
* @cpu: CPU from which to get highest performance.
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fab5caec0b72..823f44f7bc94 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -924,6 +924,17 @@ static void init_amd_zen4(struct cpuinfo_x86 *c)
{
if (!cpu_has(c, X86_FEATURE_HYPERVISOR))
msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
+
+ /*
+ * These Zen4 SoCs advertise support for virtualized VMLOAD/VMSAVE
+ * in some BIOS versions but they can lead to random host reboots.
+ */
+ switch (c->x86_model) {
+ case 0x18 ... 0x1f:
+ case 0x60 ... 0x7f:
+ clear_cpu_cap(c, X86_FEATURE_V_VMSAVE_VMLOAD);
+ break;
+ }
}
static void init_amd_zen5(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5f221ea5688..f43bb974fc66 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2089,8 +2089,10 @@ void syscall_init(void)
#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
+#ifndef CONFIG_SMP
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
+#endif
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d05392db5d0f..2dbadf347b5f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -261,12 +261,6 @@ static noinstr bool handle_bug(struct pt_regs *regs)
int ud_type;
u32 imm;
- /*
- * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
- * is a rare case that uses @regs without passing them to
- * irqentry_enter().
- */
- kmsan_unpoison_entry_regs(regs);
ud_type = decode_bug(regs->ip, &imm);
if (ud_type == BUG_NONE)
return handled;
@@ -276,6 +270,12 @@ static noinstr bool handle_bug(struct pt_regs *regs)
*/
instrumentation_begin();
/*
+ * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
+ * is a rare case that uses @regs without passing them to
+ * irqentry_enter().
+ */
+ kmsan_unpoison_entry_regs(regs);
+ /*
* Since we're emulating a CALL with exceptions, restore the interrupt
* state to what it was at the exception site.
*/
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index b8c5741d2fb4..feb8102a9ca7 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -491,6 +491,9 @@ SECTIONS
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
+/* needed for Clang - see arch/x86/entry/entry.S */
+PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
+
#ifdef CONFIG_X86_64
/*
* Per-cpu symbols which need to be offset from __per_cpu_load
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2098dc689088..95c6beb8ce27 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2629,19 +2629,26 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (apic->apicv_active) {
- /* irr_pending is always true when apicv is activated. */
- apic->irr_pending = true;
+ /*
+ * When APICv is enabled, KVM must always search the IRR for a pending
+ * IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
+ * isn't running. If APICv is disabled, KVM _should_ search the IRR
+ * for a pending IRQ. But KVM currently doesn't ensure *all* hardware,
+ * e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
+ * the IRR at this time could race with IRQ delivery from hardware that
+ * still sees APICv as being enabled.
+ *
+ * FIXME: Ensure other vCPUs and devices observe the change in APICv
+ * state prior to updating KVM's metadata caches, so that KVM
+ * can safely search the IRR and set irr_pending accordingly.
+ */
+ apic->irr_pending = true;
+
+ if (apic->apicv_active)
apic->isr_count = 1;
- } else {
- /*
- * Don't clear irr_pending, searching the IRR can race with
- * updates from the CPU as APICv is still active from hardware's
- * perspective. The flag will be cleared as appropriate when
- * KVM injects the interrupt.
- */
+ else
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
- }
+
apic->highest_isr_cache = -1;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0b851ef937f2..fb854cf20ac3 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -450,8 +450,11 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
goto e_free;
/* This needs to happen after SEV/SNP firmware initialization. */
- if (vm_type == KVM_X86_SNP_VM && snp_guest_req_init(kvm))
- goto e_free;
+ if (vm_type == KVM_X86_SNP_VM) {
+ ret = snp_guest_req_init(kvm);
+ if (ret)
+ goto e_free;
+ }
INIT_LIST_HEAD(&sev->regions_list);
INIT_LIST_HEAD(&sev->mirror_vms);
@@ -2212,10 +2215,6 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (sev->snp_context)
return -EINVAL;
- sev->snp_context = snp_context_create(kvm, argp);
- if (!sev->snp_context)
- return -ENOTTY;
-
if (params.flags)
return -EINVAL;
@@ -2230,6 +2229,10 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET)
return -EINVAL;
+ sev->snp_context = snp_context_create(kvm, argp);
+ if (!sev->snp_context)
+ return -ENOTTY;
+
start.gctx_paddr = __psp_pa(sev->snp_context);
start.policy = params.policy;
memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw));
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a8e7bc04d9bf..931a7361c30f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1197,11 +1197,14 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept);
/*
- * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
- * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
- * full TLB flush from the guest's perspective. This is required even
- * if VPID is disabled in the host as KVM may need to synchronize the
- * MMU in response to the guest TLB flush.
+ * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
+ * same VPID as the host, and so architecturally, linear and combined
+ * mappings for VPID=0 must be flushed at VM-Enter and VM-Exit. KVM
+ * emulates L2 sharing L1's VPID=0 by using vpid01 while running L2,
+ * and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01. This
+ * is required if VPID is disabled in KVM, as a TLB flush (there are no
+ * VPIDs) still occurs from L1's perspective, and KVM may need to
+ * synchronize the MMU in response to the guest TLB flush.
*
* Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
* EPT is a special snowflake, as guest-physical mappings aren't
@@ -2315,6 +2318,17 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
+ /*
+ * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
+ * same VPID as the host. Emulate this behavior by using vpid01 for L2
+ * if VPID is disabled in vmcs12. Note, if VPID is disabled, VM-Enter
+ * and VM-Exit are architecturally required to flush VPID=0, but *only*
+ * VPID=0. I.e. using vpid02 would be ok (so long as KVM emulates the
+ * required flushes), but doing so would cause KVM to over-flush. E.g.
+ * if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled,
+ * and then runs L2 X again, then KVM can and should retain TLB entries
+ * for VPID12=1.
+ */
if (enable_vpid) {
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
@@ -5950,6 +5964,12 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ /*
+ * Always flush the effective vpid02, i.e. never flush the current VPID
+ * and never explicitly flush vpid01. INVVPID targets a VPID, not a
+ * VMCS, and so whether or not the current vmcs12 has VPID enabled is
+ * irrelevant (and there may not be a loaded vmcs12).
+ */
vpid02 = nested_get_vpid02(vcpu);
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 81ed596e4454..d28618e9277e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -217,9 +217,11 @@ module_param(ple_window_shrink, uint, 0444);
static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, uint, 0444);
-/* Default is SYSTEM mode, 1 for host-guest mode */
+/* Default is SYSTEM mode, 1 for host-guest mode (which is BROKEN) */
int __read_mostly pt_mode = PT_MODE_SYSTEM;
+#ifdef CONFIG_BROKEN
module_param(pt_mode, int, S_IRUGO);
+#endif
struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;
@@ -3216,7 +3218,7 @@ void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu) && nested_cpu_has_vpid(get_vmcs12(vcpu)))
return nested_get_vpid02(vcpu);
return to_vmx(vcpu)->vpid;
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 70b02fc61d93..8d29163568a7 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -656,7 +656,8 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
paddr_next = data->next;
len = data->len;
- if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
+ if ((phys_addr > paddr) &&
+ (phys_addr < (paddr + sizeof(struct setup_data) + len))) {
memunmap(data);
return true;
}
@@ -718,7 +719,8 @@ static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
paddr_next = data->next;
len = data->len;
- if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
+ if ((phys_addr > paddr) &&
+ (phys_addr < (paddr + sizeof(struct setup_data) + len))) {
early_memunmap(data, sizeof(*data));
return true;
}