author    Jakub Kicinski <kuba@kernel.org>  2024-11-14 11:27:36 -0800
committer Jakub Kicinski <kuba@kernel.org>  2024-11-14 11:29:15 -0800
commit    a79993b5fce69e97f900bb975f6127e25cebf130
tree      f73ec541ca4183893a0bdc3d30f779c6999a0d9a /arch
parent    eth: fbnic: Add support to dump registers
parent    Merge tag 'net-6.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/net...
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-6.12-rc8).

Conflicts:

tools/testing/selftests/net/.gitignore
  252e01e68241 ("selftests: net: add netlink-dumps to .gitignore")
  be43a6b23829 ("selftests: ncdevmem: Move ncdevmem under drivers/net/hw")
https://lore.kernel.org/all/20241113122359.1b95180a@canb.auug.org.au/

drivers/net/phy/phylink.c
  671154f174e0 ("net: phylink: ensure PHY momentary link-fails are handled")
  7530ea26c810 ("net: phylink: remove "using_mac_select_pcs"")

Adjacent changes:

drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
  5b366eae7193 ("stmmac: dwmac-intel-plat: fix call balance of tx_clk handling routines")
  e96321fad3ad ("net: ethernet: Switch back to struct platform_driver::remove()")

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm64/Kconfig                   1
-rw-r--r--  arch/arm64/include/asm/mman.h       10
-rw-r--r--  arch/arm64/include/asm/topology.h    4
-rw-r--r--  arch/arm64/kernel/fpsimd.c           1
-rw-r--r--  arch/arm64/kernel/smccc-call.S      35
-rw-r--r--  arch/loongarch/include/asm/kasan.h  13
-rw-r--r--  arch/loongarch/include/asm/page.h    5
-rw-r--r--  arch/loongarch/kernel/acpi.c        81
-rw-r--r--  arch/loongarch/kernel/paravirt.c    15
-rw-r--r--  arch/loongarch/kernel/smp.c          5
-rw-r--r--  arch/loongarch/mm/kasan_init.c      46
-rw-r--r--  arch/parisc/include/asm/mman.h       5
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c        12
-rw-r--r--  arch/x86/include/asm/topology.h      5
-rw-r--r--  arch/x86/kernel/acpi/cppc.c          7
-rw-r--r--  arch/x86/kvm/lapic.c                29
-rw-r--r--  arch/x86/kvm/svm/sev.c              15
-rw-r--r--  arch/x86/kvm/vmx/nested.c           30
-rw-r--r--  arch/x86/kvm/vmx/vmx.c               6
19 files changed, 213 insertions(+), 112 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fd9df6dcc593..70d7f4f20225 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2214,6 +2214,7 @@ config ARM64_SME
bool "ARM Scalable Matrix Extension support"
default y
depends on ARM64_SVE
+ depends on BROKEN
help
The Scalable Matrix Extension (SME) is an extension to the AArch64
execution state which utilises a substantial subset of the SVE
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 9e39217b4afb..798d965760d4 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -6,6 +6,8 @@
#ifndef BUILD_VDSO
#include <linux/compiler.h>
+#include <linux/fs.h>
+#include <linux/shmem_fs.h>
#include <linux/types.h>
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
@@ -31,19 +33,21 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
+ unsigned long flags)
{
/*
* Only allow MTE on anonymous mappings as these are guaranteed to be
* backed by tags-capable memory. The vm_flags may be overridden by a
* filesystem supporting MTE (RAM-based).
*/
- if (system_supports_mte() && (flags & MAP_ANONYMOUS))
+ if (system_supports_mte() &&
+ ((flags & MAP_ANONYMOUS) || shmem_file(file)))
return VM_MTE_ALLOWED;
return 0;
}
-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
static inline bool arch_validate_prot(unsigned long prot,
unsigned long addr __always_unused)
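arch_calc_vm_flag_bits() now receives the backing file so MTE can be allowed on shmem mappings, which are RAM-backed and therefore tag-capable, in addition to MAP_ANONYMOUS. A minimal sketch of how the generic side is expected to thread the file through to this hook (modeled on include/linux/mman.h; treat the exact body as an assumption):

    /* Sketch: generic mmap-flag conversion handing the file to the arch
     * hook; arch_calc_vm_flag_bits() is assumed to fall back to a no-op
     * on architectures that do not define it. */
    static inline unsigned long calc_vm_flag_bits(struct file *file,
                                                  unsigned long flags)
    {
            return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
                   _calc_vm_trans(flags, MAP_LOCKED,    VM_LOCKED)    |
                   arch_calc_vm_flag_bits(file, flags);
    }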
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 5fc3af9f8f29..341174bf9106 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -26,10 +26,6 @@ void update_freq_counters_refs(void);
#define arch_scale_freq_invariant topology_scale_freq_invariant
#define arch_scale_freq_ref topology_get_freq_ref
-#ifdef CONFIG_ACPI_CPPC_LIB
-#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
-#endif
-
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 77006df20a75..6d21971ae559 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1367,6 +1367,7 @@ static void sve_init_regs(void)
} else {
fpsimd_to_sve(current);
current->thread.fp_type = FP_STATE_SVE;
+ fpsimd_flush_task_state(current);
}
}
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 487381164ff6..2def9d0dd3dd 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -7,48 +7,19 @@
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
-#include <asm/thread_info.h>
-
-/*
- * If we have SMCCC v1.3 and (as is likely) no SVE state in
- * the registers then set the SMCCC hint bit to say there's no
- * need to preserve it. Do this by directly adjusting the SMCCC
- * function value which is already stored in x0 ready to be called.
- */
-SYM_FUNC_START(__arm_smccc_sve_check)
-
- ldr_l x16, smccc_has_sve_hint
- cbz x16, 2f
-
- get_current_task x16
- ldr x16, [x16, #TSK_TI_FLAGS]
- tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
- tbnz x16, #TIF_SVE, 2f // Does that state include SVE?
-
-1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT
-
-2: ret
-SYM_FUNC_END(__arm_smccc_sve_check)
-EXPORT_SYMBOL(__arm_smccc_sve_check)
.macro SMCCC instr
- stp x29, x30, [sp, #-16]!
- mov x29, sp
-alternative_if ARM64_SVE
- bl __arm_smccc_sve_check
-alternative_else_nop_endif
\instr #0
- ldr x4, [sp, #16]
+ ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
- ldr x4, [sp, #24]
+ ldr x4, [sp, #8]
cbz x4, 1f /* no quirk structure */
ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
b.ne 1f
str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
-1: ldp x29, x30, [sp], #16
- ret
+1: ret
.endm
/*
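With the SVE-hint check gone, the SMCCC macro no longer pushes an x29/x30 frame before issuing the call, which is why the out-parameter loads move from [sp, #16]/[sp, #24] to [sp]/[sp, #8]. Under AAPCS64 the first eight integer arguments travel in x0-x7, so for the C prototype below (from include/linux/arm-smccc.h) the res and quirk pointers are the ninth and tenth arguments and arrive on the caller's stack; this is a calling-convention sketch, not new kernel code:

    /* a0..a7 occupy x0-x7; res is found at [sp], quirk at [sp, #8]. */
    asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1,
                                    unsigned long a2, unsigned long a3,
                                    unsigned long a4, unsigned long a5,
                                    unsigned long a6, unsigned long a7,
                                    struct arm_smccc_res *res,
                                    struct arm_smccc_quirk *quirk);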
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
index c6bce5fbff57..7f52bd31b9d4 100644
--- a/arch/loongarch/include/asm/kasan.h
+++ b/arch/loongarch/include/asm/kasan.h
@@ -25,6 +25,7 @@
/* 64-bit segment value. */
#define XKPRANGE_UC_SEG (0x8000)
#define XKPRANGE_CC_SEG (0x9000)
+#define XKPRANGE_WC_SEG (0xa000)
#define XKVRANGE_VC_SEG (0xffff)
/* Cached */
@@ -41,20 +42,28 @@
#define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
#define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
+/* WriteCombine */
+#define XKPRANGE_WC_START WRITECOMBINE_BASE
+#define XKPRANGE_WC_SIZE XRANGE_SIZE
+#define XKPRANGE_WC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END
+#define XKPRANGE_WC_SHADOW_SIZE (XKPRANGE_WC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_WC_SHADOW_END (XKPRANGE_WC_KASAN_OFFSET + XKPRANGE_WC_SHADOW_SIZE)
+
/* VMALLOC (Cached or UnCached) */
#define XKVRANGE_VC_START MODULES_VADDR
#define XKVRANGE_VC_SIZE round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
-#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END
+#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_WC_SHADOW_END
#define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
#define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
/* KAsan shadow memory start right after vmalloc. */
#define KASAN_SHADOW_START round_up(KFENCE_AREA_END, PGDIR_SIZE)
#define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
-#define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
+#define KASAN_SHADOW_END (round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE) - 1)
#define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
#define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
+#define XKPRANGE_WC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_WC_KASAN_OFFSET)
#define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
extern bool kasan_early_stage;
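The new WriteCombine segment slots between the UC and VC shadow ranges, so the shadow layout remains a chain of contiguous offsets. A worked sketch of the forward mapping these macros define, mirroring the kasan_mem_to_shadow() change later in this diff (illustrative only):

    /* Sketch: XKPRANGE_WC address -> shadow address. */
    static void *wc_mem_to_shadow(const void *addr)
    {
            unsigned long offset = (unsigned long)addr - XKPRANGE_WC_START;

            return (void *)(XKPRANGE_WC_SHADOW_OFFSET +
                            (offset >> KASAN_SHADOW_SCALE_SHIFT));
    }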
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index e85df33f11c7..8f21567a3188 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -113,10 +113,7 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
extern int __virt_addr_valid(volatile void *kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
-#define VM_DATA_DEFAULT_FLAGS \
- (VM_READ | VM_WRITE | \
- ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
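The open-coded flags are replaced by the generic helper. For reference, VM_DATA_FLAGS_TSK_EXEC in include/linux/mm.h expands to the same set (reproduced from memory; verify against the tree):

    #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)

    #define VM_DATA_FLAGS_TSK_EXEC  (VM_READ | VM_WRITE | TASK_EXEC | \
                                     VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)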
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index f1a74b80f22c..382a09a7152c 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -58,48 +58,48 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
return ioremap_cache(phys, size);
}
-static int cpu_enumerated = 0;
-
#ifdef CONFIG_SMP
-static int set_processor_mask(u32 id, u32 flags)
+static int set_processor_mask(u32 id, u32 pass)
{
- int nr_cpus;
- int cpu, cpuid = id;
-
- if (!cpu_enumerated)
- nr_cpus = NR_CPUS;
- else
- nr_cpus = nr_cpu_ids;
+ int cpu = -1, cpuid = id;
- if (num_processors >= nr_cpus) {
+ if (num_processors >= NR_CPUS) {
pr_warn(PREFIX "nr_cpus limit of %i reached."
- " processor 0x%x ignored.\n", nr_cpus, cpuid);
+ " processor 0x%x ignored.\n", NR_CPUS, cpuid);
return -ENODEV;
}
+
if (cpuid == loongson_sysconf.boot_cpu_id)
cpu = 0;
- else
- cpu = find_first_zero_bit(cpumask_bits(cpu_present_mask), NR_CPUS);
-
- if (!cpu_enumerated)
- set_cpu_possible(cpu, true);
- if (flags & ACPI_MADT_ENABLED) {
+ switch (pass) {
+ case 1: /* Pass 1 handle enabled processors */
+ if (cpu < 0)
+ cpu = find_first_zero_bit(cpumask_bits(cpu_present_mask), NR_CPUS);
num_processors++;
set_cpu_present(cpu, true);
- __cpu_number_map[cpuid] = cpu;
- __cpu_logical_map[cpu] = cpuid;
- } else
+ break;
+ case 2: /* Pass 2 handle disabled processors */
+ if (cpu < 0)
+ cpu = find_first_zero_bit(cpumask_bits(cpu_possible_mask), NR_CPUS);
disabled_cpus++;
+ break;
+ default:
+ return cpu;
+ }
+
+ set_cpu_possible(cpu, true);
+ __cpu_number_map[cpuid] = cpu;
+ __cpu_logical_map[cpu] = cpuid;
return cpu;
}
#endif
static int __init
-acpi_parse_processor(union acpi_subtable_headers *header, const unsigned long end)
+acpi_parse_p1_processor(union acpi_subtable_headers *header, const unsigned long end)
{
struct acpi_madt_core_pic *processor = NULL;
@@ -110,13 +110,30 @@ acpi_parse_processor(union acpi_subtable_headers *header, const unsigned long en
acpi_table_print_madt_entry(&header->common);
#ifdef CONFIG_SMP
acpi_core_pic[processor->core_id] = *processor;
- set_processor_mask(processor->core_id, processor->flags);
+ if (processor->flags & ACPI_MADT_ENABLED)
+ set_processor_mask(processor->core_id, 1);
#endif
return 0;
}
static int __init
+acpi_parse_p2_processor(union acpi_subtable_headers *header, const unsigned long end)
+{
+ struct acpi_madt_core_pic *processor = NULL;
+
+ processor = (struct acpi_madt_core_pic *)header;
+ if (BAD_MADT_ENTRY(processor, end))
+ return -EINVAL;
+
+#ifdef CONFIG_SMP
+ if (!(processor->flags & ACPI_MADT_ENABLED))
+ set_processor_mask(processor->core_id, 2);
+#endif
+
+ return 0;
+}
+static int __init
acpi_parse_eio_master(union acpi_subtable_headers *header, const unsigned long end)
{
static int core = 0;
@@ -143,12 +160,14 @@ static void __init acpi_process_madt(void)
}
#endif
acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC,
- acpi_parse_processor, MAX_CORE_PIC);
+ acpi_parse_p1_processor, MAX_CORE_PIC);
+
+ acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC,
+ acpi_parse_p2_processor, MAX_CORE_PIC);
acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC,
acpi_parse_eio_master, MAX_IO_PICS);
- cpu_enumerated = 1;
loongson_sysconf.nr_cpus = num_processors;
}
@@ -310,6 +329,10 @@ static int __ref acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
int nid;
nid = acpi_get_node(handle);
+
+ if (nid != NUMA_NO_NODE)
+ nid = early_cpu_to_node(cpu);
+
if (nid != NUMA_NO_NODE) {
set_cpuid_to_node(physid, nid);
node_set(nid, numa_nodes_parsed);
@@ -324,12 +347,14 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu
{
int cpu;
- cpu = set_processor_mask(physid, ACPI_MADT_ENABLED);
- if (cpu < 0) {
+ cpu = cpu_number_map(physid);
+ if (cpu < 0 || cpu >= nr_cpu_ids) {
pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
- return cpu;
+ return -ERANGE;
}
+ num_processors++;
+ set_cpu_present(cpu, true);
acpi_map_cpu2node(handle, cpu, physid);
*pcpu = cpu;
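Net effect of the two-pass parse: pass 1 gives enabled cores the low logical IDs and marks them present; pass 2 reserves possible-mask slots and the physid mapping for disabled, hotpluggable cores. CPU hotplug then only looks up the precomputed mapping instead of allocating a new number, roughly (a sketch of the resulting invariant, not additional kernel code):

    /* After MADT parsing (sketch):
     *   cpu_present_mask  - cores enabled at boot (pass 1)
     *   cpu_possible_mask - enabled plus hotpluggable cores (pass 2)
     * so acpi_map_cpu() can resolve a hotplugged core directly: */
    int cpu = cpu_number_map(physid);  /* filled in by set_processor_mask() */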
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index a5fc61f8b348..e5a39bbad078 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -51,11 +51,18 @@ static u64 paravt_steal_clock(int cpu)
}
#ifdef CONFIG_SMP
+static struct smp_ops native_ops;
+
static void pv_send_ipi_single(int cpu, unsigned int action)
{
int min, old;
irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
+ if (unlikely(action == ACTION_BOOT_CPU)) {
+ native_ops.send_ipi_single(cpu, action);
+ return;
+ }
+
old = atomic_fetch_or(BIT(action), &info->message);
if (old)
return;
@@ -75,6 +82,11 @@ static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
if (cpumask_empty(mask))
return;
+ if (unlikely(action == ACTION_BOOT_CPU)) {
+ native_ops.send_ipi_mask(mask, action);
+ return;
+ }
+
action = BIT(action);
for_each_cpu(i, mask) {
info = &per_cpu(irq_stat, i);
@@ -147,6 +159,8 @@ static void pv_init_ipi(void)
{
int r, swi;
+ /* Init native ipi irq for ACTION_BOOT_CPU */
+ native_ops.init_ipi();
swi = get_percpu_irq(INT_SWI0);
if (swi < 0)
panic("SWI0 IRQ mapping failed\n");
@@ -193,6 +207,7 @@ int __init pv_ipi_init(void)
return 0;
#ifdef CONFIG_SMP
+ native_ops = mp_ops;
mp_ops.init_ipi = pv_init_ipi;
mp_ops.send_ipi_single = pv_send_ipi_single;
mp_ops.send_ipi_mask = pv_send_ipi_mask;
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 9afc2d8b3414..5d59e9ce2772 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -302,7 +302,7 @@ static void __init fdt_smp_setup(void)
__cpu_number_map[cpuid] = cpu;
__cpu_logical_map[cpu] = cpuid;
- early_numa_add_cpu(cpu, 0);
+ early_numa_add_cpu(cpuid, 0);
set_cpuid_to_node(cpuid, 0);
}
@@ -331,11 +331,11 @@ void __init loongson_prepare_cpus(unsigned int max_cpus)
int i = 0;
parse_acpi_topology();
+ cpu_data[0].global_id = cpu_logical_map(0);
for (i = 0; i < loongson_sysconf.nr_cpus; i++) {
set_cpu_present(i, true);
csr_mail_send(0, __cpu_logical_map[i], 0);
- cpu_data[i].global_id = __cpu_logical_map[i];
}
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
@@ -380,6 +380,7 @@ void loongson_init_secondary(void)
cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
cpu_data[cpu].core = pptt_enabled ? cpu_data[cpu].core :
cpu_logical_map(cpu) % loongson_sysconf.cores_per_package;
+ cpu_data[cpu].global_id = cpu_logical_map(cpu);
}
void loongson_smp_finish(void)
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
index 427d6b1aec09..d2681272d8f0 100644
--- a/arch/loongarch/mm/kasan_init.c
+++ b/arch/loongarch/mm/kasan_init.c
@@ -13,6 +13,13 @@
static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+#ifdef __PAGETABLE_P4D_FOLDED
+#define __pgd_none(early, pgd) (0)
+#else
+#define __pgd_none(early, pgd) (early ? (pgd_val(pgd) == 0) : \
+(__pa(pgd_val(pgd)) == (unsigned long)__pa(kasan_early_shadow_p4d)))
+#endif
+
#ifdef __PAGETABLE_PUD_FOLDED
#define __p4d_none(early, p4d) (0)
#else
@@ -55,6 +62,9 @@ void *kasan_mem_to_shadow(const void *addr)
case XKPRANGE_UC_SEG:
offset = XKPRANGE_UC_SHADOW_OFFSET;
break;
+ case XKPRANGE_WC_SEG:
+ offset = XKPRANGE_WC_SHADOW_OFFSET;
+ break;
case XKVRANGE_VC_SEG:
offset = XKVRANGE_VC_SHADOW_OFFSET;
break;
@@ -79,6 +89,8 @@ const void *kasan_shadow_to_mem(const void *shadow_addr)
if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
+ else if (addr >= XKPRANGE_WC_SHADOW_OFFSET)
+ return (void *)(((addr - XKPRANGE_WC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_WC_START);
else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
@@ -142,6 +154,19 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node,
return pud_offset(p4dp, addr);
}
+static p4d_t *__init kasan_p4d_offset(pgd_t *pgdp, unsigned long addr, int node, bool early)
+{
+ if (__pgd_none(early, pgdp_get(pgdp))) {
+ phys_addr_t p4d_phys = early ?
+ __pa_symbol(kasan_early_shadow_p4d) : kasan_alloc_zeroed_page(node);
+ if (!early)
+ memcpy(__va(p4d_phys), kasan_early_shadow_p4d, sizeof(kasan_early_shadow_p4d));
+ pgd_populate(&init_mm, pgdp, (p4d_t *)__va(p4d_phys));
+ }
+
+ return p4d_offset(pgdp, addr);
+}
+
static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
unsigned long end, int node, bool early)
{
@@ -178,19 +203,19 @@ static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
do {
next = pud_addr_end(addr, end);
kasan_pmd_populate(pudp, addr, next, node, early);
- } while (pudp++, addr = next, addr != end);
+ } while (pudp++, addr = next, addr != end && __pud_none(early, READ_ONCE(*pudp)));
}
static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- p4d_t *p4dp = p4d_offset(pgdp, addr);
+ p4d_t *p4dp = kasan_p4d_offset(pgdp, addr, node, early);
do {
next = p4d_addr_end(addr, end);
kasan_pud_populate(p4dp, addr, next, node, early);
- } while (p4dp++, addr = next, addr != end);
+ } while (p4dp++, addr = next, addr != end && __p4d_none(early, READ_ONCE(*p4dp)));
}
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
@@ -218,7 +243,7 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
asmlinkage void __init kasan_early_init(void)
{
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
- BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END + 1, PGDIR_SIZE));
}
static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
@@ -233,7 +258,7 @@ static void __init clear_pgds(unsigned long start, unsigned long end)
* swapper_pg_dir. pgd_clear() can't be used
* here because it's nop on 2,3-level pagetable setups
*/
- for (; start < end; start += PGDIR_SIZE)
+ for (; start < end; start = pgd_addr_end(start, end))
kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
}
@@ -243,6 +268,17 @@ void __init kasan_init(void)
phys_addr_t pa_start, pa_end;
/*
+ * If PGDIR_SIZE is too large for cpu_vabits, KASAN_SHADOW_END will
+ * overflow UINTPTR_MAX and then look like a user space address.
+ * For example, PGDIR_SIZE of CONFIG_4KB_4LEVEL is 2^39, which is too
+ * large for Loongson-2K series whose cpu_vabits = 39.
+ */
+ if (KASAN_SHADOW_END < vm_map_base) {
+ pr_warn("PGDIR_SIZE too large for cpu_vabits, KernelAddressSanitizer disabled.\n");
+ return;
+ }
+
+ /*
* PGD was populated as invalid_pmd_table or invalid_pud_table
* in pagetable_init() which depends on how many levels of page
table you are using, but we had to clean the pgd of kasan
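Worked arithmetic for the wrap the new check guards against, under the example the comment gives of PGDIR_SIZE = 2^39 on a cpu_vabits = 39 part (the values below are illustrative, not taken from the tree):

    /* round_up() near the top of the 64-bit address space: */
    unsigned long align = 1UL << 39;             /* PGDIR_SIZE, 4KB_4LEVEL */
    unsigned long end   = 0xffffffe000000000UL;  /* example shadow end */
    unsigned long up    = (end + align - 1) & ~(align - 1);
    /* end + align - 1 exceeds UINTPTR_MAX, so up wraps to 0 and compares
     * below vm_map_base - exactly what kasan_init() now detects. */

Defining KASAN_SHADOW_END as the last byte of the region (the new "- 1") likewise keeps the constant representable when the shadow abuts the top of the address space, with the BUILD_BUG_ON adjusted to test alignment of KASAN_SHADOW_END + 1.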
diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h
index 89b6beeda0b8..663f587dc789 100644
--- a/arch/parisc/include/asm/mman.h
+++ b/arch/parisc/include/asm/mman.h
@@ -2,6 +2,7 @@
#ifndef __ASM_MMAN_H__
#define __ASM_MMAN_H__
+#include <linux/fs.h>
#include <uapi/asm/mman.h>
/* PARISC cannot allow mdwe as it needs writable stacks */
@@ -11,7 +12,7 @@ static inline bool arch_memory_deny_write_exec_supported(void)
}
#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+static inline unsigned long arch_calc_vm_flag_bits(struct file *file, unsigned long flags)
{
/*
* The stack on parisc grows upwards, so if userspace requests memory
@@ -23,6 +24,6 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
return 0;
}
-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
#endif /* __ASM_MMAN_H__ */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ba0492f9de65..ad8dc4ccdaab 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4898,6 +4898,18 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
BOOK3S_INTERRUPT_EXTERNAL, 0);
else
lpcr |= LPCR_MER;
+ } else {
+ /*
+ * L1's copy of L2's LPCR (vcpu->arch.vcore->lpcr) can get its MER bit
+ * unexpectedly set - e.g. during NMI handling when all register
+ * states are synchronized from L0 to L1. L1 needs to inform L0 about
+ * MER=1 only when there are pending external interrupts.
+ * The if-check above sets MER only when external interrupts are
+ * pending. Hence, explicitly mask off the MER bit here, as otherwise
+ * it may generate spurious interrupts in L2 KVM, causing an endless
+ * loop that leaves the L2 guest hung.
+ */
+ lpcr &= ~LPCR_MER;
}
} else if (vcpu->arch.pending_exceptions ||
vcpu->arch.doorbell_request ||
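The new else-branch ensures L1 advertises MER to L0 only while an external interrupt is actually pending; a stale MER bit copied back from L0 (e.g. during NMI-time register synchronization) would otherwise keep injecting spurious interrupts into L2. The invariant, as a sketch (vcpu_has_pending_ext_irq() is a hypothetical helper, not a kernel function):

    /* Sketch: LPCR_MER tracks pending external interrupts, nothing else. */
    if (vcpu_has_pending_ext_irq(vcpu))     /* hypothetical helper */
            lpcr |= LPCR_MER;
    else
            lpcr &= ~LPCR_MER;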
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index aef70336d624..92f3664dd933 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -305,9 +305,4 @@ static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled
extern void arch_scale_freq_tick(void);
#define arch_scale_freq_tick arch_scale_freq_tick
-#ifdef CONFIG_ACPI_CPPC_LIB
-void init_freq_invariance_cppc(void);
-#define arch_init_invariance_cppc init_freq_invariance_cppc
-#endif
-
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index 956984054bf3..aab9d0570841 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -110,7 +110,7 @@ static void amd_set_max_freq_ratio(void)
static DEFINE_MUTEX(freq_invariance_lock);
-void init_freq_invariance_cppc(void)
+static inline void init_freq_invariance_cppc(void)
{
static bool init_done;
@@ -127,6 +127,11 @@ void init_freq_invariance_cppc(void)
mutex_unlock(&freq_invariance_lock);
}
+void acpi_processor_init_invariance_cppc(void)
+{
+ init_freq_invariance_cppc();
+}
+
/*
* Get the highest performance register value.
* @cpu: CPU from which to get highest performance.
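init_freq_invariance_cppc() becomes static, and the exported acpi_processor_init_invariance_cppc() replaces the arch_init_invariance_cppc macro hook that the arm64 and x86 topology.h hunks above remove. The generic ACPI processor code is presumably left to declare the hook with an empty fallback, along these lines (a sketch of the expected wiring, not verified against include/acpi/processor.h):

    #ifdef CONFIG_ACPI_CPPC_LIB
    void acpi_processor_init_invariance_cppc(void);
    #else
    static inline void acpi_processor_init_invariance_cppc(void) { }
    #endif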
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2098dc689088..95c6beb8ce27 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2629,19 +2629,26 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (apic->apicv_active) {
- /* irr_pending is always true when apicv is activated. */
- apic->irr_pending = true;
+ /*
+ * When APICv is enabled, KVM must always search the IRR for a pending
+ * IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
+ * isn't running. If APICv is disabled, KVM _should_ search the IRR
+ * for a pending IRQ. But KVM currently doesn't ensure *all* hardware,
+ * e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
+ * the IRR at this time could race with IRQ delivery from hardware that
+ * still sees APICv as being enabled.
+ *
+ * FIXME: Ensure other vCPUs and devices observe the change in APICv
+ * state prior to updating KVM's metadata caches, so that KVM
+ * can safely search the IRR and set irr_pending accordingly.
+ */
+ apic->irr_pending = true;
+
+ if (apic->apicv_active)
apic->isr_count = 1;
- } else {
- /*
- * Don't clear irr_pending, searching the IRR can race with
- * updates from the CPU as APICv is still active from hardware's
- * perspective. The flag will be cleared as appropriate when
- * KVM injects the interrupt.
- */
+ else
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
- }
+
apic->highest_isr_cache = -1;
}
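After this change irr_pending is left conservatively true in both APICv modes: it may be spuriously set, and should be cleared only once a real scan of the IRR comes up empty. A sketch of the consumer-side contract (apic_search_irr() is lapic.c's existing IRR scan; the exact call site shown is an assumption):

    /* irr_pending is a may-have-work hint, never a guarantee; readers
     * re-check the real IRR before acting on it. */
    if (apic->irr_pending) {
            int vec = apic_search_irr(apic);    /* scans APIC_IRR */
            if (vec == -1)
                    apic->irr_pending = false;  /* hint was stale */
    }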
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0b851ef937f2..fb854cf20ac3 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -450,8 +450,11 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
goto e_free;
/* This needs to happen after SEV/SNP firmware initialization. */
- if (vm_type == KVM_X86_SNP_VM && snp_guest_req_init(kvm))
- goto e_free;
+ if (vm_type == KVM_X86_SNP_VM) {
+ ret = snp_guest_req_init(kvm);
+ if (ret)
+ goto e_free;
+ }
INIT_LIST_HEAD(&sev->regions_list);
INIT_LIST_HEAD(&sev->mirror_vms);
@@ -2212,10 +2215,6 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (sev->snp_context)
return -EINVAL;
- sev->snp_context = snp_context_create(kvm, argp);
- if (!sev->snp_context)
- return -ENOTTY;
-
if (params.flags)
return -EINVAL;
@@ -2230,6 +2229,10 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET)
return -EINVAL;
+ sev->snp_context = snp_context_create(kvm, argp);
+ if (!sev->snp_context)
+ return -ENOTTY;
+
start.gctx_paddr = __psp_pa(sev->snp_context);
start.policy = params.policy;
memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw));
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a8e7bc04d9bf..931a7361c30f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1197,11 +1197,14 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept);
/*
- * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
- * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
- * full TLB flush from the guest's perspective. This is required even
- * if VPID is disabled in the host as KVM may need to synchronize the
- * MMU in response to the guest TLB flush.
+ * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
+ * same VPID as the host, and so architecturally, linear and combined
+ * mappings for VPID=0 must be flushed at VM-Enter and VM-Exit. KVM
+ * emulates L2 sharing L1's VPID=0 by using vpid01 while running L2,
+ * and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01. This
+ * is required if VPID is disabled in KVM, as a TLB flush (there are no
+ * VPIDs) still occurs from L1's perspective, and KVM may need to
+ * synchronize the MMU in response to the guest TLB flush.
*
* Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
* EPT is a special snowflake, as guest-physical mappings aren't
@@ -2315,6 +2318,17 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
+ /*
+ * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
+ * same VPID as the host. Emulate this behavior by using vpid01 for L2
+ * if VPID is disabled in vmcs12. Note, if VPID is disabled, VM-Enter
+ * and VM-Exit are architecturally required to flush VPID=0, but *only*
+ * VPID=0. I.e. using vpid02 would be ok (so long as KVM emulates the
+ * required flushes), but doing so would cause KVM to over-flush. E.g.
+ * if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled,
+ * and then runs L2 X again, then KVM can and should retain TLB entries
+ * for VPID12=1.
+ */
if (enable_vpid) {
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
@@ -5950,6 +5964,12 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ /*
+ * Always flush the effective vpid02, i.e. never flush the current VPID
+ * and never explicitly flush vpid01. INVVPID targets a VPID, not a
+ * VMCS, and so whether or not the current vmcs12 has VPID enabled is
+ * irrelevant (and there may not be a loaded vmcs12).
+ */
vpid02 = nested_get_vpid02(vcpu);
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
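The rule both comments establish: vpid02 is the effective VPID whenever vmcs12 enables VPID, and vpid01 stands in for the guest's VPID=0 otherwise, for VM-Enter/VM-Exit flushes and INVVPID emulation alike. The selection that the truncated context above completes looks like this (the else branch is a best-effort reconstruction of prepare_vmcs02_early_rare(), not part of this hunk):

    if (enable_vpid) {
            if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
                    vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
            else
                    vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
    }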
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 81ed596e4454..d28618e9277e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -217,9 +217,11 @@ module_param(ple_window_shrink, uint, 0444);
static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, uint, 0444);
-/* Default is SYSTEM mode, 1 for host-guest mode */
+/* Default is SYSTEM mode, 1 for host-guest mode (which is BROKEN) */
int __read_mostly pt_mode = PT_MODE_SYSTEM;
+#ifdef CONFIG_BROKEN
module_param(pt_mode, int, S_IRUGO);
+#endif
struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;
@@ -3216,7 +3218,7 @@ void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu) && nested_cpu_has_vpid(get_vmcs12(vcpu)))
return nested_get_vpid02(vcpu);
return to_vmx(vcpu)->vpid;
}
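This is the runtime half of the nested.c change: once an L2 running without a vmcs12 VPID uses vpid01, every flush path routed through vmx_get_current_vpid() targets the right context automatically. For instance, vmx.c's current-TLB flush reduces to the following (a sketch consistent with vmx_flush_tlb_current(); treat the wrapper name as illustrative):

    /* Flushing "the current guest's TLB" picks vpid01 for an L2 running
     * without a vmcs12 VPID, and vpid02 otherwise. */
    static void flush_current_guest_tlb(struct kvm_vcpu *vcpu)
    {
            vpid_sync_context(vmx_get_current_vpid(vcpu));
    }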