aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/uapi/asm/socket.h6
-rw-r--r--arch/arc/Kconfig1
-rw-r--r--arch/arc/include/asm/cmpxchg.h6
-rw-r--r--arch/arm/Kconfig3
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi12
-rw-r--r--arch/arm/boot/dts/ti/omap/omap3-n900.dts2
-rw-r--r--arch/arm/crypto/Kconfig14
-rw-r--r--arch/arm/crypto/aes-ce-glue.c2
-rw-r--r--arch/arm/crypto/aes-cipher-glue.c5
-rw-r--r--arch/arm/crypto/aes-cipher.h13
-rw-r--r--arch/arm/crypto/aes-neonbs-glue.c133
-rw-r--r--arch/arm/include/asm/arm_pmuv3.h20
-rw-r--r--arch/arm/include/asm/cpu.h1
-rw-r--r--arch/arm/include/asm/dma-iommu.h2
-rw-r--r--arch/arm/include/asm/hypervisor.h2
-rw-r--r--arch/arm/include/asm/pgtable-3level-hwdef.h5
-rw-r--r--arch/arm/kernel/entry-armv.S12
-rw-r--r--arch/arm/kernel/setup.c14
-rw-r--r--arch/arm/mm/dma-mapping.c12
-rw-r--r--arch/arm/mm/mmu.c6
-rw-r--r--arch/arm/vfp/vfpinstr.h48
-rw-r--r--arch/arm64/Kconfig26
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi8
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts12
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi3
-rw-r--r--arch/arm64/boot/dts/freescale/imx93.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx95.dtsi14
-rw-r--r--arch/arm64/boot/dts/qcom/ipq5332.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts42
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-crd.dts70
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts54
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-qcp.dts53
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100.dtsi21
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi36
-rw-r--r--arch/arm64/boot/dts/rockchip/rk356x.dtsi7
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-base.dtsi6
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/crypto/poly1305-armv8.pl6
-rw-r--r--arch/arm64/include/asm/arm_pmuv3.h53
-rw-r--r--arch/arm64/include/asm/cpufeature.h6
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/el2_setup.h25
-rw-r--r--arch/arm64/include/asm/esr.h93
-rw-r--r--arch/arm64/include/asm/fpsimd.h2
-rw-r--r--arch/arm64/include/asm/hwcap.h1
-rw-r--r--arch/arm64/include/asm/hypervisor.h11
-rw-r--r--arch/arm64/include/asm/io.h4
-rw-r--r--arch/arm64/include/asm/kvm_arm.h1
-rw-r--r--arch/arm64/include/asm/kvm_asm.h6
-rw-r--r--arch/arm64/include/asm/kvm_host.h34
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h6
-rw-r--r--arch/arm64/include/asm/kvm_nested.h40
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h42
-rw-r--r--arch/arm64/include/asm/mem_encrypt.h15
-rw-r--r--arch/arm64/include/asm/memory.h2
-rw-r--r--arch/arm64/include/asm/mman.h10
-rw-r--r--arch/arm64/include/asm/mmu.h2
-rw-r--r--arch/arm64/include/asm/mmu_context.h46
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h20
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h8
-rw-r--r--arch/arm64/include/asm/pgtable.h34
-rw-r--r--arch/arm64/include/asm/pkeys.h106
-rw-r--r--arch/arm64/include/asm/por.h33
-rw-r--r--arch/arm64/include/asm/processor.h6
-rw-r--r--arch/arm64/include/asm/ptdump.h43
-rw-r--r--arch/arm64/include/asm/set_memory.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h26
-rw-r--r--arch/arm64/include/asm/thread_info.h2
-rw-r--r--arch/arm64/include/asm/traps.h1
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h1
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h1
-rw-r--r--arch/arm64/include/uapi/asm/mman.h9
-rw-r--r--arch/arm64/include/uapi/asm/sigcontext.h13
-rw-r--r--arch/arm64/kernel/cpu_errata.c10
-rw-r--r--arch/arm64/kernel/cpufeature.c23
-rw-r--r--arch/arm64/kernel/cpuinfo.c3
-rw-r--r--arch/arm64/kernel/hibernate.c2
-rw-r--r--arch/arm64/kernel/pci.c191
-rw-r--r--arch/arm64/kernel/process.c97
-rw-r--r--arch/arm64/kernel/ptrace.c46
-rw-r--r--arch/arm64/kernel/signal.c62
-rw-r--r--arch/arm64/kernel/smp.c160
-rw-r--r--arch/arm64/kernel/stacktrace.c4
-rw-r--r--arch/arm64/kernel/traps.c26
-rw-r--r--arch/arm64/kvm/Kconfig17
-rw-r--r--arch/arm64/kvm/Makefile3
-rw-r--r--arch/arm64/kvm/arm.c15
-rw-r--r--arch/arm64/kvm/at.c1101
-rw-r--r--arch/arm64/kvm/emulate-nested.c81
-rw-r--r--arch/arm64/kvm/fpsimd.c5
-rw-r--r--arch/arm64/kvm/guest.c6
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h5
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h3
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c21
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c6
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c48
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c97
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c3
-rw-r--r--arch/arm64/kvm/mmu.c9
-rw-r--r--arch/arm64/kvm/nested.c55
-rw-r--r--arch/arm64/kvm/pmu-emul.c14
-rw-r--r--arch/arm64/kvm/pmu.c87
-rw-r--r--arch/arm64/kvm/ptdump.c268
-rw-r--r--arch/arm64/kvm/sys_regs.c420
-rw-r--r--arch/arm64/kvm/sys_regs.h23
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c9
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c12
-rw-r--r--arch/arm64/kvm/vgic/vgic.c19
-rw-r--r--arch/arm64/kvm/vgic/vgic.h7
-rw-r--r--arch/arm64/mm/Makefile2
-rw-r--r--arch/arm64/mm/contpte.c6
-rw-r--r--arch/arm64/mm/fault.c55
-rw-r--r--arch/arm64/mm/init.c12
-rw-r--r--arch/arm64/mm/ioremap.c23
-rw-r--r--arch/arm64/mm/mem_encrypt.c50
-rw-r--r--arch/arm64/mm/mmap.c11
-rw-r--r--arch/arm64/mm/mmu.c45
-rw-r--r--arch/arm64/mm/proc.S4
-rw-r--r--arch/arm64/mm/ptdump.c70
-rw-r--r--arch/arm64/mm/trans_pgd.c6
-rw-r--r--arch/arm64/tools/cpucaps1
-rw-r--r--arch/arm64/tools/sysreg30
-rw-r--r--arch/loongarch/include/asm/Kbuild1
-rw-r--r--arch/loongarch/include/asm/dma-direct.h11
-rw-r--r--arch/loongarch/include/asm/hw_irq.h2
-rw-r--r--arch/loongarch/include/asm/kvm_csr.h6
-rw-r--r--arch/loongarch/include/asm/kvm_host.h37
-rw-r--r--arch/loongarch/include/asm/kvm_para.h12
-rw-r--r--arch/loongarch/include/asm/kvm_vcpu.h12
-rw-r--r--arch/loongarch/include/asm/loongarch.h11
-rw-r--r--arch/loongarch/include/asm/paravirt.h7
-rw-r--r--arch/loongarch/include/asm/qspinlock.h41
-rw-r--r--arch/loongarch/include/uapi/asm/Kbuild2
-rw-r--r--arch/loongarch/include/uapi/asm/kvm.h20
-rw-r--r--arch/loongarch/include/uapi/asm/kvm_para.h21
-rw-r--r--arch/loongarch/kernel/fpu.S4
-rw-r--r--arch/loongarch/kernel/irq.c3
-rw-r--r--arch/loongarch/kernel/paravirt.c47
-rw-r--r--arch/loongarch/kernel/setup.c2
-rw-r--r--arch/loongarch/kernel/smp.c4
-rw-r--r--arch/loongarch/kvm/exit.c46
-rw-r--r--arch/loongarch/kvm/switch.S4
-rw-r--r--arch/loongarch/kvm/timer.c7
-rw-r--r--arch/loongarch/kvm/vcpu.c342
-rw-r--r--arch/loongarch/kvm/vm.c69
-rw-r--r--arch/microblaze/mm/init.c5
-rw-r--r--arch/mips/alchemy/common/dma.c23
-rw-r--r--arch/mips/crypto/crc32-mips.c70
-rw-r--r--arch/mips/include/asm/cmp.h8
-rw-r--r--arch/mips/include/asm/dec/prom.h1
-rw-r--r--arch/mips/include/asm/mach-au1x00/au1000_dma.h1
-rw-r--r--arch/mips/include/asm/mips-boards/generic.h3
-rw-r--r--arch/mips/include/asm/mips_mt.h2
-rw-r--r--arch/mips/include/uapi/asm/sigcontext.h1
-rw-r--r--arch/mips/include/uapi/asm/socket.h6
-rw-r--r--arch/mips/jazz/setup.c2
-rw-r--r--arch/mips/kernel/cevt-r4k.c15
-rw-r--r--arch/mips/kernel/cpu-probe.c4
-rw-r--r--arch/mips/kernel/csrc-r4k.c4
-rw-r--r--arch/mips/kernel/mips-mt.c77
-rw-r--r--arch/mips/ralink/irq-gic.c1
-rw-r--r--arch/mips/ralink/timer-gic.c2
-rw-r--r--arch/parisc/include/uapi/asm/socket.h6
-rw-r--r--arch/parisc/mm/init.c16
-rw-r--r--arch/powerpc/Kconfig4
-rw-r--r--arch/powerpc/crypto/curve25519-ppc64le-core.c1
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h4
-rw-r--r--arch/powerpc/include/asm/pgtable-types.h12
-rw-r--r--arch/powerpc/kernel/vdso/vdso32.lds.S4
-rw-r--r--arch/powerpc/kernel/vdso/vdso64.lds.S4
-rw-r--r--arch/powerpc/lib/qspinlock.c10
-rw-r--r--arch/powerpc/mm/nohash/tlb_64e.c2
-rw-r--r--arch/powerpc/platforms/chrp/pegasos_eth.c7
-rw-r--r--arch/riscv/Kconfig6
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110-common.dtsi6
-rw-r--r--arch/riscv/include/asm/irq.h55
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_pmu.h21
-rw-r--r--arch/riscv/include/asm/processor.h26
-rw-r--r--arch/riscv/include/asm/sbi.h20
-rw-r--r--arch/riscv/kernel/Makefile6
-rw-r--r--arch/riscv/kernel/acpi.c31
-rw-r--r--arch/riscv/kernel/sbi.c63
-rw-r--r--arch/riscv/kernel/sbi_ecall.c48
-rw-r--r--arch/riscv/kernel/traps_misaligned.c4
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c14
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c4
-rw-r--r--arch/riscv/mm/cacheflush.c12
-rw-r--r--arch/riscv/mm/init.c2
-rw-r--r--arch/s390/Kconfig13
-rw-r--r--arch/s390/boot/startup.c58
-rw-r--r--arch/s390/boot/vmem.c14
-rw-r--r--arch/s390/boot/vmlinux.lds.S7
-rw-r--r--arch/s390/include/asm/page.h3
-rw-r--r--arch/s390/kernel/setup.c19
-rw-r--r--arch/s390/kernel/vmlinux.lds.S2
-rw-r--r--arch/s390/tools/relocs.c2
-rw-r--r--arch/sh/Kconfig1
-rw-r--r--arch/sh/include/asm/cmpxchg.h3
-rw-r--r--arch/sparc/include/uapi/asm/socket.h6
-rw-r--r--arch/x86/Kconfig128
-rw-r--r--arch/x86/coco/tdx/tdx.c1
-rw-r--r--arch/x86/crypto/Kconfig8
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c61
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S16
-rw-r--r--arch/x86/events/intel/core.c23
-rw-r--r--arch/x86/hyperv/hv_init.c5
-rw-r--r--arch/x86/include/asm/cpu_device_id.h20
-rw-r--r--arch/x86/include/asm/fpu/types.h7
-rw-r--r--arch/x86/include/asm/intel-family.h87
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mshyperv.h1
-rw-r--r--arch/x86/include/asm/page_64.h1
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h4
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/resctrl.h6
-rw-r--r--arch/x86/include/asm/sev.h2
-rw-r--r--arch/x86/include/asm/topology.h13
-rw-r--r--arch/x86/kernel/acpi/cppc.c172
-rw-r--r--arch/x86/kernel/apic/apic.c11
-rw-r--r--arch/x86/kernel/cpu/amd.c16
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c89
-rw-r--r--arch/x86/kernel/cpu/bugs.c60
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c18
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c2
-rw-r--r--arch/x86/kernel/cpu/mce/apei.c18
-rw-r--r--arch/x86/kernel/cpu/mce/core.c38
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h2
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c192
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c21
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c8
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c34
-rw-r--r--arch/x86/kernel/fpu/xstate.c3
-rw-r--r--arch/x86/kernel/fpu/xstate.h4
-rw-r--r--arch/x86/kvm/Kconfig7
-rw-r--r--arch/x86/kvm/mmu/mmu.c12
-rw-r--r--arch/x86/kvm/mmu/spte.c6
-rw-r--r--arch/x86/kvm/mmu/spte.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c8
-rw-r--r--arch/x86/kvm/svm/svm.c15
-rw-r--r--arch/x86/kvm/vmx/vmx.c10
-rw-r--r--arch/x86/kvm/x86.c6
-rw-r--r--arch/x86/mm/init_64.c4
-rw-r--r--arch/x86/mm/kaslr.c32
-rw-r--r--arch/xtensa/Kconfig1
-rw-r--r--arch/xtensa/include/asm/cmpxchg.h2
260 files changed, 5574 insertions, 1792 deletions
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index e94f621903fe..251b73c5481e 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -140,6 +140,12 @@
#define SO_PASSPIDFD 76
#define SO_PEERPIDFD 77
+#define SO_DEVMEM_LINEAR 78
+#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF 79
+#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED 80
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index fd0b0a0d4686..163608fd49d1 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -13,6 +13,7 @@ config ARC
select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select ARCH_NEED_CMPXCHG_1_EMU
select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
select ARCH_32BIT_OFF_T
select BUILDTIME_TABLE_SORT
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index e138fde067de..58045c898340 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -8,6 +8,7 @@
#include <linux/build_bug.h>
#include <linux/types.h>
+#include <linux/cmpxchg-emu.h>
#include <asm/barrier.h>
#include <asm/smp.h>
@@ -46,6 +47,9 @@
__typeof__(*(ptr)) _prev_; \
\
switch(sizeof((_p_))) { \
+ case 1: \
+ _prev_ = (__typeof__(*(ptr)))cmpxchg_emu_u8((volatile u8 *)_p_, (uintptr_t)_o_, (uintptr_t)_n_); \
+ break; \
case 4: \
_prev_ = __cmpxchg(_p_, _o_, _n_); \
break; \
@@ -65,8 +69,6 @@
__typeof__(*(ptr)) _prev_; \
unsigned long __flags; \
\
- BUILD_BUG_ON(sizeof(_p_) != 4); \
- \
/* \
* spin lock/unlock provide the needed smp_mb() before/after \
*/ \
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 54b2bb817a7f..0ec034933cae 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -64,6 +64,7 @@ config ARM
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_IRQ_IPI if SMP
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_DEVICES
select GENERIC_EARLY_IOREMAP
select GENERIC_IDLE_POLL_SETUP
select GENERIC_IRQ_MULTI_HANDLER
@@ -117,7 +118,7 @@ config ARM
select HAVE_KERNEL_XZ
select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
select HAVE_KRETPROBES if HAVE_KPROBES
- select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_IS_LLD)
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_OPTPROBES if !THUMB2_KERNEL
diff --git a/arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi
index 52a0f6ee426f..bcf4d9c870ec 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi
@@ -274,24 +274,24 @@
led@0 {
chan-name = "R";
- led-cur = /bits/ 8 <0x20>;
- max-cur = /bits/ 8 <0x60>;
+ led-cur = /bits/ 8 <0x6e>;
+ max-cur = /bits/ 8 <0xc8>;
reg = <0>;
color = <LED_COLOR_ID_RED>;
};
led@1 {
chan-name = "G";
- led-cur = /bits/ 8 <0x20>;
- max-cur = /bits/ 8 <0x60>;
+ led-cur = /bits/ 8 <0xbe>;
+ max-cur = /bits/ 8 <0xc8>;
reg = <1>;
color = <LED_COLOR_ID_GREEN>;
};
led@2 {
chan-name = "B";
- led-cur = /bits/ 8 <0x20>;
- max-cur = /bits/ 8 <0x60>;
+ led-cur = /bits/ 8 <0xbe>;
+ max-cur = /bits/ 8 <0xc8>;
reg = <2>;
color = <LED_COLOR_ID_BLUE>;
};
diff --git a/arch/arm/boot/dts/ti/omap/omap3-n900.dts b/arch/arm/boot/dts/ti/omap/omap3-n900.dts
index 07c5b963af78..4bde3342bb95 100644
--- a/arch/arm/boot/dts/ti/omap/omap3-n900.dts
+++ b/arch/arm/boot/dts/ti/omap/omap3-n900.dts
@@ -781,7 +781,7 @@
mount-matrix = "-1", "0", "0",
"0", "1", "0",
- "0", "0", "1";
+ "0", "0", "-1";
};
cam1: camera@3e {
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 847b7a003356..5ff49a5e9afc 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -166,10 +166,9 @@ config CRYPTO_AES_ARM
config CRYPTO_AES_ARM_BS
tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (bit-sliced NEON)"
depends on KERNEL_MODE_NEON
+ select CRYPTO_AES_ARM
select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES
- select CRYPTO_AES
- select CRYPTO_CBC
select CRYPTO_SIMD
help
Length-preserving ciphers: AES cipher algorithms (FIPS-197)
@@ -183,8 +182,15 @@ config CRYPTO_AES_ARM_BS
Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
and for XTS mode encryption, CBC and XTS mode decryption speedup is
around 25%. (CBC encryption speed is not affected by this driver.)
- This implementation does not rely on any lookup tables so it is
- believed to be invulnerable to cache timing attacks.
+
+ The bit sliced AES code does not use lookup tables, so it is believed
+ to be invulnerable to cache timing attacks. However, since the bit
+ sliced AES code cannot process single blocks efficiently, in certain
+ cases table-based code with some countermeasures against cache timing
+ attacks will still be used as a fallback method; specifically CBC
+ encryption (not CBC decryption), the encryption of XTS tweaks, XTS
+ ciphertext stealing when the message isn't a multiple of 16 bytes, and
+ CTR when invoked in a context in which NEON instructions are unusable.
config CRYPTO_AES_ARM_CE
tristate "Ciphers: AES, modes: ECB/CBC/CTS/CTR/XTS (ARMv8 Crypto Extensions)"
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index b668c97663ec..f5b66f4cf45d 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -711,7 +711,7 @@ static int __init aes_init(void)
algname = aes_algs[i].base.cra_name + 2;
drvname = aes_algs[i].base.cra_driver_name + 2;
basename = aes_algs[i].base.cra_driver_name;
- simd = simd_skcipher_create_compat(algname, drvname, basename);
+ simd = simd_skcipher_create_compat(aes_algs + i, algname, drvname, basename);
err = PTR_ERR(simd);
if (IS_ERR(simd))
goto unregister_simds;
diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
index 6dfaef2d8f91..29efb7833960 100644
--- a/arch/arm/crypto/aes-cipher-glue.c
+++ b/arch/arm/crypto/aes-cipher-glue.c
@@ -9,9 +9,10 @@
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <linux/module.h>
+#include "aes-cipher.h"
-asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
-asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
+EXPORT_SYMBOL_GPL(__aes_arm_encrypt);
+EXPORT_SYMBOL_GPL(__aes_arm_decrypt);
static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
diff --git a/arch/arm/crypto/aes-cipher.h b/arch/arm/crypto/aes-cipher.h
new file mode 100644
index 000000000000..d5db2b87eb69
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef ARM_CRYPTO_AES_CIPHER_H
+#define ARM_CRYPTO_AES_CIPHER_H
+
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds,
+ const u8 *in, u8 *out);
+asmlinkage void __aes_arm_decrypt(const u32 rk[], int rounds,
+ const u8 *in, u8 *out);
+
+#endif /* ARM_CRYPTO_AES_CIPHER_H */
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 201eb35dde37..f6be80b5938b 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -9,24 +9,22 @@
#include <asm/simd.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
-#include <crypto/internal/cipher.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <crypto/xts.h>
#include <linux/module.h>
+#include "aes-cipher.h"
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_DESCRIPTION("Bit sliced AES using NEON instructions");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ecb(aes)");
-MODULE_ALIAS_CRYPTO("cbc(aes)-all");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
-MODULE_IMPORT_NS(CRYPTO_INTERNAL);
-
asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
@@ -52,13 +50,13 @@ struct aesbs_ctx {
struct aesbs_cbc_ctx {
struct aesbs_ctx key;
- struct crypto_skcipher *enc_tfm;
+ struct crypto_aes_ctx fallback;
};
struct aesbs_xts_ctx {
struct aesbs_ctx key;
- struct crypto_cipher *cts_tfm;
- struct crypto_cipher *tweak_tfm;
+ struct crypto_aes_ctx fallback;
+ struct crypto_aes_ctx tweak_key;
};
struct aesbs_ctr_ctx {
@@ -129,37 +127,49 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_aes_ctx rk;
int err;
- err = aes_expandkey(&rk, in_key, key_len);
+ err = aes_expandkey(&ctx->fallback, in_key, key_len);
if (err)
return err;
ctx->key.rounds = 6 + key_len / 4;
kernel_neon_begin();
- aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
+ aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
kernel_neon_end();
- memzero_explicit(&rk, sizeof(rk));
- return crypto_skcipher_setkey(ctx->enc_tfm, in_key, key_len);
+ return 0;
}
static int cbc_encrypt(struct skcipher_request *req)
{
- struct skcipher_request *subreq = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ const struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
- skcipher_request_set_tfm(subreq, ctx->enc_tfm);
- skcipher_request_set_callback(subreq,
- skcipher_request_flags(req),
- NULL, NULL);
- skcipher_request_set_crypt(subreq, req->src, req->dst,
- req->cryptlen, req->iv);
+ err = skcipher_walk_virt(&walk, req, false);
- return crypto_skcipher_encrypt(subreq);
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ u8 *prev = walk.iv;
+
+ do {
+ crypto_xor_cpy(dst, src, prev, AES_BLOCK_SIZE);
+ __aes_arm_encrypt(ctx->fallback.key_enc,
+ ctx->key.rounds, dst, dst);
+ prev = dst;
+ src += AES_BLOCK_SIZE;
+ dst += AES_BLOCK_SIZE;
+ nbytes -= AES_BLOCK_SIZE;
+ } while (nbytes >= AES_BLOCK_SIZE);
+ memcpy(walk.iv, prev, AES_BLOCK_SIZE);
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+ return err;
}
static int cbc_decrypt(struct skcipher_request *req)
@@ -190,30 +200,6 @@ static int cbc_decrypt(struct skcipher_request *req)
return err;
}
-static int cbc_init(struct crypto_skcipher *tfm)
-{
- struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- unsigned int reqsize;
-
- ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_NEED_FALLBACK);
- if (IS_ERR(ctx->enc_tfm))
- return PTR_ERR(ctx->enc_tfm);
-
- reqsize = sizeof(struct skcipher_request);
- reqsize += crypto_skcipher_reqsize(ctx->enc_tfm);
- crypto_skcipher_set_reqsize(tfm, reqsize);
-
- return 0;
-}
-
-static void cbc_exit(struct crypto_skcipher *tfm)
-{
- struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_skcipher(ctx->enc_tfm);
-}
-
static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -271,16 +257,8 @@ static int ctr_encrypt(struct skcipher_request *req)
static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
{
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- unsigned long flags;
-
- /*
- * Temporarily disable interrupts to avoid races where
- * cachelines are evicted when the CPU is interrupted
- * to do something else.
- */
- local_irq_save(flags);
- aes_encrypt(&ctx->fallback, dst, src);
- local_irq_restore(flags);
+
+ __aes_arm_encrypt(ctx->fallback.key_enc, ctx->key.rounds, src, dst);
}
static int ctr_encrypt_sync(struct skcipher_request *req)
@@ -302,45 +280,23 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
return err;
key_len /= 2;
- err = crypto_cipher_setkey(ctx->cts_tfm, in_key, key_len);
+ err = aes_expandkey(&ctx->fallback, in_key, key_len);
if (err)
return err;
- err = crypto_cipher_setkey(ctx->tweak_tfm, in_key + key_len, key_len);
+ err = aes_expandkey(&ctx->tweak_key, in_key + key_len, key_len);
if (err)
return err;
return aesbs_setkey(tfm, in_key, key_len);
}
-static int xts_init(struct crypto_skcipher *tfm)
-{
- struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- ctx->cts_tfm = crypto_alloc_cipher("aes", 0, 0);
- if (IS_ERR(ctx->cts_tfm))
- return PTR_ERR(ctx->cts_tfm);
-
- ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0);
- if (IS_ERR(ctx->tweak_tfm))
- crypto_free_cipher(ctx->cts_tfm);
-
- return PTR_ERR_OR_ZERO(ctx->tweak_tfm);
-}
-
-static void xts_exit(struct crypto_skcipher *tfm)
-{
- struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_cipher(ctx->tweak_tfm);
- crypto_free_cipher(ctx->cts_tfm);
-}
-
static int __xts_crypt(struct skcipher_request *req, bool encrypt,
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[], int))
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ const int rounds = ctx->key.rounds;
int tail = req->cryptlen % AES_BLOCK_SIZE;
struct skcipher_request subreq;
u8 buf[2 * AES_BLOCK_SIZE];
@@ -364,7 +320,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
if (err)
return err;
- crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv);
+ __aes_arm_encrypt(ctx->tweak_key.key_enc, rounds, walk.iv, walk.iv);
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -378,7 +334,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
kernel_neon_begin();
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
- ctx->key.rounds, blocks, walk.iv, reorder_last_tweak);
+ rounds, blocks, walk.iv, reorder_last_tweak);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
@@ -396,9 +352,9 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
if (encrypt)
- crypto_cipher_encrypt_one(ctx->cts_tfm, buf, buf);
+ __aes_arm_encrypt(ctx->fallback.key_enc, rounds, buf, buf);
else
- crypto_cipher_decrypt_one(ctx->cts_tfm, buf, buf);
+ __aes_arm_decrypt(ctx->fallback.key_dec, rounds, buf, buf);
crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
@@ -439,8 +395,7 @@ static struct skcipher_alg aes_algs[] = { {
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.base.cra_module = THIS_MODULE,
- .base.cra_flags = CRYPTO_ALG_INTERNAL |
- CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
@@ -449,8 +404,6 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = aesbs_cbc_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- .init = cbc_init,
- .exit = cbc_exit,
}, {
.base.cra_name = "__ctr(aes)",
.base.cra_driver_name = "__ctr-aes-neonbs",
@@ -500,8 +453,6 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = aesbs_xts_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
- .init = xts_init,
- .exit = xts_exit,
} };
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
@@ -540,7 +491,7 @@ static int __init aes_init(void)
algname = aes_algs[i].base.cra_name + 2;
drvname = aes_algs[i].base.cra_driver_name + 2;
basename = aes_algs[i].base.cra_driver_name;
- simd = simd_skcipher_create_compat(algname, drvname, basename);
+ simd = simd_skcipher_create_compat(aes_algs + i, algname, drvname, basename);
err = PTR_ERR(simd);
if (IS_ERR(simd))
goto unregister_simds;
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index a41b503b7dcd..f63ba8986b24 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -127,6 +127,12 @@ static inline u32 read_pmuver(void)
return (dfr0 >> 24) & 0xf;
}
+static inline bool pmuv3_has_icntr(void)
+{
+ /* FEAT_PMUv3_ICNTR not accessible for 32-bit */
+ return false;
+}
+
static inline void write_pmcr(u32 val)
{
write_sysreg(val, PMCR);
@@ -152,6 +158,13 @@ static inline u64 read_pmccntr(void)
return read_sysreg(PMCCNTR);
}
+static inline void write_pmicntr(u64 val) {}
+
+static inline u64 read_pmicntr(void)
+{
+ return 0;
+}
+
static inline void write_pmcntenset(u32 val)
{
write_sysreg(val, PMCNTENSET);
@@ -177,6 +190,13 @@ static inline void write_pmccfiltr(u32 val)
write_sysreg(val, PMCCFILTR);
}
+static inline void write_pmicfiltr(u64 val) {}
+
+static inline u64 read_pmicfiltr(void)
+{
+ return 0;
+}
+
static inline void write_pmovsclr(u32 val)
{
write_sysreg(val, PMOVSR);
diff --git a/arch/arm/include/asm/cpu.h b/arch/arm/include/asm/cpu.h
index bd6fdb4b922d..9d8863537aa5 100644
--- a/arch/arm/include/asm/cpu.h
+++ b/arch/arm/include/asm/cpu.h
@@ -11,7 +11,6 @@
#include <linux/cpu.h>
struct cpuinfo_arm {
- struct cpu cpu;
u32 cpuid;
#ifdef CONFIG_SMP
unsigned int loops_per_jiffy;
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
index 82ec1ccf1fee..2ce4c5683e6d 100644
--- a/arch/arm/include/asm/dma-iommu.h
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -24,7 +24,7 @@ struct dma_iommu_mapping {
};
struct dma_iommu_mapping *
-arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size);
+arm_iommu_create_mapping(struct device *dev, dma_addr_t base, u64 size);
void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
diff --git a/arch/arm/include/asm/hypervisor.h b/arch/arm/include/asm/hypervisor.h
index bd61502b9715..8a648e506540 100644
--- a/arch/arm/include/asm/hypervisor.h
+++ b/arch/arm/include/asm/hypervisor.h
@@ -7,4 +7,6 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
+static inline void kvm_arch_init_hyp_services(void) { };
+
#endif
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index dfab3e982cbf..944fc9955528 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -106,6 +106,11 @@
/*
* TTBCR register bits.
+ *
+ * The ORGN0 and IRGN0 bits enables different forms of caching when
+ * walking the translation table. Clearing these bits (which is claimed
+ * to be the reset default) means "normal memory, [outer|inner]
+ * non-cacheable"
*/
#define TTBCR_EAE (1 << 31)
#define TTBCR_IMP (1 << 30)
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index f01d23a220e6..1dfae1af8e31 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -29,6 +29,12 @@
#include "entry-header.S"
#include <asm/probes.h>
+#ifdef CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+#define RELOC_TEXT_NONE .reloc .text, R_ARM_NONE, .
+#else
+#define RELOC_TEXT_NONE
+#endif
+
/*
* Interrupt handling.
*/
@@ -1065,7 +1071,7 @@ vector_addrexcptn:
.globl vector_fiq
.section .vectors, "ax", %progbits
- .reloc .text, R_ARM_NONE, .
+ RELOC_TEXT_NONE
W(b) vector_rst
W(b) vector_und
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi )
@@ -1079,7 +1085,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi )
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
.section .vectors.bhb.loop8, "ax", %progbits
- .reloc .text, R_ARM_NONE, .
+ RELOC_TEXT_NONE
W(b) vector_rst
W(b) vector_bhb_loop8_und
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi )
@@ -1092,7 +1098,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi )
W(b) vector_bhb_loop8_fiq
.section .vectors.bhb.bpiall, "ax", %progbits
- .reloc .text, R_ARM_NONE, .
+ RELOC_TEXT_NONE
W(b) vector_rst
W(b) vector_bhb_bpiall_und
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi )
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 7b33b157fca0..e6a857bf0ce6 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1201,20 +1201,10 @@ void __init setup_arch(char **cmdline_p)
mdesc->init_early();
}
-
-static int __init topology_init(void)
+bool arch_cpu_is_hotpluggable(int num)
{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct cpuinfo_arm *cpuinfo = &per_cpu(cpu_data, cpu);
- cpuinfo->cpu.hotpluggable = platform_can_hotplug_cpu(cpu);
- register_cpu(&cpuinfo->cpu, cpu);
- }
-
- return 0;
+ return platform_can_hotplug_cpu(num);
}
-subsys_initcall(topology_init);
#ifdef CONFIG_HAVE_PROC_CPU
static int __init proc_cpu_init(void)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 5adf1769eee4..88c2d68a69c9 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1532,7 +1532,7 @@ static const struct dma_map_ops iommu_ops = {
/**
* arm_iommu_create_mapping
- * @bus: pointer to the bus holding the client device (for IOMMU calls)
+ * @dev: pointer to the client device (for IOMMU calls)
* @base: start address of the valid IO address space
* @size: maximum size of the valid IO address space
*
@@ -1544,7 +1544,7 @@ static const struct dma_map_ops iommu_ops = {
* arm_iommu_attach_device function.
*/
struct dma_iommu_mapping *
-arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size)
+arm_iommu_create_mapping(struct device *dev, dma_addr_t base, u64 size)
{
unsigned int bits = size >> PAGE_SHIFT;
unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
@@ -1585,9 +1585,11 @@ arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size)
spin_lock_init(&mapping->lock);
- mapping->domain = iommu_domain_alloc(bus);
- if (!mapping->domain)
+ mapping->domain = iommu_paging_domain_alloc(dev);
+ if (IS_ERR(mapping->domain)) {
+ err = PTR_ERR(mapping->domain);
goto err4;
+ }
kref_init(&mapping->kref);
return mapping;
@@ -1718,7 +1720,7 @@ static void arm_setup_iommu_dma_ops(struct device *dev)
dma_base = dma_range_map_min(dev->dma_range_map);
size = dma_range_map_max(dev->dma_range_map) - dma_base;
}
- mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
+ mapping = arm_iommu_create_mapping(dev, dma_base, size);
if (IS_ERR(mapping)) {
pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
size, dev_name(dev));
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 3f774856ca67..f85c177cdf8d 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1638,7 +1638,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
{
pgtables_remap *lpae_pgtables_remap;
unsigned long pa_pgd;
- unsigned int cr, ttbcr;
+ u32 cr, ttbcr, tmp;
long long offset;
if (!mdesc->pv_fixup)
@@ -1688,7 +1688,9 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
cr = get_cr();
set_cr(cr & ~(CR_I | CR_C));
ttbcr = cpu_get_ttbcr();
- cpu_set_ttbcr(ttbcr & ~(3 << 8 | 3 << 10));
+ /* Disable all kind of caching of the translation table */
+ tmp = ttbcr & ~(TTBCR_ORGN0_MASK | TTBCR_IRGN0_MASK);
+ cpu_set_ttbcr(tmp);
flush_cache_all();
/*
diff --git a/arch/arm/vfp/vfpinstr.h b/arch/arm/vfp/vfpinstr.h
index 3c7938fd40aa..32090b0fb250 100644
--- a/arch/arm/vfp/vfpinstr.h
+++ b/arch/arm/vfp/vfpinstr.h
@@ -64,33 +64,37 @@
#ifdef CONFIG_AS_VFP_VMRS_FPINST
-#define fmrx(_vfp_) ({ \
- u32 __v; \
- asm(".fpu vfpv2\n" \
- "vmrs %0, " #_vfp_ \
- : "=r" (__v) : : "cc"); \
- __v; \
- })
-
-#define fmxr(_vfp_,_var_) \
- asm(".fpu vfpv2\n" \
- "vmsr " #_vfp_ ", %0" \
- : : "r" (_var_) : "cc")
+#define fmrx(_vfp_) ({ \
+ u32 __v; \
+ asm volatile (".fpu vfpv2\n" \
+ "vmrs %0, " #_vfp_ \
+ : "=r" (__v) : : "cc"); \
+ __v; \
+})
+
+#define fmxr(_vfp_, _var_) ({ \
+ asm volatile (".fpu vfpv2\n" \
+ "vmsr " #_vfp_ ", %0" \
+ : : "r" (_var_) : "cc"); \
+})
#else
#define vfpreg(_vfp_) #_vfp_
-#define fmrx(_vfp_) ({ \
- u32 __v; \
- asm("mrc p10, 7, %0, " vfpreg(_vfp_) ", cr0, 0 @ fmrx %0, " #_vfp_ \
- : "=r" (__v) : : "cc"); \
- __v; \
- })
-
-#define fmxr(_vfp_,_var_) \
- asm("mcr p10, 7, %0, " vfpreg(_vfp_) ", cr0, 0 @ fmxr " #_vfp_ ", %0" \
- : : "r" (_var_) : "cc")
+#define fmrx(_vfp_) ({ \
+ u32 __v; \
+ asm volatile ("mrc p10, 7, %0, " vfpreg(_vfp_) "," \
+ "cr0, 0 @ fmrx %0, " #_vfp_ \
+ : "=r" (__v) : : "cc"); \
+ __v; \
+})
+
+#define fmxr(_vfp_, _var_) ({ \
+ asm volatile ("mcr p10, 7, %0, " vfpreg(_vfp_) "," \
+ "cr0, 0 @ fmxr " #_vfp_ ", %0" \
+ : : "r" (_var_) : "cc"); \
+})
#endif
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a2f8ff354ca6..ed15b876fa74 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -34,6 +34,7 @@ config ARM64
select ARCH_HAS_KERNEL_FPU_SUPPORT if KERNEL_MODE_NEON
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP
@@ -423,7 +424,7 @@ config AMPERE_ERRATUM_AC03_CPU_38
default y
help
This option adds an alternative code sequence to work around Ampere
- erratum AC03_CPU_38 on AmpereOne.
+ errata AC03_CPU_38 and AC04_CPU_10 on AmpereOne.
The affected design reports FEAT_HAFDBS as not implemented in
ID_AA64MMFR1_EL1.HAFDBS, but (V)TCR_ELx.{HA,HD} are not RES0
@@ -2137,6 +2138,29 @@ config ARM64_EPAN
if the cpu does not implement the feature.
endmenu # "ARMv8.7 architectural features"
+menu "ARMv8.9 architectural features"
+
+config ARM64_POE
+ prompt "Permission Overlay Extension"
+ def_bool y
+ select ARCH_USES_HIGH_VMA_FLAGS
+ select ARCH_HAS_PKEYS
+ help
+ The Permission Overlay Extension is used to implement Memory
+ Protection Keys. Memory Protection Keys provides a mechanism for
+ enforcing page-based protections, but without requiring modification
+ of the page tables when an application changes protection domains.
+
+ For details, see Documentation/core-api/protection-keys.rst
+
+ If unsure, say y.
+
+config ARCH_PKEY_BITS
+ int
+ default 3
+
+endmenu # "ARMv8.9 architectural features"
+
config ARM64_SVE
bool "ARM Scalable Vector Extension support"
default y
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 6b6e3ee950e5..acf293310f7a 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -175,7 +175,7 @@
};
};
- core-cluster-thermal {
+ cluster-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tmu 1>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
index 17f4e3171120..ab4c919e3e16 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
@@ -214,7 +214,7 @@
};
};
- core-cluster-thermal {
+ cluster-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tmu 3>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
index 200e52622f99..55019866d6a2 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
@@ -182,7 +182,7 @@
};
};
- core-cluster-thermal {
+ cluster-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tmu 3>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
index 8ce4b6aae79d..e3a7db21fe29 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
@@ -131,7 +131,7 @@
};
thermal-zones {
- core-cluster-thermal {
+ cluster-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tmu 0>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
index bde89de2576e..1b306d6802ce 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
@@ -122,7 +122,7 @@
};
};
- core-cluster1-thermal {
+ cluster1-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tmu 4>;
@@ -151,7 +151,7 @@
};
};
- core-cluster2-thermal {
+ cluster2-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tmu 5>;
@@ -180,7 +180,7 @@
};
};
- core-cluster3-thermal {
+ cluster3-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tmu 6>;
@@ -209,7 +209,7 @@
};
};
- core-cluster4-thermal {
+ cluster4-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tmu 7>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
index 26c7ca31e22e..bd75a658767d 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
@@ -492,7 +492,7 @@
};
};
- ddr-cluster5-thermal {
+ ddr-ctrl5-thermal {
polling-delay-passive = <1000>;
polling-delay = <5000>;
thermal-sensors = <&tmu 1>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso
index bf3e04651ba0..353ace3601dc 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso
+++ b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso
@@ -21,7 +21,7 @@
&gpio3 {
pinctrl-names = "default";
- pinctrcl-0 = <&pinctrl_gpio3_hog>;
+ pinctrl-0 = <&pinctrl_gpio3_hog>;
uart4_rs485_en {
gpio-hog;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso
index f4448cde0407..8a75d6783ad2 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso
+++ b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso
@@ -22,7 +22,7 @@
&gpio3 {
pinctrl-names = "default";
- pinctrcl-0 = <&pinctrl_gpio3_hog>;
+ pinctrl-0 = <&pinctrl_gpio3_hog>;
uart4_rs485_en {
gpio-hog;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts
index 17e2c19d8455..cc9b81d46188 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts
@@ -211,13 +211,12 @@
simple-audio-card,cpu {
sound-dai = <&sai3>;
+ frame-master;
+ bitclock-master;
};
simple-audio-card,codec {
sound-dai = <&wm8962>;
- clocks = <&clk IMX8MP_CLK_IPP_DO_CLKO1>;
- frame-master;
- bitclock-master;
};
};
};
@@ -507,10 +506,9 @@
&sai3 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_sai3>;
- assigned-clocks = <&clk IMX8MP_CLK_SAI3>,
- <&clk IMX8MP_AUDIO_PLL2> ;
- assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>;
- assigned-clock-rates = <12288000>, <361267200>;
+ assigned-clocks = <&clk IMX8MP_CLK_SAI3>;
+ assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL1_OUT>;
+ assigned-clock-rates = <12288000>;
fsl,sai-mclk-direction-output;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
index da8f19a646a9..e2ee9f5a042c 100644
--- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
+++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
@@ -499,7 +499,7 @@
pinctrl-0 = <&pinctrl_usdhc2_hs>, <&pinctrl_usdhc2_gpio>;
pinctrl-1 = <&pinctrl_usdhc2_uhs>, <&pinctrl_usdhc2_gpio>;
pinctrl-2 = <&pinctrl_usdhc2_uhs>, <&pinctrl_usdhc2_gpio>;
- cd-gpios = <&gpio3 00 GPIO_ACTIVE_LOW>;
+ cd-gpios = <&gpio3 0 GPIO_ACTIVE_LOW>;
vmmc-supply = <&reg_usdhc2_vmmc>;
bus-width = <4>;
no-sdio;
diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi
index edbd8cad35bc..72a9a5d4e27a 100644
--- a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi
@@ -19,7 +19,7 @@
linux,cma {
compatible = "shared-dma-pool";
reusable;
- alloc-ranges = <0 0x60000000 0 0x40000000>;
+ alloc-ranges = <0 0x80000000 0 0x40000000>;
size = <0 0x10000000>;
linux,cma-default;
};
@@ -156,6 +156,7 @@
&wdog3 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_wdog>;
+ fsl,ext-reset-output;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi
index 4a3f42355cb8..a0993022c102 100644
--- a/arch/arm64/boot/dts/freescale/imx93.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93.dtsi
@@ -1105,7 +1105,7 @@
<&clk IMX93_CLK_SYS_PLL_PFD0_DIV2>;
assigned-clock-rates = <100000000>, <250000000>;
intf_mode = <&wakeupmix_gpr 0x28>;
- snps,clk-csr = <0>;
+ snps,clk-csr = <6>;
nvmem-cells = <&eth_mac2>;
nvmem-cell-names = "mac-address";
status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi
index 1bbf9a0468f6..425272aa5a81 100644
--- a/arch/arm64/boot/dts/freescale/imx95.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx95.dtsi
@@ -27,7 +27,7 @@
reg = <0x0>;
enable-method = "psci";
#cooling-cells = <2>;
- power-domains = <&scmi_devpd IMX95_PERF_A55>;
+ power-domains = <&scmi_perf IMX95_PERF_A55>;
power-domain-names = "perf";
i-cache-size = <32768>;
i-cache-line-size = <64>;
@@ -44,7 +44,7 @@
reg = <0x100>;
enable-method = "psci";
#cooling-cells = <2>;
- power-domains = <&scmi_devpd IMX95_PERF_A55>;
+ power-domains = <&scmi_perf IMX95_PERF_A55>;
power-domain-names = "perf";
i-cache-size = <32768>;
i-cache-line-size = <64>;
@@ -61,7 +61,7 @@
reg = <0x200>;
enable-method = "psci";
#cooling-cells = <2>;
- power-domains = <&scmi_devpd IMX95_PERF_A55>;
+ power-domains = <&scmi_perf IMX95_PERF_A55>;
power-domain-names = "perf";
i-cache-size = <32768>;
i-cache-line-size = <64>;
@@ -78,7 +78,7 @@
reg = <0x300>;
enable-method = "psci";
#cooling-cells = <2>;
- power-domains = <&scmi_devpd IMX95_PERF_A55>;
+ power-domains = <&scmi_perf IMX95_PERF_A55>;
power-domain-names = "perf";
i-cache-size = <32768>;
i-cache-line-size = <64>;
@@ -93,7 +93,7 @@
device_type = "cpu";
compatible = "arm,cortex-a55";
reg = <0x400>;
- power-domains = <&scmi_devpd IMX95_PERF_A55>;
+ power-domains = <&scmi_perf IMX95_PERF_A55>;
power-domain-names = "perf";
enable-method = "psci";
#cooling-cells = <2>;
@@ -110,7 +110,7 @@
device_type = "cpu";
compatible = "arm,cortex-a55";
reg = <0x500>;
- power-domains = <&scmi_devpd IMX95_PERF_A55>;
+ power-domains = <&scmi_perf IMX95_PERF_A55>;
power-domain-names = "perf";
enable-method = "psci";
#cooling-cells = <2>;
@@ -187,7 +187,7 @@
compatible = "cache";
cache-size = <524288>;
cache-line-size = <64>;
- cache-sets = <1024>;
+ cache-sets = <512>;
cache-level = <3>;
cache-unified;
};
diff --git a/arch/arm64/boot/dts/qcom/ipq5332.dtsi b/arch/arm64/boot/dts/qcom/ipq5332.dtsi
index 573656587c0d..0a74ed4f72cc 100644
--- a/arch/arm64/boot/dts/qcom/ipq5332.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq5332.dtsi
@@ -320,8 +320,8 @@
reg = <0x08af8800 0x400>;
interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 53 IRQ_TYPE_EDGE_BOTH>,
- <GIC_SPI 52 IRQ_TYPE_EDGE_BOTH>;
+ <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "pwr_event",
"dp_hs_phy_irq",
"dm_hs_phy_irq";
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts
index 7fb980fcb307..9caa14dda585 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts
@@ -278,6 +278,13 @@
vdd-l3-supply = <&vreg_s1f_0p7>;
vdd-s1-supply = <&vph_pwr>;
vdd-s2-supply = <&vph_pwr>;
+
+ vreg_l3i_0p8: ldo3 {
+ regulator-name = "vreg_l3i_0p8";
+ regulator-min-microvolt = <880000>;
+ regulator-max-microvolt = <920000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
};
regulators-7 {
@@ -423,11 +430,17 @@
};
&pcie4 {
+ perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>;
+ wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>;
+
+ pinctrl-0 = <&pcie4_default>;
+ pinctrl-names = "default";
+
status = "okay";
};
&pcie4_phy {
- vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-phy-supply = <&vreg_l3i_0p8>;
vdda-pll-supply = <&vreg_l3e_1p2>;
status = "okay";
@@ -517,7 +530,30 @@
bias-disable;
};
- pcie6a_default: pcie2a-default-state {
+ pcie4_default: pcie4-default-state {
+ clkreq-n-pins {
+ pins = "gpio147";
+ function = "pcie4_clk";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+
+ perst-n-pins {
+ pins = "gpio146";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ wake-n-pins {
+ pins = "gpio148";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+
+ pcie6a_default: pcie6a-default-state {
clkreq-n-pins {
pins = "gpio153";
function = "pcie6a_clk";
@@ -529,7 +565,7 @@
pins = "gpio152";
function = "gpio";
drive-strength = <2>;
- bias-pull-down;
+ bias-disable;
};
wake-n-pins {
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
index 6152bcd0bc1f..e17ab8251e2a 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
@@ -268,7 +268,6 @@
pinctrl-0 = <&edp_reg_en>;
pinctrl-names = "default";
- regulator-always-on;
regulator-boot-on;
};
@@ -637,6 +636,14 @@
};
};
+&gpu {
+ status = "okay";
+
+ zap-shader {
+ firmware-name = "qcom/x1e80100/gen70500_zap.mbn";
+ };
+};
+
&i2c0 {
clock-frequency = <400000>;
@@ -724,9 +731,13 @@
aux-bus {
panel {
- compatible = "edp-panel";
+ compatible = "samsung,atna45af01", "samsung,atna33xc20";
+ enable-gpios = <&pmc8380_3_gpios 4 GPIO_ACTIVE_HIGH>;
power-supply = <&vreg_edp_3p3>;
+ pinctrl-0 = <&edp_bl_en>;
+ pinctrl-names = "default";
+
port {
edp_panel_in: endpoint {
remote-endpoint = <&mdss_dp3_out>;
@@ -756,11 +767,17 @@
};
&pcie4 {
+ perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>;
+ wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>;
+
+ pinctrl-0 = <&pcie4_default>;
+ pinctrl-names = "default";
+
status = "okay";
};
&pcie4_phy {
- vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-phy-supply = <&vreg_l3i_0p8>;
vdda-pll-supply = <&vreg_l3e_1p2>;
status = "okay";
@@ -785,6 +802,16 @@
status = "okay";
};
+&pmc8380_3_gpios {
+ edp_bl_en: edp-bl-en-state {
+ pins = "gpio4";
+ function = "normal";
+ power-source = <1>; /* 1.8V */
+ input-disable;
+ output-enable;
+ };
+};
+
&qupv3_0 {
status = "okay";
};
@@ -931,7 +958,30 @@
bias-disable;
};
- pcie6a_default: pcie2a-default-state {
+ pcie4_default: pcie4-default-state {
+ clkreq-n-pins {
+ pins = "gpio147";
+ function = "pcie4_clk";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+
+ perst-n-pins {
+ pins = "gpio146";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ wake-n-pins {
+ pins = "gpio148";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+
+ pcie6a_default: pcie6a-default-state {
clkreq-n-pins {
pins = "gpio153";
function = "pcie6a_clk";
@@ -943,15 +993,15 @@
pins = "gpio152";
function = "gpio";
drive-strength = <2>;
- bias-pull-down;
+ bias-disable;
};
wake-n-pins {
- pins = "gpio154";
- function = "gpio";
- drive-strength = <2>;
- bias-pull-up;
- };
+ pins = "gpio154";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
};
tpad_default: tpad-default-state {
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts
index fbff558f5b07..1943bdbfb8c0 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts
@@ -625,16 +625,31 @@
};
&pcie4 {
+ perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>;
+ wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>;
+
+ pinctrl-0 = <&pcie4_default>;
+ pinctrl-names = "default";
+
status = "okay";
};
&pcie4_phy {
- vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-phy-supply = <&vreg_l3i_0p8>;
vdda-pll-supply = <&vreg_l3e_1p2>;
status = "okay";
};
+&pcie4_port0 {
+ wifi@0 {
+ compatible = "pci17cb,1107";
+ reg = <0x10000 0x0 0x0 0x0 0x0>;
+
+ qcom,ath12k-calibration-variant = "LES790";
+ };
+};
+
&pcie6a {
perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>;
@@ -782,7 +797,30 @@
bias-disable;
};
- pcie6a_default: pcie2a-default-state {
+ pcie4_default: pcie4-default-state {
+ clkreq-n-pins {
+ pins = "gpio147";
+ function = "pcie4_clk";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+
+ perst-n-pins {
+ pins = "gpio146";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ wake-n-pins {
+ pins = "gpio148";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+
+ pcie6a_default: pcie6a-default-state {
clkreq-n-pins {
pins = "gpio153";
function = "pcie6a_clk";
@@ -794,15 +832,15 @@
pins = "gpio152";
function = "gpio";
drive-strength = <2>;
- bias-pull-down;
+ bias-disable;
};
wake-n-pins {
- pins = "gpio154";
- function = "gpio";
- drive-strength = <2>;
- bias-pull-up;
- };
+ pins = "gpio154";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
};
tpad_default: tpad-default-state {
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts
index 72a4f4138616..8098e6730ae5 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts
@@ -606,6 +606,14 @@
};
};
+&gpu {
+ status = "okay";
+
+ zap-shader {
+ firmware-name = "qcom/x1e80100/gen70500_zap.mbn";
+ };
+};
+
&lpass_tlmm {
spkr_01_sd_n_active: spkr-01-sd-n-active-state {
pins = "gpio12";
@@ -660,11 +668,17 @@
};
&pcie4 {
+ perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>;
+ wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>;
+
+ pinctrl-0 = <&pcie4_default>;
+ pinctrl-names = "default";
+
status = "okay";
};
&pcie4_phy {
- vdda-phy-supply = <&vreg_l3j_0p8>;
+ vdda-phy-supply = <&vreg_l3i_0p8>;
vdda-pll-supply = <&vreg_l3e_1p2>;
status = "okay";
@@ -804,7 +818,30 @@
bias-disable;
};
- pcie6a_default: pcie2a-default-state {
+ pcie4_default: pcie4-default-state {
+ clkreq-n-pins {
+ pins = "gpio147";
+ function = "pcie4_clk";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+
+ perst-n-pins {
+ pins = "gpio146";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ wake-n-pins {
+ pins = "gpio148";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+
+ pcie6a_default: pcie6a-default-state {
clkreq-n-pins {
pins = "gpio153";
function = "pcie6a_clk";
@@ -816,15 +853,15 @@
pins = "gpio152";
function = "gpio";
drive-strength = <2>;
- bias-pull-down;
+ bias-disable;
};
wake-n-pins {
- pins = "gpio154";
- function = "gpio";
- drive-strength = <2>;
- bias-pull-up;
- };
+ pins = "gpio154";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
};
wcd_default: wcd-reset-n-active-state {
diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
index 7bca5fcd7d52..cd732ef88cd8 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
@@ -2901,7 +2901,7 @@
dma-coherent;
- linux,pci-domain = <7>;
+ linux,pci-domain = <6>;
num-lanes = <2>;
interrupts = <GIC_SPI 773 IRQ_TYPE_LEVEL_HIGH>,
@@ -2959,6 +2959,7 @@
"link_down";
power-domains = <&gcc GCC_PCIE_6A_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie6a_phy>;
phy-names = "pciephy";
@@ -3022,7 +3023,7 @@
dma-coherent;
- linux,pci-domain = <5>;
+ linux,pci-domain = <4>;
num-lanes = <2>;
interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
@@ -3080,11 +3081,22 @@
"link_down";
power-domains = <&gcc GCC_PCIE_4_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie4_phy>;
phy-names = "pciephy";
status = "disabled";
+
+ pcie4_port0: pcie@0 {
+ device_type = "pci";
+ reg = <0x0 0x0 0x0 0x0 0x0>;
+ bus-range = <0x01 0xff>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges;
+ };
};
pcie4_phy: phy@1c0e000 {
@@ -3155,9 +3167,10 @@
interconnects = <&gem_noc MASTER_GFX3D 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "gfx-mem";
+ status = "disabled";
+
zap-shader {
memory-region = <&gpu_microcode_mem>;
- firmware-name = "qcom/gen70500_zap.mbn";
};
gpu_opp_table: opp-table {
@@ -3288,7 +3301,7 @@
reg = <0x0 0x03da0000 0x0 0x40000>;
#iommu-cells = <2>;
#global-interrupts = <1>;
- interrupts = <GIC_SPI 673 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts = <GIC_SPI 674 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 678 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 679 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 680 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
index a608a219543e..3e08e2fd0a78 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
@@ -387,7 +387,7 @@
pmic {
pmic_int_l: pmic-int-l {
- rockchip,pins = <2 RK_PA6 RK_FUNC_GPIO &pcfg_pull_up>;
+ rockchip,pins = <0 RK_PA2 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
index ccbe3a7a1d2c..d24444cdf54a 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
@@ -154,6 +154,22 @@
};
};
+&gpio3 {
+ /*
+ * The Qseven BIOS_DISABLE signal on the RK3399-Q7 keeps the on-module
+ * eMMC and SPI flash powered-down initially (in fact it keeps the
+ * reset signal asserted). BIOS_DISABLE_OVERRIDE pin allows to override
+ * that signal so that eMMC and SPI can be used regardless of the state
+ * of the signal.
+ */
+ bios-disable-override-hog {
+ gpios = <RK_PD5 GPIO_ACTIVE_LOW>;
+ gpio-hog;
+ line-name = "bios_disable_override";
+ output-high;
+ };
+};
+
&gmac {
assigned-clocks = <&cru SCLK_RMII_SRC>;
assigned-clock-parents = <&clkin_gmac>;
@@ -409,6 +425,7 @@
&i2s0 {
pinctrl-0 = <&i2s0_2ch_bus>;
+ pinctrl-1 = <&i2s0_2ch_bus_bclk_off>;
rockchip,playback-channels = <2>;
rockchip,capture-channels = <2>;
status = "okay";
@@ -417,8 +434,8 @@
/*
* As Q7 does not specify neither a global nor a RX clock for I2S these
* signals are not used. Furthermore I2S0_LRCK_RX is used as GPIO.
- * Therefore we have to redefine the i2s0_2ch_bus definition to prevent
- * conflicts.
+ * Therefore we have to redefine the i2s0_2ch_bus and i2s0_2ch_bus_bclk_off
+ * definitions to prevent conflicts.
*/
&i2s0_2ch_bus {
rockchip,pins =
@@ -428,6 +445,14 @@
<3 RK_PD7 1 &pcfg_pull_none>;
};
+&i2s0_2ch_bus_bclk_off {
+ rockchip,pins =
+ <3 RK_PD0 RK_FUNC_GPIO &pcfg_pull_none>,
+ <3 RK_PD2 1 &pcfg_pull_none>,
+ <3 RK_PD3 1 &pcfg_pull_none>,
+ <3 RK_PD7 1 &pcfg_pull_none>;
+};
+
&io_domains {
status = "okay";
bt656-supply = <&vcc_1v8>;
@@ -449,9 +474,14 @@
&pinctrl {
pinctrl-names = "default";
- pinctrl-0 = <&q7_thermal_pin>;
+ pinctrl-0 = <&q7_thermal_pin &bios_disable_override_hog_pin>;
gpios {
+ bios_disable_override_hog_pin: bios-disable-override-hog-pin {
+ rockchip,pins =
+ <3 RK_PD5 RK_FUNC_GPIO &pcfg_pull_down>;
+ };
+
q7_thermal_pin: q7-thermal-pin {
rockchip,pins =
<0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>;
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index 4690be841a1c..c72b3a608edd 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -1592,10 +1592,9 @@
<&cru SRST_TSADCPHY>;
rockchip,grf = <&grf>;
rockchip,hw-tshut-temp = <95000>;
- pinctrl-names = "init", "default", "sleep";
- pinctrl-0 = <&tsadc_pin>;
- pinctrl-1 = <&tsadc_shutorg>;
- pinctrl-2 = <&tsadc_pin>;
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&tsadc_shutorg>;
+ pinctrl-1 = <&tsadc_pin>;
#thermal-sensor-cells = <1>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
index b6e4df180f0b..ee99166ebd46 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
@@ -582,14 +582,14 @@
};
vo0_grf: syscon@fd5a6000 {
- compatible = "rockchip,rk3588-vo-grf", "syscon";
+ compatible = "rockchip,rk3588-vo0-grf", "syscon";
reg = <0x0 0xfd5a6000 0x0 0x2000>;
clocks = <&cru PCLK_VO0GRF>;
};
vo1_grf: syscon@fd5a8000 {
- compatible = "rockchip,rk3588-vo-grf", "syscon";
- reg = <0x0 0xfd5a8000 0x0 0x100>;
+ compatible = "rockchip,rk3588-vo1-grf", "syscon";
+ reg = <0x0 0xfd5a8000 0x0 0x4000>;
clocks = <&cru PCLK_VO1GRF>;
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 7d32fca64996..362df9390263 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -887,6 +887,7 @@ CONFIG_DRM_PANEL_KHADAS_TS050=m
CONFIG_DRM_PANEL_MANTIX_MLAF057WE51=m
CONFIG_DRM_PANEL_NOVATEK_NT36672E=m
CONFIG_DRM_PANEL_RAYDIUM_RM67191=m
+CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
CONFIG_DRM_PANEL_SITRONIX_ST7703=m
CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA=m
CONFIG_DRM_PANEL_VISIONOX_VTDR6130=m
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
index cbc980fb02e3..22c9069c0650 100644
--- a/arch/arm64/crypto/poly1305-armv8.pl
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -473,7 +473,8 @@ poly1305_blocks_neon:
subs $len,$len,#64
ldp x9,x13,[$inp,#48]
add $in2,$inp,#96
- adr $zeros,.Lzeros
+ adrp $zeros,.Lzeros
+ add $zeros,$zeros,#:lo12:.Lzeros
lsl $padbit,$padbit,#24
add x15,$ctx,#48
@@ -885,10 +886,13 @@ poly1305_blocks_neon:
ret
.size poly1305_blocks_neon,.-poly1305_blocks_neon
+.pushsection .rodata
.align 5
.Lzeros:
.long 0,0,0,0,0,0,0,0
.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm"
+.popsection
+
.align 2
#if !defined(__KERNEL__) && !defined(_WIN64)
.comm OPENSSL_armcap_P,4,4
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
index a4697a0b6835..468a049bc63b 100644
--- a/arch/arm64/include/asm/arm_pmuv3.h
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -33,6 +33,14 @@ static inline void write_pmevtypern(int n, unsigned long val)
PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
}
+#define RETURN_READ_PMEVTYPERN(n) \
+ return read_sysreg(pmevtyper##n##_el0)
+static inline unsigned long read_pmevtypern(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVTYPERN);
+ return 0;
+}
+
static inline unsigned long read_pmmir(void)
{
return read_cpuid(PMMIR_EL1);
@@ -46,6 +54,14 @@ static inline u32 read_pmuver(void)
ID_AA64DFR0_EL1_PMUVer_SHIFT);
}
+static inline bool pmuv3_has_icntr(void)
+{
+ u64 dfr1 = read_sysreg(id_aa64dfr1_el1);
+
+ return !!cpuid_feature_extract_unsigned_field(dfr1,
+ ID_AA64DFR1_EL1_PMICNTR_SHIFT);
+}
+
static inline void write_pmcr(u64 val)
{
write_sysreg(val, pmcr_el0);
@@ -71,22 +87,32 @@ static inline u64 read_pmccntr(void)
return read_sysreg(pmccntr_el0);
}
-static inline void write_pmcntenset(u32 val)
+static inline void write_pmicntr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMICNTR_EL0);
+}
+
+static inline u64 read_pmicntr(void)
+{
+ return read_sysreg_s(SYS_PMICNTR_EL0);
+}
+
+static inline void write_pmcntenset(u64 val)
{
write_sysreg(val, pmcntenset_el0);
}
-static inline void write_pmcntenclr(u32 val)
+static inline void write_pmcntenclr(u64 val)
{
write_sysreg(val, pmcntenclr_el0);
}
-static inline void write_pmintenset(u32 val)
+static inline void write_pmintenset(u64 val)
{
write_sysreg(val, pmintenset_el1);
}
-static inline void write_pmintenclr(u32 val)
+static inline void write_pmintenclr(u64 val)
{
write_sysreg(val, pmintenclr_el1);
}
@@ -96,12 +122,27 @@ static inline void write_pmccfiltr(u64 val)
write_sysreg(val, pmccfiltr_el0);
}
-static inline void write_pmovsclr(u32 val)
+static inline u64 read_pmccfiltr(void)
+{
+ return read_sysreg(pmccfiltr_el0);
+}
+
+static inline void write_pmicfiltr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMICFILTR_EL0);
+}
+
+static inline u64 read_pmicfiltr(void)
+{
+ return read_sysreg_s(SYS_PMICFILTR_EL0);
+}
+
+static inline void write_pmovsclr(u64 val)
{
write_sysreg(val, pmovsclr_el0);
}
-static inline u32 read_pmovsclr(void)
+static inline u64 read_pmovsclr(void)
{
return read_sysreg(pmovsclr_el0);
}
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 558434267271..3d261cc123c1 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -832,6 +832,12 @@ static inline bool system_supports_lpa2(void)
return cpus_have_final_cap(ARM64_HAS_LPA2);
}
+static inline bool system_supports_poe(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_POE) &&
+ alternative_has_cap_unlikely(ARM64_HAS_S1POE);
+}
+
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5fd7caea4419..5a7dfeb8e8eb 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -143,6 +143,7 @@
#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
#define AMPERE_CPU_PART_AMPERE1 0xAC3
+#define AMPERE_CPU_PART_AMPERE1A 0xAC4
#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */
@@ -212,6 +213,7 @@
#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A)
#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index fd87c4b8f984..e0ffdf13a18b 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -165,42 +165,53 @@
mrs x1, id_aa64dfr0_el1
ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
cmp x1, #3
- b.lt .Lset_debug_fgt_\@
+ b.lt .Lskip_spe_fgt_\@
/* Disable PMSNEVFR_EL1 read and write traps */
orr x0, x0, #(1 << 62)
-.Lset_debug_fgt_\@:
+.Lskip_spe_fgt_\@:
msr_s SYS_HDFGRTR_EL2, x0
msr_s SYS_HDFGWTR_EL2, x0
mov x0, xzr
mrs x1, id_aa64pfr1_el1
ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
- cbz x1, .Lset_pie_fgt_\@
+ cbz x1, .Lskip_debug_fgt_\@
/* Disable nVHE traps of TPIDR2 and SMPRI */
orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
-.Lset_pie_fgt_\@:
+.Lskip_debug_fgt_\@:
mrs_s x1, SYS_ID_AA64MMFR3_EL1
ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
- cbz x1, .Lset_fgt_\@
+ cbz x1, .Lskip_pie_fgt_\@
/* Disable trapping of PIR_EL1 / PIRE0_EL1 */
orr x0, x0, #HFGxTR_EL2_nPIR_EL1
orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1
-.Lset_fgt_\@:
+.Lskip_pie_fgt_\@:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1POE_SHIFT, #4
+ cbz x1, .Lskip_poe_fgt_\@
+
+ /* Disable trapping of POR_EL0 */
+ orr x0, x0, #HFGxTR_EL2_nPOR_EL0
+
+.Lskip_poe_fgt_\@:
msr_s SYS_HFGRTR_EL2, x0
msr_s SYS_HFGWTR_EL2, x0
msr_s SYS_HFGITR_EL2, xzr
mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
- cbz x1, .Lskip_fgt_\@
+ cbz x1, .Lskip_amu_fgt_\@
msr_s SYS_HAFGRTR_EL2, xzr
+
+.Lskip_amu_fgt_\@:
+
.Lskip_fgt_\@:
.endm
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 56c148890daf..da6d2c1c0b03 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -10,63 +10,63 @@
#include <asm/memory.h>
#include <asm/sysreg.h>
-#define ESR_ELx_EC_UNKNOWN (0x00)
-#define ESR_ELx_EC_WFx (0x01)
+#define ESR_ELx_EC_UNKNOWN UL(0x00)
+#define ESR_ELx_EC_WFx UL(0x01)
/* Unallocated EC: 0x02 */
-#define ESR_ELx_EC_CP15_32 (0x03)
-#define ESR_ELx_EC_CP15_64 (0x04)
-#define ESR_ELx_EC_CP14_MR (0x05)
-#define ESR_ELx_EC_CP14_LS (0x06)
-#define ESR_ELx_EC_FP_ASIMD (0x07)
-#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
-#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
+#define ESR_ELx_EC_CP15_32 UL(0x03)
+#define ESR_ELx_EC_CP15_64 UL(0x04)
+#define ESR_ELx_EC_CP14_MR UL(0x05)
+#define ESR_ELx_EC_CP14_LS UL(0x06)
+#define ESR_ELx_EC_FP_ASIMD UL(0x07)
+#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */
+#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */
/* Unallocated EC: 0x0A - 0x0B */
-#define ESR_ELx_EC_CP14_64 (0x0C)
-#define ESR_ELx_EC_BTI (0x0D)
-#define ESR_ELx_EC_ILL (0x0E)
+#define ESR_ELx_EC_CP14_64 UL(0x0C)
+#define ESR_ELx_EC_BTI UL(0x0D)
+#define ESR_ELx_EC_ILL UL(0x0E)
/* Unallocated EC: 0x0F - 0x10 */
-#define ESR_ELx_EC_SVC32 (0x11)
-#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */
-#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */
+#define ESR_ELx_EC_SVC32 UL(0x11)
+#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */
+#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */
/* Unallocated EC: 0x14 */
-#define ESR_ELx_EC_SVC64 (0x15)
-#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */
-#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */
-#define ESR_ELx_EC_SYS64 (0x18)
-#define ESR_ELx_EC_SVE (0x19)
-#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
+#define ESR_ELx_EC_SVC64 UL(0x15)
+#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */
+#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */
+#define ESR_ELx_EC_SYS64 UL(0x18)
+#define ESR_ELx_EC_SVE UL(0x19)
+#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */
/* Unallocated EC: 0x1B */
-#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
-#define ESR_ELx_EC_SME (0x1D)
+#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */
+#define ESR_ELx_EC_SME UL(0x1D)
/* Unallocated EC: 0x1E */
-#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
-#define ESR_ELx_EC_IABT_LOW (0x20)
-#define ESR_ELx_EC_IABT_CUR (0x21)
-#define ESR_ELx_EC_PC_ALIGN (0x22)
+#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */
+#define ESR_ELx_EC_IABT_LOW UL(0x20)
+#define ESR_ELx_EC_IABT_CUR UL(0x21)
+#define ESR_ELx_EC_PC_ALIGN UL(0x22)
/* Unallocated EC: 0x23 */
-#define ESR_ELx_EC_DABT_LOW (0x24)
-#define ESR_ELx_EC_DABT_CUR (0x25)
-#define ESR_ELx_EC_SP_ALIGN (0x26)
-#define ESR_ELx_EC_MOPS (0x27)
-#define ESR_ELx_EC_FP_EXC32 (0x28)
+#define ESR_ELx_EC_DABT_LOW UL(0x24)
+#define ESR_ELx_EC_DABT_CUR UL(0x25)
+#define ESR_ELx_EC_SP_ALIGN UL(0x26)
+#define ESR_ELx_EC_MOPS UL(0x27)
+#define ESR_ELx_EC_FP_EXC32 UL(0x28)
/* Unallocated EC: 0x29 - 0x2B */
-#define ESR_ELx_EC_FP_EXC64 (0x2C)
+#define ESR_ELx_EC_FP_EXC64 UL(0x2C)
/* Unallocated EC: 0x2D - 0x2E */
-#define ESR_ELx_EC_SERROR (0x2F)
-#define ESR_ELx_EC_BREAKPT_LOW (0x30)
-#define ESR_ELx_EC_BREAKPT_CUR (0x31)
-#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
-#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
-#define ESR_ELx_EC_WATCHPT_LOW (0x34)
-#define ESR_ELx_EC_WATCHPT_CUR (0x35)
+#define ESR_ELx_EC_SERROR UL(0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW UL(0x30)
+#define ESR_ELx_EC_BREAKPT_CUR UL(0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33)
+#define ESR_ELx_EC_WATCHPT_LOW UL(0x34)
+#define ESR_ELx_EC_WATCHPT_CUR UL(0x35)
/* Unallocated EC: 0x36 - 0x37 */
-#define ESR_ELx_EC_BKPT32 (0x38)
+#define ESR_ELx_EC_BKPT32 UL(0x38)
/* Unallocated EC: 0x39 */
-#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
+#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */
/* Unallocated EC: 0x3B */
-#define ESR_ELx_EC_BRK64 (0x3C)
+#define ESR_ELx_EC_BRK64 UL(0x3C)
/* Unallocated EC: 0x3D - 0x3F */
-#define ESR_ELx_EC_MAX (0x3F)
+#define ESR_ELx_EC_MAX UL(0x3F)
#define ESR_ELx_EC_SHIFT (26)
#define ESR_ELx_EC_WIDTH (6)
@@ -122,8 +122,8 @@
#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
/* Status codes for individual page table levels */
-#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + n)
-#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + n)
+#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n))
+#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n))
#define ESR_ELx_FSC_FAULT_nL (0x2C)
#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \
@@ -161,6 +161,7 @@
/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_FSC_ADDRSZ (0x00)
+#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n))
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index bc69ac368d73..f2a84efc3618 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -155,8 +155,6 @@ extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);
-extern u64 read_smcr_features(void);
-
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
* vice versa). This allows find_next_bit() to be used to find the
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 4edd3b61df11..a775adddecf2 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -157,6 +157,7 @@
#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA)
#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4)
#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2)
+#define KERNEL_HWCAP_POE __khwcap2_feature(POE)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
index 0ae427f352c8..409e239834d1 100644
--- a/arch/arm64/include/asm/hypervisor.h
+++ b/arch/arm64/include/asm/hypervisor.h
@@ -7,4 +7,15 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
+#ifdef CONFIG_ARM_PKVM_GUEST
+void pkvm_init_hyp_services(void);
+#else
+static inline void pkvm_init_hyp_services(void) { };
+#endif
+
+static inline void kvm_arch_init_hyp_services(void)
+{
+ pkvm_init_hyp_services();
+};
+
#endif
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 41fd90895dfc..1ada23a6ec19 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -271,6 +271,10 @@ __iowrite64_copy(void __iomem *to, const void *from, size_t count)
* I/O memory mapping functions.
*/
+typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size,
+ pgprot_t *prot);
+int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook);
+
#define ioremap_prot ioremap_prot
#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index d81cc746e0eb..109a85ee6910 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,6 +107,7 @@
/* TCR_EL2 Registers bits */
#define TCR_EL2_DS (1UL << 32)
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
+#define TCR_EL2_HPD (1 << 24)
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 2181a11b9d92..b36a3b6cc011 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -10,6 +10,7 @@
#include <asm/hyp_image.h>
#include <asm/insn.h>
#include <asm/virt.h>
+#include <asm/sysreg.h>
#define ARM_EXIT_WITH_SERROR_BIT 31
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
@@ -235,6 +236,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
+extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
@@ -259,7 +263,7 @@ extern u64 __kvm_get_mdcr_el2(void);
asm volatile( \
" mrs %1, spsr_el2\n" \
" mrs %2, elr_el2\n" \
- "1: at "at_op", %3\n" \
+ "1: " __msr_s(at_op, "%3") "\n" \
" isb\n" \
" b 9f\n" \
"2: msr spsr_el2, %1\n" \
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a33f5996ca9f..329619c6fa96 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -446,6 +446,12 @@ enum vcpu_sysreg {
GCR_EL1, /* Tag Control Register */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
+ POR_EL0, /* Permission Overlay Register 0 (EL0) */
+
+ /* FP/SIMD/SVE */
+ SVCR,
+ FPMR,
+
/* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -517,6 +523,8 @@ enum vcpu_sysreg {
VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */
VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */
+ VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */
+
VNCR(HFGRTR_EL2),
VNCR(HFGWTR_EL2),
VNCR(HFGITR_EL2),
@@ -530,6 +538,8 @@ enum vcpu_sysreg {
VNCR(CNTP_CVAL_EL0),
VNCR(CNTP_CTL_EL0),
+ VNCR(ICH_HCR_EL2),
+
NR_SYS_REGS /* Nothing after this line! */
};
@@ -595,6 +605,16 @@ struct kvm_host_data {
struct cpu_sve_state *sve_state;
};
+ union {
+ /* HYP VA pointer to the host storage for FPMR */
+ u64 *fpmr_ptr;
+ /*
+ * Used by pKVM only, as it needs to provide storage
+ * for the host
+ */
+ u64 fpmr;
+ };
+
/* Ownership of the FP regs */
enum {
FP_STATE_FREE,
@@ -664,8 +684,6 @@ struct kvm_vcpu_arch {
void *sve_state;
enum fp_type fp_type;
unsigned int sve_max_vl;
- u64 svcr;
- u64 fpmr;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;
@@ -1330,12 +1348,12 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
-void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
-void kvm_clr_pmu_events(u32 clr);
+void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
+void kvm_clr_pmu_events(u64 clr);
bool kvm_set_pmuserenr(u64 val);
#else
-static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
-static inline void kvm_clr_pmu_events(u32 clr) {}
+static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
+static inline void kvm_clr_pmu_events(u64 clr) {}
static inline bool kvm_set_pmuserenr(u64 val)
{
return false;
@@ -1473,4 +1491,8 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
(pa + pi + pa3) == 1; \
})
+#define kvm_has_fpmr(k) \
+ (system_supports_fpmr() && \
+ kvm_has_feat((k), ID_AA64PFR2_EL1, FPMR, IMP))
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 216ca424bb16..cd4087fbda9a 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -352,5 +352,11 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
return &kvm->arch.mmu != mmu;
}
+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
+#else
+static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {}
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
+
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 5b06c31035a2..e8bc6d67aba2 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -85,7 +85,7 @@ struct kvm_s2_trans {
bool readable;
int level;
u32 esr;
- u64 upper_attr;
+ u64 desc;
};
static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans)
@@ -115,7 +115,7 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
{
- return !(trans->upper_attr & BIT(54));
+ return !(trans->desc & BIT(54));
}
extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -205,4 +205,40 @@ static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans)
return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level);
}
+/* Adjust alignment for the contiguous bit as per StageOA() */
+#define contiguous_bit_shift(d, wi, l) \
+ ({ \
+ u8 shift = 0; \
+ \
+ if ((d) & PTE_CONT) { \
+ switch (BIT((wi)->pgshift)) { \
+ case SZ_4K: \
+ shift = 4; \
+ break; \
+ case SZ_16K: \
+ shift = (l) == 2 ? 5 : 7; \
+ break; \
+ case SZ_64K: \
+ shift = 5; \
+ break; \
+ } \
+ } \
+ \
+ shift; \
+ })
+
+static inline unsigned int ps_to_output_size(unsigned int ps)
+{
+ switch (ps) {
+ case 0: return 32;
+ case 1: return 36;
+ case 2: return 40;
+ case 3: return 42;
+ case 4: return 44;
+ case 5:
+ default:
+ return 48;
+ }
+}
+
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 19278dfe7978..03f4c3d7839c 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -59,6 +59,48 @@ typedef u64 kvm_pte_t;
#define KVM_PHYS_INVALID (-1ULL)
+#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
+
+#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
+
+#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
+
+#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
+ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
+ KVM_PTE_LEAF_ATTR_HI_S2_XN)
+
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
+#define KVM_MAX_OWNER_ID 1
+
+/*
+ * Used to indicate a pte for which a 'break-before-make' sequence is in
+ * progress.
+ */
+#define KVM_INVALID_PTE_LOCKED BIT(10)
+
static inline bool kvm_pte_valid(kvm_pte_t pte)
{
return pte & KVM_PTE_VALID;
diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..b0c9a86b13a4
--- /dev/null
+++ b/arch/arm64/include/asm/mem_encrypt.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_MEM_ENCRYPT_H
+#define __ASM_MEM_ENCRYPT_H
+
+struct arm64_mem_crypt_ops {
+ int (*encrypt)(unsigned long addr, int numpages);
+ int (*decrypt)(unsigned long addr, int numpages);
+};
+
+int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops);
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+#endif /* __ASM_MEM_ENCRYPT_H */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 54fb014eba05..0480c61dbb4f 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -110,6 +110,8 @@
#define PAGE_END (_PAGE_END(VA_BITS_MIN))
#endif /* CONFIG_KASAN */
+#define PHYSMEM_END __pa(PAGE_END - 1)
+
#define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT)
/*
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 5966ee4a6154..52791715f6e6 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -7,7 +7,7 @@
#include <uapi/asm/mman.h>
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
- unsigned long pkey __always_unused)
+ unsigned long pkey)
{
unsigned long ret = 0;
@@ -17,6 +17,14 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
if (system_supports_mte() && (prot & PROT_MTE))
ret |= VM_MTE;
+#ifdef CONFIG_ARCH_HAS_PKEYS
+ if (system_supports_poe()) {
+ ret |= pkey & BIT(0) ? VM_PKEY_BIT0 : 0;
+ ret |= pkey & BIT(1) ? VM_PKEY_BIT1 : 0;
+ ret |= pkey & BIT(2) ? VM_PKEY_BIT2 : 0;
+ }
+#endif
+
return ret;
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 65977c7783c5..2ec96d91acc6 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -25,6 +25,7 @@ typedef struct {
refcount_t pinned;
void *vdso;
unsigned long flags;
+ u8 pkey_allocation_map;
} mm_context_t;
/*
@@ -63,7 +64,6 @@ static inline bool arm64_kernel_unmapped_at_el0(void)
extern void arm64_memblock_init(void);
extern void paging_init(void);
extern void bootmem_init(void);
-extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index bd19f4c758b7..7c09d47e09cb 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -15,12 +15,12 @@
#include <linux/sched/hotplug.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>
+#include <linux/pkeys.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/daifflags.h>
#include <asm/proc-fns.h>
-#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
#include <asm/sysreg.h>
#include <asm/tlbflush.h>
@@ -175,9 +175,36 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
atomic64_set(&mm->context.id, 0);
refcount_set(&mm->context.pinned, 0);
+
+ /* pkey 0 is the default, so always reserve it. */
+ mm->context.pkey_allocation_map = BIT(0);
+
+ return 0;
+}
+
+static inline void arch_dup_pkeys(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+ /* Duplicate the oldmm pkey state in mm: */
+ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
+}
+
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+ arch_dup_pkeys(oldmm, mm);
+
return 0;
}
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+static inline void arch_unmap(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+}
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void update_saved_ttbr0(struct task_struct *tsk,
struct mm_struct *mm)
@@ -267,6 +294,23 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm)
return -1UL >> 8;
}
+/*
+ * Only enforce protection keys on the current process, because there is no
+ * user context to access POR_EL0 for another address space.
+ */
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+ bool write, bool execute, bool foreign)
+{
+ if (!system_supports_poe())
+ return true;
+
+ /* allow access if the VMA is not one from this process */
+ if (foreign || vma_is_foreign(vma))
+ return true;
+
+ return por_el0_allows_pkey(vma_pkey(vma), write, execute);
+}
+
#include <asm-generic/mmu_context.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 1f60aa1bc750..fd330c1db289 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -135,7 +135,6 @@
/*
* Section
*/
-#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
@@ -200,11 +199,26 @@
#define PTE_PI_IDX_3 54 /* UXN */
/*
+ * POIndex[2:0] encoding (Permission Overlay Extension)
+ */
+#define PTE_PO_IDX_0 (_AT(pteval_t, 1) << 60)
+#define PTE_PO_IDX_1 (_AT(pteval_t, 1) << 61)
+#define PTE_PO_IDX_2 (_AT(pteval_t, 1) << 62)
+
+#define PTE_PO_IDX_MASK GENMASK_ULL(62, 60)
+
+
+/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
/*
+ * Hierarchical permission for Stage-1 tables
+ */
+#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61)
+
+/*
* Highest possible physical address supported.
*/
#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
@@ -298,6 +312,10 @@
#define TCR_TBI1 (UL(1) << 38)
#define TCR_HA (UL(1) << 39)
#define TCR_HD (UL(1) << 40)
+#define TCR_HPD0_SHIFT 41
+#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT)
+#define TCR_HPD1_SHIFT 42
+#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT)
#define TCR_TBID0 (UL(1) << 51)
#define TCR_TBID1 (UL(1) << 52)
#define TCR_NFD0 (UL(1) << 53)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index b11cfb9fdd37..2a11d0c10760 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -154,10 +154,10 @@ static inline bool __pure lpa2_is_enabled(void)
#define PIE_E0 ( \
PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW))
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
#define PIE_E1 ( \
PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7a4f5604be3f..96c2b0b07c4c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -34,6 +34,7 @@
#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
+#include <asm/por.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
@@ -149,6 +150,24 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_accessible(mm, pte) \
(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
+static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute)
+{
+ u64 por;
+
+ if (!system_supports_poe())
+ return true;
+
+ por = read_sysreg_s(SYS_POR_EL0);
+
+ if (write)
+ return por_elx_allows_write(por, pkey);
+
+ if (execute)
+ return por_elx_allows_exec(por, pkey);
+
+ return por_elx_allows_read(por, pkey);
+}
+
/*
* p??_access_permitted() is true for valid user mappings (PTE_USER
* bit set, subject to the write permission check). For execute-only
@@ -156,8 +175,11 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
* not set) must return false. PROT_NONE mappings do not have the
* PTE_VALID bit set.
*/
-#define pte_access_permitted(pte, write) \
+#define pte_access_permitted_no_overlay(pte, write) \
(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
+#define pte_access_permitted(pte, write) \
+ (pte_access_permitted_no_overlay(pte, write) && \
+ por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false))
#define pmd_access_permitted(pmd, write) \
(pte_access_permitted(pmd_pte(pmd), (write)))
#define pud_access_permitted(pud, write) \
@@ -373,10 +395,11 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
/*
* If the PTE would provide user space access to the tags associated
* with it then ensure that the MTE tags are synchronised. Although
- * pte_access_permitted() returns false for exec only mappings, they
- * don't expose tags (instruction fetches don't check tags).
+ * pte_access_permitted_no_overlay() returns false for exec only
+ * mappings, they don't expose tags (instruction fetches don't check
+ * tags).
*/
- if (system_supports_mte() && pte_access_permitted(pte, false) &&
+ if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) &&
!pte_special(pte) && pte_tagged(pte))
mte_sync_tags(pte, nr_pages);
}
@@ -1103,7 +1126,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*/
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE |
- PTE_GP | PTE_ATTRINDX_MASK;
+ PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK;
+
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
diff --git a/arch/arm64/include/asm/pkeys.h b/arch/arm64/include/asm/pkeys.h
new file mode 100644
index 000000000000..0ca5f83ce148
--- /dev/null
+++ b/arch/arm64/include/asm/pkeys.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ *
+ * Based on arch/x86/include/asm/pkeys.h
+ */
+
+#ifndef _ASM_ARM64_PKEYS_H
+#define _ASM_ARM64_PKEYS_H
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2)
+
+#define arch_max_pkey() 8
+
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val);
+
+static inline bool arch_pkeys_enabled(void)
+{
+ return system_supports_poe();
+}
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+}
+
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey)
+{
+ if (pkey != -1)
+ return pkey;
+
+ return vma_pkey(vma);
+}
+
+static inline int execute_only_pkey(struct mm_struct *mm)
+{
+ // Execute-only mappings are handled by EPAN/FEAT_PAN3.
+ return -1;
+}
+
+#define mm_pkey_allocation_map(mm) (mm)->context.pkey_allocation_map
+#define mm_set_pkey_allocated(mm, pkey) do { \
+ mm_pkey_allocation_map(mm) |= (1U << pkey); \
+} while (0)
+#define mm_set_pkey_free(mm, pkey) do { \
+ mm_pkey_allocation_map(mm) &= ~(1U << pkey); \
+} while (0)
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+ /*
+ * "Allocated" pkeys are those that have been returned
+ * from pkey_alloc() or pkey 0 which is allocated
+ * implicitly when the mm is created.
+ */
+ if (pkey < 0 || pkey >= arch_max_pkey())
+ return false;
+
+ return mm_pkey_allocation_map(mm) & (1U << pkey);
+}
+
+/*
+ * Returns a positive, 3-bit key on success, or -1 on failure.
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+ /*
+ * Note: this is the one and only place we make sure
+ * that the pkey is valid as far as the hardware is
+ * concerned. The rest of the kernel trusts that
+ * only good, valid pkeys come out of here.
+ */
+ u8 all_pkeys_mask = GENMASK(arch_max_pkey() - 1, 0);
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -1;
+
+ /*
+ * Are we out of pkeys? We must handle this specially
+ * because ffz() behavior is undefined if there are no
+ * zeros.
+ */
+ if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
+ return -1;
+
+ ret = ffz(mm_pkey_allocation_map(mm));
+
+ mm_set_pkey_allocated(mm, ret);
+
+ return ret;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+ if (!mm_pkey_is_allocated(mm, pkey))
+ return -EINVAL;
+
+ mm_set_pkey_free(mm, pkey);
+
+ return 0;
+}
+
+#endif /* _ASM_ARM64_PKEYS_H */
diff --git a/arch/arm64/include/asm/por.h b/arch/arm64/include/asm/por.h
new file mode 100644
index 000000000000..e06e9f473675
--- /dev/null
+++ b/arch/arm64/include/asm/por.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#ifndef _ASM_ARM64_POR_H
+#define _ASM_ARM64_POR_H
+
+#define POR_BITS_PER_PKEY 4
+#define POR_ELx_IDX(por_elx, idx) (((por_elx) >> ((idx) * POR_BITS_PER_PKEY)) & 0xf)
+
+static inline bool por_elx_allows_read(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_IDX(por, pkey);
+
+ return perm & POE_R;
+}
+
+static inline bool por_elx_allows_write(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_IDX(por, pkey);
+
+ return perm & POE_W;
+}
+
+static inline bool por_elx_allows_exec(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_IDX(por, pkey);
+
+ return perm & POE_X;
+}
+
+#endif /* _ASM_ARM64_POR_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index f77371232d8c..1438424f0064 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -184,6 +184,7 @@ struct thread_struct {
u64 sctlr_user;
u64 svcr;
u64 tpidr2_el0;
+ u64 por_el0;
};
static inline unsigned int thread_get_vl(struct thread_struct *thread,
@@ -402,5 +403,10 @@ long get_tagged_addr_ctrl(struct task_struct *task);
#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current)
#endif
+int get_tsc_mode(unsigned long adr);
+int set_tsc_mode(unsigned int val);
+#define GET_TSC_CTL(adr) get_tsc_mode((adr))
+#define SET_TSC_CTL(val) set_tsc_mode((val))
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 5b1701c76d1c..6cf4aae05219 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -5,6 +5,8 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
+#include <linux/ptdump.h>
+
#ifdef CONFIG_PTDUMP_CORE
#include <linux/mm_types.h>
@@ -21,14 +23,53 @@ struct ptdump_info {
unsigned long base_addr;
};
+struct ptdump_prot_bits {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+};
+
+struct ptdump_pg_level {
+ const struct ptdump_prot_bits *bits;
+ char name[4];
+ int num;
+ u64 mask;
+};
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct ptdump_pg_state {
+ struct ptdump_state ptdump;
+ struct ptdump_pg_level *pg_level;
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ const struct mm_struct *mm;
+ unsigned long start_address;
+ int level;
+ u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+ unsigned long uxn_pages;
+};
+
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ u64 val);
#ifdef CONFIG_PTDUMP_DEBUGFS
#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
-#endif
+#endif /* CONFIG_PTDUMP_DEBUGFS */
+#else
+static inline void note_page(struct ptdump_state *pt_st, unsigned long addr,
+ int level, u64 val) { }
#endif /* CONFIG_PTDUMP_CORE */
#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h
index 0f740b781187..917761feeffd 100644
--- a/arch/arm64/include/asm/set_memory.h
+++ b/arch/arm64/include/asm/set_memory.h
@@ -3,6 +3,7 @@
#ifndef _ASM_ARM64_SET_MEMORY_H
#define _ASM_ARM64_SET_MEMORY_H
+#include <asm/mem_encrypt.h>
#include <asm-generic/set_memory.h>
bool can_set_direct_map(void);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4a9ea103817e..9ea97dddefc4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -109,6 +109,9 @@
#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
#define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x))
+/* Register-based PAN access, for save/restore purposes */
+#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
+
#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
@@ -325,7 +328,25 @@
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
#define SYS_PAR_EL1_F BIT(0)
+/* When PAR_EL1.F == 1 */
#define SYS_PAR_EL1_FST GENMASK(6, 1)
+#define SYS_PAR_EL1_PTW BIT(8)
+#define SYS_PAR_EL1_S BIT(9)
+#define SYS_PAR_EL1_AssuredOnly BIT(12)
+#define SYS_PAR_EL1_TopLevel BIT(13)
+#define SYS_PAR_EL1_Overlay BIT(14)
+#define SYS_PAR_EL1_DirtyBit BIT(15)
+#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48)
+#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16))
+#define SYS_PAR_EL1_RES1 BIT(11)
+/* When PAR_EL1.F == 0 */
+#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7)
+#define SYS_PAR_EL1_NS BIT(9)
+#define SYS_PAR_EL1_F0_IMPDEF BIT(10)
+#define SYS_PAR_EL1_NSE BIT(11)
+#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12)
+#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56)
+#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52))
/*** Statistical Profiling Extension ***/
#define PMSEVFR_EL1_RES0_IMP \
@@ -403,7 +424,6 @@
#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2)
#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3)
#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4)
-#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5)
#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6)
#define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7)
#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0)
@@ -652,6 +672,7 @@
#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2)
/* TLBI instructions */
#define TLBI_Op0 1
@@ -1077,6 +1098,9 @@
#define POE_RXW UL(0x7)
#define POE_MASK UL(0xf)
+/* Initial value for Permission Overlay Extension for EL0 */
+#define POR_EL0_INIT POE_RXW
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e72a3bf9e563..1114c1c3300a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -81,6 +81,7 @@ void arch_setup_new_exec(void);
#define TIF_SME 27 /* SME in use */
#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */
+#define TIF_TSC_SIGSEGV 30 /* SIGSEGV on counter-timer access */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
@@ -97,6 +98,7 @@ void arch_setup_new_exec(void);
#define _TIF_SVE (1 << TIF_SVE)
#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_TSC_SIGSEGV (1 << TIF_TSC_SIGSEGV)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index eefe766d6161..d780d1bd2eac 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
void arm64_notify_segfault(unsigned long addr);
void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey);
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
index df2c47c55972..06f8ec0906a6 100644
--- a/arch/arm64/include/asm/vncr_mapping.h
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -52,6 +52,7 @@
#define VNCR_PIRE0_EL1 0x290
#define VNCR_PIRE0_EL2 0x298
#define VNCR_PIR_EL1 0x2A0
+#define VNCR_POR_EL1 0x2A8
#define VNCR_ICH_LR0_EL2 0x400
#define VNCR_ICH_LR1_EL2 0x408
#define VNCR_ICH_LR2_EL2 0x410
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 285610e626f5..055381b2c615 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -122,5 +122,6 @@
#define HWCAP2_SME_SF8FMA (1UL << 60)
#define HWCAP2_SME_SF8DP4 (1UL << 61)
#define HWCAP2_SME_SF8DP2 (1UL << 62)
+#define HWCAP2_POE (1UL << 63)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h
index 1e6482a838e1..e7e0c8216243 100644
--- a/arch/arm64/include/uapi/asm/mman.h
+++ b/arch/arm64/include/uapi/asm/mman.h
@@ -7,4 +7,13 @@
#define PROT_BTI 0x10 /* BTI guarded page */
#define PROT_MTE 0x20 /* Normal Tagged mapping */
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#define PKEY_DISABLE_READ 0x8
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
+ PKEY_DISABLE_WRITE |\
+ PKEY_DISABLE_READ |\
+ PKEY_DISABLE_EXECUTE)
+
#endif /* ! _UAPI__ASM_MMAN_H */
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 8a45b7a411e0..bb7af77a30a7 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -98,6 +98,13 @@ struct esr_context {
__u64 esr;
};
+#define POE_MAGIC 0x504f4530
+
+struct poe_context {
+ struct _aarch64_ctx head;
+ __u64 por_el0;
+};
+
/*
* extra_context: describes extra space in the signal frame for
* additional structures that don't fit in sigcontext.__reserved[].
@@ -320,10 +327,10 @@ struct zt_context {
((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \
/ __SVE_VQ_BYTES * __SVE_VQ_BYTES)
-#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))
+#define ZA_SIG_REGS_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))
#define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \
- (SVE_SIG_ZREG_SIZE(vq) * n))
+ (SVE_SIG_ZREG_SIZE(vq) * (n)))
#define ZA_SIG_CONTEXT_SIZE(vq) \
(ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
@@ -334,7 +341,7 @@ struct zt_context {
#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context)
-#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n)
+#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * (n))
#define ZT_SIG_CONTEXT_SIZE(n) \
(sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n))
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index f6b6b4507357..dfefbdf4073a 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -456,6 +456,14 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
};
#endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
+static const struct midr_range erratum_ac03_cpu_38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -772,7 +780,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.desc = "AmpereOne erratum AC03_CPU_38",
.capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38,
- ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
},
#endif
{
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 646ecd3069fd..718728a85430 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -466,6 +466,8 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
+ FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
@@ -2348,6 +2350,14 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
}
+#ifdef CONFIG_ARM64_POE
+static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1x_E0POE);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_E0POE);
+}
+#endif
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2870,6 +2880,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_nv1,
ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
},
+#ifdef CONFIG_ARM64_POE
+ {
+ .desc = "Stage-1 Permission Overlay Extension (S1POE)",
+ .capability = ARM64_HAS_S1POE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_poe,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP)
+ },
+#endif
{},
};
@@ -3034,6 +3054,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2),
+#ifdef CONFIG_ARM64_POE
+ HWCAP_CAP(ID_AA64MMFR3_EL1, S1POE, IMP, CAP_HWCAP, KERNEL_HWCAP_POE),
+#endif
{},
};
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 09eeaa24d456..44718d0482b3 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -143,6 +143,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma",
[KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4",
[KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2",
+ [KERNEL_HWCAP_POE] = "poe",
};
#ifdef CONFIG_COMPAT
@@ -280,7 +281,7 @@ const struct seq_operations cpuinfo_op = {
};
-static struct kobj_type cpuregs_kobj_type = {
+static const struct kobj_type cpuregs_kobj_type = {
.sysfs_ops = &kobj_sysfs_ops,
};
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 02870beb271e..7b11d84f533c 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -407,7 +407,7 @@ int swsusp_arch_resume(void)
void *, phys_addr_t, phys_addr_t);
struct trans_pgd_info trans_info = {
.trans_alloc_page = hibernate_page_alloc,
- .trans_alloc_arg = (void *)GFP_ATOMIC,
+ .trans_alloc_arg = (__force void *)GFP_ATOMIC,
};
/*
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index f872c57e9909..fd9a7bed83ce 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -6,28 +6,7 @@
* Copyright (C) 2014 ARM Ltd.
*/
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
#include <linux/pci.h>
-#include <linux/pci-acpi.h>
-#include <linux/pci-ecam.h>
-#include <linux/slab.h>
-
-#ifdef CONFIG_ACPI
-/*
- * Try to assign the IRQ number when probing a new device
- */
-int pcibios_alloc_irq(struct pci_dev *dev)
-{
- if (!acpi_disabled)
- acpi_pci_irq_enable(dev);
-
- return 0;
-}
-#endif
/*
* raw_pci_read/write - Platform-specific PCI config space access.
@@ -61,173 +40,3 @@ int pcibus_to_node(struct pci_bus *bus)
EXPORT_SYMBOL(pcibus_to_node);
#endif
-
-#ifdef CONFIG_ACPI
-
-struct acpi_pci_generic_root_info {
- struct acpi_pci_root_info common;
- struct pci_config_window *cfg; /* config space mapping */
-};
-
-int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
-{
- struct pci_config_window *cfg = bus->sysdata;
- struct acpi_device *adev = to_acpi_device(cfg->parent);
- struct acpi_pci_root *root = acpi_driver_data(adev);
-
- return root->segment;
-}
-
-int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
-{
- struct pci_config_window *cfg;
- struct acpi_device *adev;
- struct device *bus_dev;
-
- if (acpi_disabled)
- return 0;
-
- cfg = bridge->bus->sysdata;
-
- /*
- * On Hyper-V there is no corresponding ACPI device for a root bridge,
- * therefore ->parent is set as NULL by the driver. And set 'adev' as
- * NULL in this case because there is no proper ACPI device.
- */
- if (!cfg->parent)
- adev = NULL;
- else
- adev = to_acpi_device(cfg->parent);
-
- bus_dev = &bridge->bus->dev;
-
- ACPI_COMPANION_SET(&bridge->dev, adev);
- set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
-
- return 0;
-}
-
-static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
-{
- struct resource_entry *entry, *tmp;
- int status;
-
- status = acpi_pci_probe_root_resources(ci);
- resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
- if (!(entry->res->flags & IORESOURCE_WINDOW))
- resource_list_destroy_entry(entry);
- }
- return status;
-}
-
-/*
- * Lookup the bus range for the domain in MCFG, and set up config space
- * mapping.
- */
-static struct pci_config_window *
-pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
-{
- struct device *dev = &root->device->dev;
- struct resource *bus_res = &root->secondary;
- u16 seg = root->segment;
- const struct pci_ecam_ops *ecam_ops;
- struct resource cfgres;
- struct acpi_device *adev;
- struct pci_config_window *cfg;
- int ret;
-
- ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops);
- if (ret) {
- dev_err(dev, "%04x:%pR ECAM region not found\n", seg, bus_res);
- return NULL;
- }
-
- adev = acpi_resource_consumer(&cfgres);
- if (adev)
- dev_info(dev, "ECAM area %pR reserved by %s\n", &cfgres,
- dev_name(&adev->dev));
- else
- dev_warn(dev, FW_BUG "ECAM area %pR not reserved in ACPI namespace\n",
- &cfgres);
-
- cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
- if (IS_ERR(cfg)) {
- dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res,
- PTR_ERR(cfg));
- return NULL;
- }
-
- return cfg;
-}
-
-/* release_info: free resources allocated by init_info */
-static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci)
-{
- struct acpi_pci_generic_root_info *ri;
-
- ri = container_of(ci, struct acpi_pci_generic_root_info, common);
- pci_ecam_free(ri->cfg);
- kfree(ci->ops);
- kfree(ri);
-}
-
-/* Interface called from ACPI code to setup PCI host controller */
-struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
-{
- struct acpi_pci_generic_root_info *ri;
- struct pci_bus *bus, *child;
- struct acpi_pci_root_ops *root_ops;
- struct pci_host_bridge *host;
-
- ri = kzalloc(sizeof(*ri), GFP_KERNEL);
- if (!ri)
- return NULL;
-
- root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL);
- if (!root_ops) {
- kfree(ri);
- return NULL;
- }
-
- ri->cfg = pci_acpi_setup_ecam_mapping(root);
- if (!ri->cfg) {
- kfree(ri);
- kfree(root_ops);
- return NULL;
- }
-
- root_ops->release_info = pci_acpi_generic_release_info;
- root_ops->prepare_resources = pci_acpi_root_prepare_resources;
- root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops;
- bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
- if (!bus)
- return NULL;
-
- /* If we must preserve the resource configuration, claim now */
- host = pci_find_host_bridge(bus);
- if (host->preserve_config)
- pci_bus_claim_resources(bus);
-
- /*
- * Assign whatever was left unassigned. If we didn't claim above,
- * this will reassign everything.
- */
- pci_assign_unassigned_root_bus_resources(bus);
-
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
-
- return bus;
-}
-
-void pcibios_add_bus(struct pci_bus *bus)
-{
- acpi_pci_add_bus(bus);
-}
-
-void pcibios_remove_bus(struct pci_bus *bus)
-{
- acpi_pci_remove_bus(bus);
-}
-
-#endif
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4ae31b7af6c3..0540653fbf38 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -43,6 +43,7 @@
#include <linux/stacktrace.h>
#include <asm/alternative.h>
+#include <asm/arch_timer.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
@@ -271,12 +272,21 @@ static void flush_tagged_addr_state(void)
clear_thread_flag(TIF_TAGGED_ADDR);
}
+static void flush_poe(void)
+{
+ if (!system_supports_poe())
+ return;
+
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
void flush_thread(void)
{
fpsimd_flush_thread();
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
+ flush_poe();
}
void arch_release_task_struct(struct task_struct *tsk)
@@ -371,6 +381,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (system_supports_tpidr2())
p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+ if (system_supports_poe())
+ p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
childregs->compat_sp = stack_start;
@@ -472,27 +485,63 @@ static void entry_task_switch(struct task_struct *next)
}
/*
- * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
- * Ensure access is disabled when switching to a 32bit task, ensure
- * access is enabled when switching to a 64bit task.
+ * Handle sysreg updates for ARM erratum 1418040 which affects the 32bit view of
+ * CNTVCT, various other errata which require trapping all CNTVCT{,_EL0}
+ * accesses and prctl(PR_SET_TSC). Ensure access is disabled iff a workaround is
+ * required or PR_TSC_SIGSEGV is set.
*/
-static void erratum_1418040_thread_switch(struct task_struct *next)
+static void update_cntkctl_el1(struct task_struct *next)
{
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
- !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
- return;
+ struct thread_info *ti = task_thread_info(next);
- if (is_compat_thread(task_thread_info(next)))
+ if (test_ti_thread_flag(ti, TIF_TSC_SIGSEGV) ||
+ has_erratum_handler(read_cntvct_el0) ||
+ (IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
+ this_cpu_has_cap(ARM64_WORKAROUND_1418040) &&
+ is_compat_thread(ti)))
sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
else
sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
}
-static void erratum_1418040_new_exec(void)
+static void cntkctl_thread_switch(struct task_struct *prev,
+ struct task_struct *next)
+{
+ if ((read_ti_thread_flags(task_thread_info(prev)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)) !=
+ (read_ti_thread_flags(task_thread_info(next)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)))
+ update_cntkctl_el1(next);
+}
+
+static int do_set_tsc_mode(unsigned int val)
{
+ bool tsc_sigsegv;
+
+ if (val == PR_TSC_SIGSEGV)
+ tsc_sigsegv = true;
+ else if (val == PR_TSC_ENABLE)
+ tsc_sigsegv = false;
+ else
+ return -EINVAL;
+
preempt_disable();
- erratum_1418040_thread_switch(current);
+ update_thread_flag(TIF_TSC_SIGSEGV, tsc_sigsegv);
+ update_cntkctl_el1(current);
preempt_enable();
+
+ return 0;
+}
+
+static void permission_overlay_switch(struct task_struct *next)
+{
+ if (!system_supports_poe())
+ return;
+
+ current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+ if (current->thread.por_el0 != next->thread.por_el0) {
+ write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
+ }
}
/*
@@ -528,8 +577,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
contextidr_thread_switch(next);
entry_task_switch(next);
ssbs_thread_switch(next);
- erratum_1418040_thread_switch(next);
+ cntkctl_thread_switch(prev, next);
ptrauth_thread_switch_user(next);
+ permission_overlay_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -645,7 +695,7 @@ void arch_setup_new_exec(void)
current->mm->context.flags = mmflags;
ptrauth_thread_init_user();
mte_thread_init_user();
- erratum_1418040_new_exec();
+ do_set_tsc_mode(PR_TSC_ENABLE);
if (task_spec_ssb_noexec(current)) {
arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
@@ -754,3 +804,26 @@ int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
return prot;
}
#endif
+
+int get_tsc_mode(unsigned long adr)
+{
+ unsigned int val;
+
+ if (is_compat_task())
+ return -EINVAL;
+
+ if (test_thread_flag(TIF_TSC_SIGSEGV))
+ val = PR_TSC_SIGSEGV;
+ else
+ val = PR_TSC_ENABLE;
+
+ return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_tsc_mode(unsigned int val)
+{
+ if (is_compat_task())
+ return -EINVAL;
+
+ return do_set_tsc_mode(val);
+}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 0d022599eb61..b756578aeaee 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1440,6 +1440,39 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
}
#endif
+#ifdef CONFIG_ARM64_POE
+static int poe_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ return membuf_write(&to, &target->thread.por_el0,
+ sizeof(target->thread.por_el0));
+}
+
+static int poe_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ target->thread.por_el0 = ctrl;
+
+ return 0;
+}
+#endif
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -1469,6 +1502,9 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
REGSET_TAGGED_ADDR_CTRL,
#endif
+#ifdef CONFIG_ARM64_POE
+ REGSET_POE
+#endif
};
static const struct user_regset aarch64_regsets[] = {
@@ -1628,6 +1664,16 @@ static const struct user_regset aarch64_regsets[] = {
.set = tagged_addr_ctrl_set,
},
#endif
+#ifdef CONFIG_ARM64_POE
+ [REGSET_POE] = {
+ .core_note_type = NT_ARM_POE,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = poe_get,
+ .set = poe_set,
+ },
+#endif
};
static const struct user_regset_view user_aarch64_view = {
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 4a77f4976e11..561986947530 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -61,6 +61,7 @@ struct rt_sigframe_user_layout {
unsigned long za_offset;
unsigned long zt_offset;
unsigned long fpmr_offset;
+ unsigned long poe_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@@ -185,6 +186,8 @@ struct user_ctxs {
u32 zt_size;
struct fpmr_context __user *fpmr;
u32 fpmr_size;
+ struct poe_context __user *poe;
+ u32 poe_size;
};
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
@@ -258,6 +261,32 @@ static int restore_fpmr_context(struct user_ctxs *user)
return err;
}
+static int preserve_poe_context(struct poe_context __user *ctx)
+{
+ int err = 0;
+
+ __put_user_error(POE_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err);
+
+ return err;
+}
+
+static int restore_poe_context(struct user_ctxs *user)
+{
+ u64 por_el0;
+ int err = 0;
+
+ if (user->poe_size != sizeof(*user->poe))
+ return -EINVAL;
+
+ __get_user_error(por_el0, &(user->poe->por_el0), err);
+ if (!err)
+ write_sysreg_s(por_el0, SYS_POR_EL0);
+
+ return err;
+}
+
#ifdef CONFIG_ARM64_SVE
static int preserve_sve_context(struct sve_context __user *ctx)
@@ -621,6 +650,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->za = NULL;
user->zt = NULL;
user->fpmr = NULL;
+ user->poe = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -671,6 +701,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
+ case POE_MAGIC:
+ if (!system_supports_poe())
+ goto invalid;
+
+ if (user->poe)
+ goto invalid;
+
+ user->poe = (struct poe_context __user *)head;
+ user->poe_size = size;
+ break;
+
case SVE_MAGIC:
if (!system_supports_sve() && !system_supports_sme())
goto invalid;
@@ -857,6 +898,9 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0 && system_supports_sme2() && user.zt)
err = restore_zt_context(&user);
+ if (err == 0 && system_supports_poe() && user.poe)
+ err = restore_poe_context(&user);
+
return err;
}
@@ -980,6 +1024,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+ if (system_supports_poe()) {
+ err = sigframe_alloc(user, &user->poe_offset,
+ sizeof(struct poe_context));
+ if (err)
+ return err;
+ }
+
return sigframe_alloc_end(user);
}
@@ -1042,6 +1093,14 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_fpmr_context(fpmr_ctx);
}
+ if (system_supports_poe() && err == 0 && user->poe_offset) {
+ struct poe_context __user *poe_ctx =
+ apply_user_offset(user, user->poe_offset);
+
+ err |= preserve_poe_context(poe_ctx);
+ }
+
+
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
@@ -1178,6 +1237,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
sme_smstop();
}
+ if (system_supports_poe())
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f01f0fd7b7fe..3b3f6b56e733 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -68,7 +68,7 @@ enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
- IPI_CPU_CRASH_STOP,
+ IPI_CPU_STOP_NMI,
IPI_TIMER,
IPI_IRQ_WORK,
NR_IPI,
@@ -85,6 +85,8 @@ static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+static bool crash_stop;
+
static void ipi_setup(int cpu);
#ifdef CONFIG_HOTPLUG_CPU
@@ -823,7 +825,7 @@ static const char *ipi_types[MAX_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
- [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
+ [IPI_CPU_STOP_NMI] = "CPU stop NMIs",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
[IPI_CPU_BACKTRACE] = "CPU backtrace interrupts",
@@ -867,9 +869,9 @@ void arch_irq_work_raise(void)
}
#endif
-static void __noreturn local_cpu_stop(void)
+static void __noreturn local_cpu_stop(unsigned int cpu)
{
- set_cpu_online(smp_processor_id(), false);
+ set_cpu_online(cpu, false);
local_daif_mask();
sdei_mask_local_cpu();
@@ -883,21 +885,26 @@ static void __noreturn local_cpu_stop(void)
*/
void __noreturn panic_smp_self_stop(void)
{
- local_cpu_stop();
+ local_cpu_stop(smp_processor_id());
}
-#ifdef CONFIG_KEXEC_CORE
-static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
-#endif
-
static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
+ /*
+ * Use local_daif_mask() instead of local_irq_disable() to make sure
+ * that pseudo-NMIs are disabled. The "crash stop" code starts with
+ * an IRQ and falls back to NMI (which might be pseudo). If the IRQ
+ * finally goes through right as we're timing out then the NMI could
+ * interrupt us. It's better to prevent the NMI and let the IRQ
+ * finish since the pt_regs will be better.
+ */
+ local_daif_mask();
+
crash_save_cpu(regs, cpu);
- atomic_dec(&waiting_for_crash_ipi);
+ set_cpu_online(cpu, false);
- local_irq_disable();
sdei_mask_local_cpu();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
@@ -962,14 +969,12 @@ static void do_handle_IPI(int ipinr)
break;
case IPI_CPU_STOP:
- local_cpu_stop();
- break;
-
- case IPI_CPU_CRASH_STOP:
- if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+ case IPI_CPU_STOP_NMI:
+ if (IS_ENABLED(CONFIG_KEXEC_CORE) && crash_stop) {
ipi_cpu_crash_stop(cpu, get_irq_regs());
-
unreachable();
+ } else {
+ local_cpu_stop(cpu);
}
break;
@@ -1024,8 +1029,7 @@ static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
return false;
switch (ipi) {
- case IPI_CPU_STOP:
- case IPI_CPU_CRASH_STOP:
+ case IPI_CPU_STOP_NMI:
case IPI_CPU_BACKTRACE:
case IPI_KGDB_ROUNDUP:
return true;
@@ -1138,79 +1142,109 @@ static inline unsigned int num_other_online_cpus(void)
void smp_send_stop(void)
{
+ static unsigned long stop_in_progress;
+ cpumask_t mask;
unsigned long timeout;
- if (num_other_online_cpus()) {
- cpumask_t mask;
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0)
+ goto skip_ipi;
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
+ /* Only proceed if this is the first CPU to reach this code */
+ if (test_and_set_bit(0, &stop_in_progress))
+ return;
- if (system_state <= SYSTEM_RUNNING)
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_STOP);
- }
+ /*
+ * Send an IPI to all currently online CPUs except the CPU running
+ * this code.
+ *
+ * NOTE: we don't do anything here to prevent other CPUs from coming
+ * online after we snapshot `cpu_online_mask`. Ideally, the calling code
+ * should do something to prevent other CPUs from coming up. This code
+ * can be called in the panic path and thus it doesn't seem wise to
+ * grab the CPU hotplug mutex ourselves. Worst case:
+ * - If a CPU comes online as we're running, we'll likely notice it
+ * during the 1 second wait below and then we'll catch it when we try
+ * with an NMI (assuming NMIs are enabled) since we re-snapshot the
+ * mask before sending an NMI.
+ * - If we leave the function and see that CPUs are still online we'll
+ * at least print a warning. Especially without NMIs this function
+ * isn't foolproof anyway so calling code will just have to accept
+ * the fact that there could be cases where a CPU can't be stopped.
+ */
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
- /* Wait up to one second for other CPUs to stop */
+ if (system_state <= SYSTEM_RUNNING)
+ pr_crit("SMP: stopping secondary CPUs\n");
+
+ /*
+ * Start with a normal IPI and wait up to one second for other CPUs to
+ * stop. We do this first because it gives other processors a chance
+ * to exit critical sections / drop locks and makes the rest of the
+ * stop process (especially console flush) more robust.
+ */
+ smp_cross_call(&mask, IPI_CPU_STOP);
timeout = USEC_PER_SEC;
while (num_other_online_cpus() && timeout--)
udelay(1);
- if (num_other_online_cpus())
+ /*
+ * If CPUs are still online, try an NMI. There's no excuse for this to
+ * be slow, so we only give them an extra 10 ms to respond.
+ */
+ if (num_other_online_cpus() && ipi_should_be_nmi(IPI_CPU_STOP_NMI)) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+
+ smp_cross_call(&mask, IPI_CPU_STOP_NMI);
+ timeout = USEC_PER_MSEC * 10;
+ while (num_other_online_cpus() && timeout--)
+ udelay(1);
+ }
+
+ if (num_other_online_cpus()) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(cpu_online_mask));
+ cpumask_pr_args(&mask));
+ }
+skip_ipi:
sdei_mask_local_cpu();
}
#ifdef CONFIG_KEXEC_CORE
void crash_smp_send_stop(void)
{
- static int cpus_stopped;
- cpumask_t mask;
- unsigned long timeout;
-
/*
* This function can be called twice in panic path, but obviously
* we execute this only once.
+ *
+ * We use this same boolean to tell whether the IPI we send was a
+ * stop or a "crash stop".
*/
- if (cpus_stopped)
+ if (crash_stop)
return;
+ crash_stop = 1;
- cpus_stopped = 1;
+ smp_send_stop();
- /*
- * If this cpu is the only one alive at this point in time, online or
- * not, there are no stop messages to be sent around, so just back out.
- */
- if (num_other_online_cpus() == 0)
- goto skip_ipi;
-
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
-
- atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
-
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
-
- /* Wait up to one second for other CPUs to stop */
- timeout = USEC_PER_SEC;
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
- udelay(1);
-
- if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(&mask));
-
-skip_ipi:
- sdei_mask_local_cpu();
sdei_handler_abort();
}
bool smp_crash_stop_failed(void)
{
- return (atomic_read(&waiting_for_crash_ipi) > 0);
+ return num_other_online_cpus() != 0;
}
#endif
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 6b3258860377..2729faaee4b4 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -25,6 +25,7 @@
*
* @common: Common unwind state.
* @task: The task being unwound.
+ * @graph_idx: Used by ftrace_graph_ret_addr() for optimized stack unwinding.
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
@@ -32,6 +33,7 @@
struct kunwind_state {
struct unwind_state common;
struct task_struct *task;
+ int graph_idx;
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
@@ -106,7 +108,7 @@ kunwind_recover_return_address(struct kunwind_state *state)
if (state->task->ret_stack &&
(state->common.pc == (unsigned long)return_to_handler)) {
unsigned long orig_pc;
- orig_pc = ftrace_graph_ret_addr(state->task, NULL,
+ orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
state->common.pc,
(void *)state->common.fp);
if (WARN_ON_ONCE(state->common.pc == orig_pc))
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 9e22683aa921..563cbce11126 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -273,6 +273,12 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
force_sig_fault(signo, code, (void __user *)far);
}
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
+{
+ arm64_show_signal(SIGSEGV, str);
+ force_sig_pkuerr((void __user *)far, pkey);
+}
+
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
const char *str)
{
@@ -601,18 +607,26 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_read_counter());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_read_counter());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_get_rate());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_get_rate());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void mrs_handler(unsigned long esr, struct pt_regs *regs)
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8304eb342be9..ead632ad01b4 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -66,4 +66,21 @@ config PROTECTED_NVHE_STACKTRACE
If unsure, or not using protected nVHE (pKVM), say N.
+config PTDUMP_STAGE2_DEBUGFS
+ bool "Present the stage-2 pagetables to debugfs"
+ depends on KVM
+ depends on DEBUG_KERNEL
+ depends on DEBUG_FS
+ depends on GENERIC_PTDUMP
+ select PTDUMP_CORE
+ default n
+ help
+ Say Y here if you want to show the stage-2 kernel pagetables
+ layout in a debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+
+ If in doubt, say N.
+
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 86a629aaf0a1..3cf7adb2b503 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -17,7 +17,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
- arch_timer.o trng.o vmid.o emulate-nested.o nested.o \
+ arch_timer.o trng.o vmid.o emulate-nested.o nested.o at.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
@@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
+kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
always-y := hyp_constants.h hyp-constants.s
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9bef7638342e..fe0764173cd0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -46,6 +46,8 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
+#include "sys_regs.h"
+
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
enum kvm_wfx_trap_policy {
@@ -228,6 +230,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
void kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
kvm_sys_regs_create_debugfs(kvm);
+ kvm_s2_ptdump_create_debugfs(kvm);
}
static void kvm_destroy_mpidr_data(struct kvm *kvm)
@@ -821,15 +824,13 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
return ret;
}
- if (vcpu_has_nv(vcpu)) {
- ret = kvm_init_nv_sysregs(vcpu->kvm);
- if (ret)
- return ret;
- }
+ ret = kvm_finalize_sys_regs(vcpu);
+ if (ret)
+ return ret;
/*
- * This needs to happen after NV has imposed its own restrictions on
- * the feature set
+ * This needs to happen after any restriction has been applied
+ * to the feature set.
*/
kvm_calculate_traps(vcpu);
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
new file mode 100644
index 000000000000..39f0e87a340e
--- /dev/null
+++ b/arch/arm64/kvm/at.c
@@ -0,0 +1,1101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017 - Linaro Ltd
+ * Author: Jintack Lim <jintack.lim@linaro.org>
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/esr.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+enum trans_regime {
+ TR_EL10,
+ TR_EL20,
+ TR_EL2,
+};
+
+struct s1_walk_info {
+ u64 baddr;
+ enum trans_regime regime;
+ unsigned int max_oa_bits;
+ unsigned int pgshift;
+ unsigned int txsz;
+ int sl;
+ bool hpd;
+ bool be;
+ bool s2;
+};
+
+struct s1_walk_result {
+ union {
+ struct {
+ u64 desc;
+ u64 pa;
+ s8 level;
+ u8 APTable;
+ bool UXNTable;
+ bool PXNTable;
+ };
+ struct {
+ u8 fst;
+ bool ptw;
+ bool s2;
+ };
+ };
+ bool failed;
+};
+
+static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
+{
+ wr->fst = fst;
+ wr->ptw = ptw;
+ wr->s2 = s2;
+ wr->failed = true;
+}
+
+#define S1_MMU_DISABLED (-127)
+
+static int get_ia_size(struct s1_walk_info *wi)
+{
+ return 64 - wi->txsz;
+}
+
+/* Return true if the IPA is out of the OA range */
+static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
+{
+ return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
+}
+
+/* Return the translation regime that applies to an AT instruction */
+static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
+{
+ /*
+ * We only get here from guest EL2, so the translation
+ * regime AT applies to is solely defined by {E2H,TGE}.
+ */
+ switch (op) {
+ case OP_AT_S1E2R:
+ case OP_AT_S1E2W:
+ case OP_AT_S1E2A:
+ return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
+ break;
+ default:
+ return (vcpu_el2_e2h_is_set(vcpu) &&
+ vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
+ }
+}
+
+static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
+ struct s1_walk_result *wr, u64 va)
+{
+ u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
+ unsigned int stride, x;
+ bool va55, tbi, lva, as_el0;
+
+ hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ wi->regime = compute_translation_regime(vcpu, op);
+ as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
+
+ va55 = va & BIT(55);
+
+ if (wi->regime == TR_EL2 && va55)
+ goto addrsz;
+
+ wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
+
+ switch (wi->regime) {
+ case TR_EL10:
+ sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
+ ttbr = (va55 ?
+ vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
+ vcpu_read_sys_reg(vcpu, TTBR0_EL1));
+ break;
+ case TR_EL2:
+ case TR_EL20:
+ sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ ttbr = (va55 ?
+ vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
+ vcpu_read_sys_reg(vcpu, TTBR0_EL2));
+ break;
+ default:
+ BUG();
+ }
+
+ tbi = (wi->regime == TR_EL2 ?
+ FIELD_GET(TCR_EL2_TBI, tcr) :
+ (va55 ?
+ FIELD_GET(TCR_TBI1, tcr) :
+ FIELD_GET(TCR_TBI0, tcr)));
+
+ if (!tbi && (u64)sign_extend64(va, 55) != va)
+ goto addrsz;
+
+ va = (u64)sign_extend64(va, 55);
+
+ /* Let's put the MMU disabled case aside immediately */
+ switch (wi->regime) {
+ case TR_EL10:
+ /*
+ * If dealing with the EL1&0 translation regime, 3 things
+ * can disable the S1 translation:
+ *
+ * - HCR_EL2.DC = 1
+ * - HCR_EL2.{E2H,TGE} = {0,1}
+ * - SCTLR_EL1.M = 0
+ *
+ * The TGE part is interesting. If we have decided that this
+ * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
+ * {0,x}, and we only need to test for TGE == 1.
+ */
+ if (hcr & (HCR_DC | HCR_TGE)) {
+ wr->level = S1_MMU_DISABLED;
+ break;
+ }
+ fallthrough;
+ case TR_EL2:
+ case TR_EL20:
+ if (!(sctlr & SCTLR_ELx_M))
+ wr->level = S1_MMU_DISABLED;
+ break;
+ }
+
+ if (wr->level == S1_MMU_DISABLED) {
+ if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
+ goto addrsz;
+
+ wr->pa = va;
+ return 0;
+ }
+
+ wi->be = sctlr & SCTLR_ELx_EE;
+
+ wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
+ wi->hpd &= (wi->regime == TR_EL2 ?
+ FIELD_GET(TCR_EL2_HPD, tcr) :
+ (va55 ?
+ FIELD_GET(TCR_HPD1, tcr) :
+ FIELD_GET(TCR_HPD0, tcr)));
+
+ /* Someone was silly enough to encode TG0/TG1 differently */
+ if (va55) {
+ wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
+ tg = FIELD_GET(TCR_TG1_MASK, tcr);
+
+ switch (tg << TCR_TG1_SHIFT) {
+ case TCR_TG1_4K:
+ wi->pgshift = 12; break;
+ case TCR_TG1_16K:
+ wi->pgshift = 14; break;
+ case TCR_TG1_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ wi->pgshift = 16; break;
+ }
+ } else {
+ wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
+ tg = FIELD_GET(TCR_TG0_MASK, tcr);
+
+ switch (tg << TCR_TG0_SHIFT) {
+ case TCR_TG0_4K:
+ wi->pgshift = 12; break;
+ case TCR_TG0_16K:
+ wi->pgshift = 14; break;
+ case TCR_TG0_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ wi->pgshift = 16; break;
+ }
+ }
+
+ /* R_PLCGL, R_YXNYW */
+ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
+ if (wi->txsz > 39)
+ goto transfault_l0;
+ } else {
+ if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
+ goto transfault_l0;
+ }
+
+ /* R_GTJBY, R_SXWGM */
+ switch (BIT(wi->pgshift)) {
+ case SZ_4K:
+ lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
+ lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
+ break;
+ case SZ_16K:
+ lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
+ lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
+ break;
+ case SZ_64K:
+ lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
+ break;
+ }
+
+ if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
+ goto transfault_l0;
+
+ ia_bits = get_ia_size(wi);
+
+ /* R_YYVYV, I_THCZK */
+ if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
+ (va55 && va < GENMASK(63, ia_bits)))
+ goto transfault_l0;
+
+ /* I_ZFSYQ */
+ if (wi->regime != TR_EL2 &&
+ (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
+ goto transfault_l0;
+
+ /* R_BNDVG and following statements */
+ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
+ as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
+ goto transfault_l0;
+
+ /* AArch64.S1StartLevel() */
+ stride = wi->pgshift - 3;
+ wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
+
+ ps = (wi->regime == TR_EL2 ?
+ FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
+
+ wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
+
+ /* Compute minimal alignment */
+ x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
+
+ wi->baddr = ttbr & TTBRx_EL1_BADDR;
+
+ /* R_VPBBF */
+ if (check_output_size(wi->baddr, wi))
+ goto addrsz;
+
+ wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
+
+ return 0;
+
+addrsz: /* Address Size Fault level 0 */
+ fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
+ return -EFAULT;
+
+transfault_l0: /* Translation Fault level 0 */
+ fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
+ return -EFAULT;
+}
+
+static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
+ struct s1_walk_result *wr, u64 va)
+{
+ u64 va_top, va_bottom, baddr, desc;
+ int level, stride, ret;
+
+ level = wi->sl;
+ stride = wi->pgshift - 3;
+ baddr = wi->baddr;
+
+ va_top = get_ia_size(wi) - 1;
+
+ while (1) {
+ u64 index, ipa;
+
+ va_bottom = (3 - level) * stride + wi->pgshift;
+ index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
+
+ ipa = baddr | index;
+
+ if (wi->s2) {
+ struct kvm_s2_trans s2_trans = {};
+
+ ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
+ if (ret) {
+ fail_s1_walk(wr,
+ (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
+ true, true);
+ return ret;
+ }
+
+ if (!kvm_s2_trans_readable(&s2_trans)) {
+ fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
+ true, true);
+
+ return -EPERM;
+ }
+
+ ipa = kvm_s2_trans_output(&s2_trans);
+ }
+
+ ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
+ if (ret) {
+ fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
+ true, false);
+ return ret;
+ }
+
+ if (wi->be)
+ desc = be64_to_cpu((__force __be64)desc);
+ else
+ desc = le64_to_cpu((__force __le64)desc);
+
+ /* Invalid descriptor */
+ if (!(desc & BIT(0)))
+ goto transfault;
+
+ /* Block mapping, check validity down the line */
+ if (!(desc & BIT(1)))
+ break;
+
+ /* Page mapping */
+ if (level == 3)
+ break;
+
+ /* Table handling */
+ if (!wi->hpd) {
+ wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
+ wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
+ wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
+ }
+
+ baddr = desc & GENMASK_ULL(47, wi->pgshift);
+
+ /* Check for out-of-range OA */
+ if (check_output_size(baddr, wi))
+ goto addrsz;
+
+ /* Prepare for next round */
+ va_top = va_bottom - 1;
+ level++;
+ }
+
+ /* Block mapping, check the validity of the level */
+ if (!(desc & BIT(1))) {
+ bool valid_block = false;
+
+ switch (BIT(wi->pgshift)) {
+ case SZ_4K:
+ valid_block = level == 1 || level == 2;
+ break;
+ case SZ_16K:
+ case SZ_64K:
+ valid_block = level == 2;
+ break;
+ }
+
+ if (!valid_block)
+ goto transfault;
+ }
+
+ if (check_output_size(desc & GENMASK(47, va_bottom), wi))
+ goto addrsz;
+
+ va_bottom += contiguous_bit_shift(desc, wi, level);
+
+ wr->failed = false;
+ wr->level = level;
+ wr->desc = desc;
+ wr->pa = desc & GENMASK(47, va_bottom);
+ wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
+
+ return 0;
+
+addrsz:
+ fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
+ return -EINVAL;
+transfault:
+ fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
+ return -ENOENT;
+}
+
+struct mmu_config {
+ u64 ttbr0;
+ u64 ttbr1;
+ u64 tcr;
+ u64 mair;
+ u64 sctlr;
+ u64 vttbr;
+ u64 vtcr;
+ u64 hcr;
+};
+
+static void __mmu_config_save(struct mmu_config *config)
+{
+ config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
+ config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
+ config->tcr = read_sysreg_el1(SYS_TCR);
+ config->mair = read_sysreg_el1(SYS_MAIR);
+ config->sctlr = read_sysreg_el1(SYS_SCTLR);
+ config->vttbr = read_sysreg(vttbr_el2);
+ config->vtcr = read_sysreg(vtcr_el2);
+ config->hcr = read_sysreg(hcr_el2);
+}
+
+static void __mmu_config_restore(struct mmu_config *config)
+{
+ write_sysreg(config->hcr, hcr_el2);
+
+ /*
+ * ARM errata 1165522 and 1530923 require TGE to be 1 before
+ * we update the guest state.
+ */
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+
+ write_sysreg_el1(config->ttbr0, SYS_TTBR0);
+ write_sysreg_el1(config->ttbr1, SYS_TTBR1);
+ write_sysreg_el1(config->tcr, SYS_TCR);
+ write_sysreg_el1(config->mair, SYS_MAIR);
+ write_sysreg_el1(config->sctlr, SYS_SCTLR);
+ write_sysreg(config->vttbr, vttbr_el2);
+ write_sysreg(config->vtcr, vtcr_el2);
+}
+
+static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ u64 host_pan;
+ bool fail;
+
+ host_pan = read_sysreg_s(SYS_PSTATE_PAN);
+ write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
+
+ switch (op) {
+ case OP_AT_S1E1RP:
+ fail = __kvm_at(OP_AT_S1E1RP, vaddr);
+ break;
+ case OP_AT_S1E1WP:
+ fail = __kvm_at(OP_AT_S1E1WP, vaddr);
+ break;
+ }
+
+ write_sysreg_s(host_pan, SYS_PSTATE_PAN);
+
+ return fail;
+}
+
+#define MEMATTR(ic, oc) (MEMATTR_##oc << 4 | MEMATTR_##ic)
+#define MEMATTR_NC 0b0100
+#define MEMATTR_Wt 0b1000
+#define MEMATTR_Wb 0b1100
+#define MEMATTR_WbRaWa 0b1111
+
+#define MEMATTR_IS_DEVICE(m) (((m) & GENMASK(7, 4)) == 0)
+
+static u8 s2_memattr_to_attr(u8 memattr)
+{
+ memattr &= 0b1111;
+
+ switch (memattr) {
+ case 0b0000:
+ case 0b0001:
+ case 0b0010:
+ case 0b0011:
+ return memattr << 2;
+ case 0b0100:
+ return MEMATTR(Wb, Wb);
+ case 0b0101:
+ return MEMATTR(NC, NC);
+ case 0b0110:
+ return MEMATTR(Wt, NC);
+ case 0b0111:
+ return MEMATTR(Wb, NC);
+ case 0b1000:
+ /* Reserved, assume NC */
+ return MEMATTR(NC, NC);
+ case 0b1001:
+ return MEMATTR(NC, Wt);
+ case 0b1010:
+ return MEMATTR(Wt, Wt);
+ case 0b1011:
+ return MEMATTR(Wb, Wt);
+ case 0b1100:
+ /* Reserved, assume NC */
+ return MEMATTR(NC, NC);
+ case 0b1101:
+ return MEMATTR(NC, Wb);
+ case 0b1110:
+ return MEMATTR(Wt, Wb);
+ case 0b1111:
+ return MEMATTR(Wb, Wb);
+ default:
+ unreachable();
+ }
+}
+
+static u8 combine_s1_s2_attr(u8 s1, u8 s2)
+{
+ bool transient;
+ u8 final = 0;
+
+ /* Upgrade transient s1 to non-transient to simplify things */
+ switch (s1) {
+ case 0b0001 ... 0b0011: /* Normal, Write-Through Transient */
+ transient = true;
+ s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
+ break;
+ case 0b0101 ... 0b0111: /* Normal, Write-Back Transient */
+ transient = true;
+ s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
+ break;
+ default:
+ transient = false;
+ }
+
+ /* S2CombineS1AttrHints() */
+ if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
+ (s2 & GENMASK(3, 2)) == MEMATTR_NC)
+ final = MEMATTR_NC;
+ else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
+ (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
+ final = MEMATTR_Wt;
+ else
+ final = MEMATTR_Wb;
+
+ if (final != MEMATTR_NC) {
+ /* Inherit RaWa hints form S1 */
+ if (transient) {
+ switch (s1 & GENMASK(3, 2)) {
+ case MEMATTR_Wt:
+ final = 0;
+ break;
+ case MEMATTR_Wb:
+ final = MEMATTR_NC;
+ break;
+ }
+ }
+
+ final |= s1 & GENMASK(1, 0);
+ }
+
+ return final;
+}
+
+#define ATTR_NSH 0b00
+#define ATTR_RSV 0b01
+#define ATTR_OSH 0b10
+#define ATTR_ISH 0b11
+
+static u8 compute_sh(u8 attr, u64 desc)
+{
+ u8 sh;
+
+ /* Any form of device, as well as NC has SH[1:0]=0b10 */
+ if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
+ return ATTR_OSH;
+
+ sh = FIELD_GET(PTE_SHARED, desc);
+ if (sh == ATTR_RSV) /* Reserved, mapped to NSH */
+ sh = ATTR_NSH;
+
+ return sh;
+}
+
+static u8 combine_sh(u8 s1_sh, u8 s2_sh)
+{
+ if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
+ return ATTR_OSH;
+ if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
+ return ATTR_ISH;
+
+ return ATTR_NSH;
+}
+
+static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
+ struct kvm_s2_trans *tr)
+{
+ u8 s1_parattr, s2_memattr, final_attr;
+ u64 par;
+
+ /* If S2 has failed to translate, report the damage */
+ if (tr->esr) {
+ par = SYS_PAR_EL1_RES1;
+ par |= SYS_PAR_EL1_F;
+ par |= SYS_PAR_EL1_S;
+ par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
+ return par;
+ }
+
+ s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
+ s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
+
+ if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
+ s2_memattr &= ~BIT(3);
+
+ /* Combination of R_VRJSW and R_RHWZM */
+ switch (s2_memattr) {
+ case 0b0101:
+ if (MEMATTR_IS_DEVICE(s1_parattr))
+ final_attr = s1_parattr;
+ else
+ final_attr = MEMATTR(NC, NC);
+ break;
+ case 0b0110:
+ case 0b1110:
+ final_attr = MEMATTR(WbRaWa, WbRaWa);
+ break;
+ case 0b0111:
+ case 0b1111:
+ /* Preserve S1 attribute */
+ final_attr = s1_parattr;
+ break;
+ case 0b0100:
+ case 0b1100:
+ case 0b1101:
+ /* Reserved, do something non-silly */
+ final_attr = s1_parattr;
+ break;
+ default:
+ /* MemAttr[2]=0, Device from S2 */
+ final_attr = s2_memattr & GENMASK(1,0) << 2;
+ }
+ } else {
+ /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
+ u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
+
+ if (MEMATTR_IS_DEVICE(s1_parattr) ||
+ MEMATTR_IS_DEVICE(s2_parattr)) {
+ final_attr = min(s1_parattr, s2_parattr);
+ } else {
+ /* At this stage, this is memory vs memory */
+ final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
+ s2_parattr & 0xf);
+ final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
+ s2_parattr >> 4) << 4;
+ }
+ }
+
+ if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
+ !MEMATTR_IS_DEVICE(final_attr))
+ final_attr = MEMATTR(NC, NC);
+
+ par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
+ par |= tr->output & GENMASK(47, 12);
+ par |= FIELD_PREP(SYS_PAR_EL1_SH,
+ combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
+ compute_sh(final_attr, tr->desc)));
+
+ return par;
+}
+
+static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
+ enum trans_regime regime)
+{
+ u64 par;
+
+ if (wr->failed) {
+ par = SYS_PAR_EL1_RES1;
+ par |= SYS_PAR_EL1_F;
+ par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
+ par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
+ par |= wr->s2 ? SYS_PAR_EL1_S : 0;
+ } else if (wr->level == S1_MMU_DISABLED) {
+ /* MMU off or HCR_EL2.DC == 1 */
+ par = SYS_PAR_EL1_NSE;
+ par |= wr->pa & GENMASK_ULL(47, 12);
+
+ if (regime == TR_EL10 &&
+ (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
+ par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
+ MEMATTR(WbRaWa, WbRaWa));
+ par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
+ } else {
+ par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
+ par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
+ }
+ } else {
+ u64 mair, sctlr;
+ u8 sh;
+
+ par = SYS_PAR_EL1_NSE;
+
+ mair = (regime == TR_EL10 ?
+ vcpu_read_sys_reg(vcpu, MAIR_EL1) :
+ vcpu_read_sys_reg(vcpu, MAIR_EL2));
+
+ mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
+ mair &= 0xff;
+
+ sctlr = (regime == TR_EL10 ?
+ vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
+ vcpu_read_sys_reg(vcpu, SCTLR_EL2));
+
+ /* Force NC for memory if SCTLR_ELx.C is clear */
+ if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
+ mair = MEMATTR(NC, NC);
+
+ par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
+ par |= wr->pa & GENMASK_ULL(47, 12);
+
+ sh = compute_sh(mair, wr->desc);
+ par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
+ }
+
+ return par;
+}
+
+static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
+{
+ u64 sctlr;
+
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
+ return false;
+
+ if (regime == TR_EL10)
+ sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ else
+ sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+
+ return sctlr & SCTLR_EL1_EPAN;
+}
+
+static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ bool perm_fail, ur, uw, ux, pr, pw, px;
+ struct s1_walk_result wr = {};
+ struct s1_walk_info wi = {};
+ int ret, idx;
+
+ ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
+ if (ret)
+ goto compute_par;
+
+ if (wr.level == S1_MMU_DISABLED)
+ goto compute_par;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = walk_s1(vcpu, &wi, &wr, vaddr);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (ret)
+ goto compute_par;
+
+ /* FIXME: revisit when adding indirect permission support */
+ /* AArch64.S1DirectBasePermissions() */
+ if (wi.regime != TR_EL2) {
+ switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) {
+ case 0b00:
+ pr = pw = true;
+ ur = uw = false;
+ break;
+ case 0b01:
+ pr = pw = ur = uw = true;
+ break;
+ case 0b10:
+ pr = true;
+ pw = ur = uw = false;
+ break;
+ case 0b11:
+ pr = ur = true;
+ pw = uw = false;
+ break;
+ }
+
+ switch (wr.APTable) {
+ case 0b00:
+ break;
+ case 0b01:
+ ur = uw = false;
+ break;
+ case 0b10:
+ pw = uw = false;
+ break;
+ case 0b11:
+ pw = ur = uw = false;
+ break;
+ }
+
+ /* We don't use px for anything yet, but hey... */
+ px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw);
+ ux = !((wr.desc & PTE_UXN) || wr.UXNTable);
+
+ if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) {
+ bool pan;
+
+ pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT;
+ pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux);
+ pw &= !pan;
+ pr &= !pan;
+ }
+ } else {
+ ur = uw = ux = false;
+
+ if (!(wr.desc & PTE_RDONLY)) {
+ pr = pw = true;
+ } else {
+ pr = true;
+ pw = false;
+ }
+
+ if (wr.APTable & BIT(1))
+ pw = false;
+
+ /* XN maps to UXN */
+ px = !((wr.desc & PTE_UXN) || wr.UXNTable);
+ }
+
+ perm_fail = false;
+
+ switch (op) {
+ case OP_AT_S1E1RP:
+ case OP_AT_S1E1R:
+ case OP_AT_S1E2R:
+ perm_fail = !pr;
+ break;
+ case OP_AT_S1E1WP:
+ case OP_AT_S1E1W:
+ case OP_AT_S1E2W:
+ perm_fail = !pw;
+ break;
+ case OP_AT_S1E0R:
+ perm_fail = !ur;
+ break;
+ case OP_AT_S1E0W:
+ perm_fail = !uw;
+ break;
+ case OP_AT_S1E1A:
+ case OP_AT_S1E2A:
+ break;
+ default:
+ BUG();
+ }
+
+ if (perm_fail)
+ fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);
+
+compute_par:
+ return compute_par_s1(vcpu, &wr, wi.regime);
+}
+
+/*
+ * Return the PAR_EL1 value as the result of a valid translation.
+ *
+ * If the translation is unsuccessful, the value may only contain
+ * PAR_EL1.F, and cannot be taken at face value. It isn't an
+ * indication of the translation having failed, only that the fast
+ * path did not succeed, *unless* it indicates a S1 permission fault.
+ */
+static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ struct mmu_config config;
+ struct kvm_s2_mmu *mmu;
+ bool fail;
+ u64 par;
+
+ par = SYS_PAR_EL1_F;
+
+ /*
+ * We've trapped, so everything is live on the CPU. As we will
+ * be switching contexts behind everybody's back, disable
+ * interrupts while holding the mmu lock.
+ */
+ guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
+
+ /*
+ * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
+ * the right one (as we trapped from vEL2). If not, save the
+ * full MMU context.
+ */
+ if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
+ goto skip_mmu_switch;
+
+ /*
+ * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
+ * find it (recycled by another vcpu, for example). When this
+ * happens, admit defeat immediately and use the SW (slow) path.
+ */
+ mmu = lookup_s2_mmu(vcpu);
+ if (!mmu)
+ return par;
+
+ __mmu_config_save(&config);
+
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
+ __load_stage2(mmu, mmu->arch);
+
+skip_mmu_switch:
+ /* Clear TGE, enable S2 translation, we're rolling */
+ write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2);
+ isb();
+
+ switch (op) {
+ case OP_AT_S1E1RP:
+ case OP_AT_S1E1WP:
+ fail = at_s1e1p_fast(vcpu, op, vaddr);
+ break;
+ case OP_AT_S1E1R:
+ fail = __kvm_at(OP_AT_S1E1R, vaddr);
+ break;
+ case OP_AT_S1E1W:
+ fail = __kvm_at(OP_AT_S1E1W, vaddr);
+ break;
+ case OP_AT_S1E0R:
+ fail = __kvm_at(OP_AT_S1E0R, vaddr);
+ break;
+ case OP_AT_S1E0W:
+ fail = __kvm_at(OP_AT_S1E0W, vaddr);
+ break;
+ case OP_AT_S1E1A:
+ fail = __kvm_at(OP_AT_S1E1A, vaddr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ fail = true;
+ break;
+ }
+
+ if (!fail)
+ par = read_sysreg_par();
+
+ if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
+ __mmu_config_restore(&config);
+
+ return par;
+}
+
+static bool par_check_s1_perm_fault(u64 par)
+{
+ u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
+
+ return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
+ !(par & SYS_PAR_EL1_S));
+}
+
+void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
+
+ /*
+ * If PAR_EL1 reports that AT failed on a S1 permission fault, we
+ * know for sure that the PTW was able to walk the S1 tables and
+ * there's nothing else to do.
+ *
+ * If AT failed for any other reason, then we must walk the guest S1
+ * to emulate the instruction.
+ */
+ if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
+ par = handle_at_slow(vcpu, op, vaddr);
+
+ vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+}
+
+void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ u64 par;
+
+ /*
+ * We've trapped, so everything is live on the CPU. As we will be
+ * switching context behind everybody's back, disable interrupts...
+ */
+ scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
+ struct kvm_s2_mmu *mmu;
+ u64 val, hcr;
+ bool fail;
+
+ mmu = &vcpu->kvm->arch.mmu;
+
+ val = hcr = read_sysreg(hcr_el2);
+ val &= ~HCR_TGE;
+ val |= HCR_VM;
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ val |= HCR_NV | HCR_NV1;
+
+ write_sysreg(val, hcr_el2);
+ isb();
+
+ par = SYS_PAR_EL1_F;
+
+ switch (op) {
+ case OP_AT_S1E2R:
+ fail = __kvm_at(OP_AT_S1E1R, vaddr);
+ break;
+ case OP_AT_S1E2W:
+ fail = __kvm_at(OP_AT_S1E1W, vaddr);
+ break;
+ case OP_AT_S1E2A:
+ fail = __kvm_at(OP_AT_S1E1A, vaddr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ fail = true;
+ }
+
+ isb();
+
+ if (!fail)
+ par = read_sysreg_par();
+
+ write_sysreg(hcr, hcr_el2);
+ isb();
+ }
+
+ /* We failed the translation, let's replay it in slow motion */
+ if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
+ par = handle_at_slow(vcpu, op, vaddr);
+
+ vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+}
+
+void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ struct kvm_s2_trans out = {};
+ u64 ipa, par;
+ bool write;
+ int ret;
+
+ /* Do the stage-1 translation */
+ switch (op) {
+ case OP_AT_S12E1R:
+ op = OP_AT_S1E1R;
+ write = false;
+ break;
+ case OP_AT_S12E1W:
+ op = OP_AT_S1E1W;
+ write = true;
+ break;
+ case OP_AT_S12E0R:
+ op = OP_AT_S1E0R;
+ write = false;
+ break;
+ case OP_AT_S12E0W:
+ op = OP_AT_S1E0W;
+ write = true;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ __kvm_at_s1e01(vcpu, op, vaddr);
+ par = vcpu_read_sys_reg(vcpu, PAR_EL1);
+ if (par & SYS_PAR_EL1_F)
+ return;
+
+ /*
+ * If we only have a single stage of translation (E2H=0 or
+ * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
+ */
+ if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
+ !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
+ return;
+
+ /* Do the stage-2 translation */
+ ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
+ out.esr = 0;
+ ret = kvm_walk_nested_s2(vcpu, ipa, &out);
+ if (ret < 0)
+ return;
+
+ /* Check the access permission */
+ if (!out.esr &&
+ ((!write && !out.readable) || (write && !out.writable)))
+ out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
+
+ par = compute_par_s12(vcpu, par, &out);
+ vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+}
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 05166eccea0a..05b6435d02a9 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -83,14 +83,20 @@ enum cgt_group_id {
CGT_CPTR_TAM,
CGT_CPTR_TCPAC,
+ CGT_HCRX_EnFPM,
CGT_HCRX_TCR2En,
+ CGT_ICH_HCR_TC,
+ CGT_ICH_HCR_TALL0,
+ CGT_ICH_HCR_TALL1,
+ CGT_ICH_HCR_TDIR,
+
/*
* Anything after this point is a combination of coarse trap
* controls, which must all be evaluated to decide what to do.
*/
__MULTIPLE_CONTROL_BITS__,
- CGT_HCR_IMO_FMO = __MULTIPLE_CONTROL_BITS__,
+ CGT_HCR_IMO_FMO_ICH_HCR_TC = __MULTIPLE_CONTROL_BITS__,
CGT_HCR_TID2_TID4,
CGT_HCR_TTLB_TTLBIS,
CGT_HCR_TTLB_TTLBOS,
@@ -105,6 +111,8 @@ enum cgt_group_id {
CGT_MDCR_TDE_TDRA,
CGT_MDCR_TDCC_TDE_TDA,
+ CGT_ICH_HCR_TC_TDIR,
+
/*
* Anything after this point requires a callback evaluating a
* complex trap condition. Ugly stuff.
@@ -372,12 +380,42 @@ static const struct trap_bits coarse_trap_bits[] = {
.mask = CPTR_EL2_TCPAC,
.behaviour = BEHAVE_FORWARD_ANY,
},
+ [CGT_HCRX_EnFPM] = {
+ .index = HCRX_EL2,
+ .value = 0,
+ .mask = HCRX_EL2_EnFPM,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
[CGT_HCRX_TCR2En] = {
.index = HCRX_EL2,
.value = 0,
.mask = HCRX_EL2_TCR2En,
.behaviour = BEHAVE_FORWARD_ANY,
},
+ [CGT_ICH_HCR_TC] = {
+ .index = ICH_HCR_EL2,
+ .value = ICH_HCR_TC,
+ .mask = ICH_HCR_TC,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_ICH_HCR_TALL0] = {
+ .index = ICH_HCR_EL2,
+ .value = ICH_HCR_TALL0,
+ .mask = ICH_HCR_TALL0,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_ICH_HCR_TALL1] = {
+ .index = ICH_HCR_EL2,
+ .value = ICH_HCR_TALL1,
+ .mask = ICH_HCR_TALL1,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_ICH_HCR_TDIR] = {
+ .index = ICH_HCR_EL2,
+ .value = ICH_HCR_TDIR,
+ .mask = ICH_HCR_TDIR,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
};
#define MCB(id, ...) \
@@ -387,7 +425,6 @@ static const struct trap_bits coarse_trap_bits[] = {
}
static const enum cgt_group_id *coarse_control_combo[] = {
- MCB(CGT_HCR_IMO_FMO, CGT_HCR_IMO, CGT_HCR_FMO),
MCB(CGT_HCR_TID2_TID4, CGT_HCR_TID2, CGT_HCR_TID4),
MCB(CGT_HCR_TTLB_TTLBIS, CGT_HCR_TTLB, CGT_HCR_TTLBIS),
MCB(CGT_HCR_TTLB_TTLBOS, CGT_HCR_TTLB, CGT_HCR_TTLBOS),
@@ -402,6 +439,9 @@ static const enum cgt_group_id *coarse_control_combo[] = {
MCB(CGT_MDCR_TDE_TDOSA, CGT_MDCR_TDE, CGT_MDCR_TDOSA),
MCB(CGT_MDCR_TDE_TDRA, CGT_MDCR_TDE, CGT_MDCR_TDRA),
MCB(CGT_MDCR_TDCC_TDE_TDA, CGT_MDCR_TDCC, CGT_MDCR_TDE, CGT_MDCR_TDA),
+
+ MCB(CGT_HCR_IMO_FMO_ICH_HCR_TC, CGT_HCR_IMO, CGT_HCR_FMO, CGT_ICH_HCR_TC),
+ MCB(CGT_ICH_HCR_TC_TDIR, CGT_ICH_HCR_TC, CGT_ICH_HCR_TDIR),
};
typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *);
@@ -536,9 +576,9 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_CSSELR_EL1, CGT_HCR_TID2_TID4),
SR_RANGE_TRAP(SYS_ID_PFR0_EL1,
sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3),
- SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO),
- SR_TRAP(SYS_ICC_ASGI1R_EL1, CGT_HCR_IMO_FMO),
- SR_TRAP(SYS_ICC_SGI1R_EL1, CGT_HCR_IMO_FMO),
+ SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),
+ SR_TRAP(SYS_ICC_ASGI1R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),
+ SR_TRAP(SYS_ICC_SGI1R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),
SR_RANGE_TRAP(sys_reg(3, 0, 11, 0, 0),
sys_reg(3, 0, 11, 15, 7), CGT_HCR_TIDCP),
SR_RANGE_TRAP(sys_reg(3, 1, 11, 0, 0),
@@ -786,6 +826,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(OP_AT_S12E1W, CGT_HCR_NV),
SR_TRAP(OP_AT_S12E0R, CGT_HCR_NV),
SR_TRAP(OP_AT_S12E0W, CGT_HCR_NV),
+ SR_TRAP(OP_AT_S1E2A, CGT_HCR_NV),
SR_TRAP(OP_TLBI_IPAS2E1, CGT_HCR_NV),
SR_TRAP(OP_TLBI_RIPAS2E1, CGT_HCR_NV),
SR_TRAP(OP_TLBI_IPAS2LE1, CGT_HCR_NV),
@@ -867,6 +908,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(OP_AT_S1E0W, CGT_HCR_AT),
SR_TRAP(OP_AT_S1E1RP, CGT_HCR_AT),
SR_TRAP(OP_AT_S1E1WP, CGT_HCR_AT),
+ SR_TRAP(OP_AT_S1E1A, CGT_HCR_AT),
SR_TRAP(SYS_ERXPFGF_EL1, CGT_HCR_nFIEN),
SR_TRAP(SYS_ERXPFGCTL_EL1, CGT_HCR_nFIEN),
SR_TRAP(SYS_ERXPFGCDN_EL1, CGT_HCR_nFIEN),
@@ -1108,6 +1150,35 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_CNTP_CTL_EL0, CGT_CNTHCTL_EL1PTEN),
SR_TRAP(SYS_CNTPCT_EL0, CGT_CNTHCTL_EL1PCTEN),
SR_TRAP(SYS_CNTPCTSS_EL0, CGT_CNTHCTL_EL1PCTEN),
+ SR_TRAP(SYS_FPMR, CGT_HCRX_EnFPM),
+ /*
+ * IMPDEF choice:
+ * We treat ICC_SRE_EL2.{SRE,Enable) and ICV_SRE_EL1.SRE as
+ * RAO/WI. We therefore never consider ICC_SRE_EL2.Enable for
+ * ICC_SRE_EL1 access, and always handle it locally.
+ */
+ SR_TRAP(SYS_ICC_AP0R0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_AP0R1_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_AP0R2_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_AP0R3_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_AP1R0_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_AP1R1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_AP1R2_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_AP1R3_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_BPR0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_BPR1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_CTLR_EL1, CGT_ICH_HCR_TC),
+ SR_TRAP(SYS_ICC_DIR_EL1, CGT_ICH_HCR_TC_TDIR),
+ SR_TRAP(SYS_ICC_EOIR0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_EOIR1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_HPPIR0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_HPPIR1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_IAR0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_IAR1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_IGRPEN0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_IGRPEN1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_PMR_EL1, CGT_ICH_HCR_TC),
+ SR_TRAP(SYS_ICC_RPR_EL1, CGT_ICH_HCR_TC),
};
static DEFINE_XARRAY(sr_forward_xa);
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index c53e5b14038d..ea5484ce1f3b 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -63,6 +63,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
*/
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
*host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
+ *host_data_ptr(fpmr_ptr) = kern_hyp_va(&current->thread.uw.fpmr);
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
@@ -134,8 +135,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.sve_state = vcpu->arch.sve_state;
fp_state.sve_vl = vcpu->arch.sve_max_vl;
fp_state.sme_state = NULL;
- fp_state.svcr = &vcpu->arch.svcr;
- fp_state.fpmr = &vcpu->arch.fpmr;
+ fp_state.svcr = &__vcpu_sys_reg(vcpu, SVCR);
+ fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR);
fp_state.fp_type = &vcpu->arch.fp_type;
if (vcpu_has_sve(vcpu))
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 11098eb7eb44..962f985977c2 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -1045,6 +1045,11 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
mutex_lock(&kvm->slots_lock);
+ if (write && atomic_read(&kvm->nr_memslots_dirty_logging)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
while (length > 0) {
kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
void *maddr;
@@ -1059,6 +1064,7 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
page = pfn_to_online_page(pfn);
if (!page) {
/* Reject ZONE_DEVICE memory */
+ kvm_release_pfn_clean(pfn);
ret = -EFAULT;
goto out;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 9e13c1bc2ad5..17df94570f03 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -14,6 +14,7 @@
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
+ int ret;
u64 par, tmp;
/*
@@ -27,7 +28,9 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
* saved the guest context yet, and we may return early...
*/
par = read_sysreg_par();
- if (!__kvm_at("s1e1r", far))
+ ret = system_supports_poe() ? __kvm_at(OP_AT_S1E1A, far) :
+ __kvm_at(OP_AT_S1E1R, far);
+ if (!ret)
tmp = read_sysreg_par();
else
tmp = SYS_PAR_EL1_F; /* back to the guest */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 37ff87d782b6..46d52e8a3df3 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -403,6 +403,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
else
__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
+ if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
+ write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR);
+
/* Skip restoring fpexc32 for AArch64 guests */
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 4c0fdabaf8ae..1579a3c08a36 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -16,9 +16,15 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt);
+
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
+
+ // POR_EL0 can affect uaccess, so must be saved/restored early.
+ if (ctxt_has_s1poe(ctxt))
+ ctxt_sys_reg(ctxt, POR_EL0) = read_sysreg_s(SYS_POR_EL0);
}
static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
@@ -66,6 +72,17 @@ static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt)
return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, TCRX, IMP);
}
+static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!system_supports_poe())
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1POE, IMP);
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
@@ -80,6 +97,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR);
ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0);
}
+
+ if (ctxt_has_s1poe(ctxt))
+ ctxt_sys_reg(ctxt, POR_EL1) = read_sysreg_el1(SYS_POR);
}
ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR);
ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0);
@@ -120,6 +140,10 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
+
+ // POR_EL0 can affect uaccess, so must be saved/restored early.
+ if (ctxt_has_s1poe(ctxt))
+ write_sysreg_s(ctxt_sys_reg(ctxt, POR_EL0), SYS_POR_EL0);
}
static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
@@ -158,6 +182,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR);
write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0);
}
+
+ if (ctxt_has_s1poe(ctxt))
+ write_sysreg_el1(ctxt_sys_reg(ctxt, POR_EL1), SYS_POR);
}
write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR);
write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index e715c157c2c4..e433dfab882a 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -426,9 +426,9 @@ out:
return;
}
-static __always_inline void do_ffa_mem_xfer(const u64 func_id,
- struct arm_smccc_res *res,
- struct kvm_cpu_context *ctxt)
+static void __do_ffa_mem_xfer(const u64 func_id,
+ struct arm_smccc_res *res,
+ struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, len, ctxt, 1);
DECLARE_REG(u32, fraglen, ctxt, 2);
@@ -440,9 +440,6 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
u32 offset, nr_ranges;
int ret = 0;
- BUILD_BUG_ON(func_id != FFA_FN64_MEM_SHARE &&
- func_id != FFA_FN64_MEM_LEND);
-
if (addr_mbz || npages_mbz || fraglen > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
ret = FFA_RET_INVALID_PARAMETERS;
@@ -461,6 +458,11 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
goto out_unlock;
}
+ if (len > ffa_desc_buf.len) {
+ ret = FFA_RET_NO_MEMORY;
+ goto out_unlock;
+ }
+
buf = hyp_buffers.tx;
memcpy(buf, host_buffers.tx, fraglen);
@@ -512,6 +514,13 @@ err_unshare:
goto out_unlock;
}
+#define do_ffa_mem_xfer(fid, res, ctxt) \
+ do { \
+ BUILD_BUG_ON((fid) != FFA_FN64_MEM_SHARE && \
+ (fid) != FFA_FN64_MEM_LEND); \
+ __do_ffa_mem_xfer((fid), (res), (ctxt)); \
+ } while (0);
+
static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 07120b37da35..401af1835be6 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -130,7 +130,7 @@ alternative_else_nop_endif
/* Invalidate the stale TLBs from Bootloader */
tlbi alle2
- tlbi vmalls12e1
+ tlbi alle1
dsb sy
mov_q x0, INIT_SCTLR_EL2_MMU_ON
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index f43d845f3c4e..87692b566d90 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -62,6 +62,8 @@ static void fpsimd_sve_flush(void)
static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
{
+ bool has_fpmr;
+
if (!guest_owns_fp_regs())
return;
@@ -73,11 +75,18 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
else
__fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+ has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm));
+ if (has_fpmr)
+ __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR);
+
if (system_supports_sve())
__hyp_sve_restore_host();
else
__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+ if (has_fpmr)
+ write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
+
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 8f5c56d5b1cd..cc69106734ca 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -197,6 +197,15 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
} else {
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
}
+
+ if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) {
+ u64 val = read_sysreg_s(SYS_FPMR);
+
+ if (unlikely(is_protected_kvm_enabled()))
+ *host_data_ptr(fpmr) = val;
+ else
+ **host_data_ptr(fpmr_ptr) = val;
+ }
}
static const exit_handler_fn hyp_exit_handlers[] = {
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index ca3c09df8d7c..48da9ca9763f 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -132,10 +132,10 @@ static void exit_vmid_context(struct tlb_inv_context *cxt)
else
__load_host_stage2();
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Ensure write of the old VMID */
- isb();
+ /* Ensure write of the old VMID */
+ isb();
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
if (!(cxt->sctlr & SCTLR_ELx_M)) {
write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
isb();
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 9e2bbee77491..b11bcebac908 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -17,48 +17,6 @@
#define KVM_PTE_TYPE_PAGE 1
#define KVM_PTE_TYPE_TABLE 1
-#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
-
-#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
-
-#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
-
-#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
- KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
- KVM_PTE_LEAF_ATTR_HI_S2_XN)
-
-#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
-#define KVM_MAX_OWNER_ID 1
-
-/*
- * Used to indicate a pte for which a 'break-before-make' sequence is in
- * progress.
- */
-#define KVM_INVALID_PTE_LOCKED BIT(10)
-
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;
@@ -1547,7 +1505,6 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
*/
new = kvm_init_table_pte(childp, mm_ops);
stage2_make_pte(ctx, new);
- dsb(ishst);
return 0;
}
@@ -1559,8 +1516,11 @@ int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
.flags = KVM_PGTABLE_WALK_LEAF,
.arg = mc,
};
+ int ret;
- return kvm_pgtable_walk(pgt, addr, size, &walker);
+ ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+ dsb(ishst);
+ return ret;
}
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 7b397fad26f2..18d4677002b1 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -268,8 +268,16 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
* starting to mess with the rest of the GIC, and VMCR_EL2 in
* particular. This logic must be called before
* __vgic_v3_restore_state().
+ *
+ * However, if the vgic is disabled (ICH_HCR_EL2.EN==0), no GIC is
+ * provisioned at all. In order to prevent illegal accesses to the
+ * system registers to trap to EL1 (duh), force ICC_SRE_EL1.SRE to 1
+ * so that the trap bits can take effect. Yes, we *loves* the GIC.
*/
- if (!cpu_if->vgic_sre) {
+ if (!(cpu_if->vgic_hcr & ICH_HCR_EN)) {
+ write_gicreg(ICC_SRE_EL1_SRE, ICC_SRE_EL1);
+ isb();
+ } else if (!cpu_if->vgic_sre) {
write_gicreg(0, ICC_SRE_EL1);
isb();
write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
@@ -288,8 +296,9 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
/*
- * Prevent the guest from touching the GIC system registers if
- * SRE isn't enabled for GICv3 emulation.
+ * Prevent the guest from touching the ICC_SRE_EL1 system
+ * register. Note that this may not have any effect, as
+ * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
*/
write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
ICC_SRE_EL2);
@@ -297,10 +306,11 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
/*
* If we need to trap system registers, we must write
* ICH_HCR_EL2 anyway, even if no interrupts are being
- * injected,
+ * injected. Note that this also applies if we don't expect
+ * any system register access (no vgic at all).
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
+ cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
@@ -326,7 +336,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
* no interrupts were being injected, and we disable it again here.
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
+ cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(0, ICH_HCR_EL2);
}
@@ -1032,6 +1042,75 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
+ u32 sysreg, bool is_read)
+{
+ u64 ich_hcr;
+
+ if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ return false;
+
+ ich_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
+
+ switch (sysreg) {
+ case SYS_ICC_IGRPEN0_EL1:
+ if (is_read &&
+ (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ if (!is_read &&
+ (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ fallthrough;
+
+ case SYS_ICC_AP0Rn_EL1(0):
+ case SYS_ICC_AP0Rn_EL1(1):
+ case SYS_ICC_AP0Rn_EL1(2):
+ case SYS_ICC_AP0Rn_EL1(3):
+ case SYS_ICC_BPR0_EL1:
+ case SYS_ICC_EOIR0_EL1:
+ case SYS_ICC_HPPIR0_EL1:
+ case SYS_ICC_IAR0_EL1:
+ return ich_hcr & ICH_HCR_TALL0;
+
+ case SYS_ICC_IGRPEN1_EL1:
+ if (is_read &&
+ (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ if (!is_read &&
+ (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ fallthrough;
+
+ case SYS_ICC_AP1Rn_EL1(0):
+ case SYS_ICC_AP1Rn_EL1(1):
+ case SYS_ICC_AP1Rn_EL1(2):
+ case SYS_ICC_AP1Rn_EL1(3):
+ case SYS_ICC_BPR1_EL1:
+ case SYS_ICC_EOIR1_EL1:
+ case SYS_ICC_HPPIR1_EL1:
+ case SYS_ICC_IAR1_EL1:
+ return ich_hcr & ICH_HCR_TALL1;
+
+ case SYS_ICC_DIR_EL1:
+ if (ich_hcr & ICH_HCR_TDIR)
+ return true;
+
+ fallthrough;
+
+ case SYS_ICC_RPR_EL1:
+ case SYS_ICC_CTLR_EL1:
+ case SYS_ICC_PMR_EL1:
+ return ich_hcr & ICH_HCR_TC;
+
+ default:
+ return false;
+ }
+}
+
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
int rt;
@@ -1041,6 +1120,9 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
bool is_read;
u32 sysreg;
+ if (kern_hyp_va(vcpu->kvm)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
+ return 0;
+
esr = kvm_vcpu_get_esr(vcpu);
if (vcpu_mode_is_32bit(vcpu)) {
if (!kvm_condition_valid(vcpu)) {
@@ -1055,6 +1137,9 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+ if (__vgic_v3_check_trap_forwarding(vcpu, sysreg, is_read))
+ return 0;
+
switch (sysreg) {
case SYS_ICC_IAR0_EL1:
case SYS_ICC_IAR1_EL1:
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 77010b76c150..80581b1c3995 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -312,6 +312,9 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
{
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
+
+ if (kvm_has_fpmr(vcpu->kvm))
+ **host_data_ptr(fpmr_ptr) = read_sysreg_s(SYS_FPMR);
}
static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 6981b1bc0946..a509b63bd4dd 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1540,8 +1540,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vma_pagesize = min(vma_pagesize, (long)max_map_size);
}
- if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
+ /*
+ * Both the canonical IPA and fault IPA must be hugepage-aligned to
+ * ensure we find the right PFN and lay down the mapping in the right
+ * place.
+ */
+ if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) {
fault_ipa &= ~(vma_pagesize - 1);
+ ipa &= ~(vma_pagesize - 1);
+ }
gfn = ipa >> PAGE_SHIFT;
mte_allowed = kvm_vma_mte_allowed(vma);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index bab27f9d8cc6..638b6205526f 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -103,20 +103,6 @@ struct s2_walk_info {
bool be;
};
-static unsigned int ps_to_output_size(unsigned int ps)
-{
- switch (ps) {
- case 0: return 32;
- case 1: return 36;
- case 2: return 40;
- case 3: return 42;
- case 4: return 44;
- case 5:
- default:
- return 48;
- }
-}
-
static u32 compute_fsc(int level, u32 fsc)
{
return fsc | (level & 0x3);
@@ -256,7 +242,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
/* Check for valid descriptor at this point */
if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
@@ -266,7 +252,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
if (check_output_size(wi, desc)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
@@ -278,27 +264,24 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
if (level < first_block_level) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
- /*
- * We don't use the contiguous bit in the stage-2 ptes, so skip check
- * for misprogramming of the contiguous bit.
- */
-
if (check_output_size(wi, desc)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
if (!(desc & BIT(10))) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
+ addr_bottom += contiguous_bit_shift(desc, wi, level);
+
/* Calculate and return the result */
paddr = (desc & GENMASK_ULL(47, addr_bottom)) |
(ipa & GENMASK_ULL(addr_bottom - 1, 0));
@@ -307,7 +290,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
out->readable = desc & (0b01 << 6);
out->writable = desc & (0b10 << 6);
out->level = level;
- out->upper_attr = desc & GENMASK_ULL(63, 52);
+ out->desc = desc;
return 0;
}
@@ -954,19 +937,16 @@ static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1)
int kvm_init_nv_sysregs(struct kvm *kvm)
{
u64 res0, res1;
- int ret = 0;
- mutex_lock(&kvm->arch.config_lock);
+ lockdep_assert_held(&kvm->arch.config_lock);
if (kvm->arch.sysreg_masks)
- goto out;
+ return 0;
kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)),
GFP_KERNEL_ACCOUNT);
- if (!kvm->arch.sysreg_masks) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!kvm->arch.sysreg_masks)
+ return -ENOMEM;
limit_nv_id_regs(kvm);
@@ -1195,8 +1175,13 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1))
res0 |= ~(res0 | res1);
set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1);
-out:
- mutex_unlock(&kvm->arch.config_lock);
- return ret;
+ /* SCTLR_EL1 */
+ res0 = SCTLR_EL1_RES0;
+ res1 = SCTLR_EL1_RES1;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
+ res0 |= SCTLR_EL1_EPAN;
+ set_sysreg_masks(kvm, SCTLR_EL1, res0, res1);
+
+ return 0;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 82a2a003259c..ac36c438b8c1 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -233,7 +233,7 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
int i;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++)
pmu->pmc[i].idx = i;
}
@@ -260,7 +260,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
int i;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++)
kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
irq_work_sync(&vcpu->arch.pmu.overflow_work);
}
@@ -291,7 +291,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val)
return;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
+ for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
struct kvm_pmc *pmc;
if (!(val & BIT(i)))
@@ -323,7 +323,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
if (!kvm_vcpu_has_pmu(vcpu) || !val)
return;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
+ for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
struct kvm_pmc *pmc;
if (!(val & BIT(i)))
@@ -910,10 +910,10 @@ u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;
/*
- * The arm_pmu->num_events considers the cycle counter as well.
- * Ignore that and return only the general-purpose counters.
+ * The arm_pmu->cntr_mask considers the fixed counter(s) as well.
+ * Ignore those and return only the general-purpose counters.
*/
- return arm_pmu->num_events - 1;
+ return bitmap_weight(arm_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS);
}
static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 329819806096..0b3adf3e17b4 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -5,6 +5,8 @@
*/
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/perf/arm_pmuv3.h>
static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events);
@@ -35,7 +37,7 @@ struct kvm_pmu_events *kvm_get_pmu_events(void)
* Add events to track that we may want to switch at guest entry/exit
* time.
*/
-void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
+void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
@@ -51,7 +53,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
/*
* Stop tracking events
*/
-void kvm_clr_pmu_events(u32 clr)
+void kvm_clr_pmu_events(u64 clr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
@@ -62,79 +64,32 @@ void kvm_clr_pmu_events(u32 clr)
pmu->events_guest &= ~clr;
}
-#define PMEVTYPER_READ_CASE(idx) \
- case idx: \
- return read_sysreg(pmevtyper##idx##_el0)
-
-#define PMEVTYPER_WRITE_CASE(idx) \
- case idx: \
- write_sysreg(val, pmevtyper##idx##_el0); \
- break
-
-#define PMEVTYPER_CASES(readwrite) \
- PMEVTYPER_##readwrite##_CASE(0); \
- PMEVTYPER_##readwrite##_CASE(1); \
- PMEVTYPER_##readwrite##_CASE(2); \
- PMEVTYPER_##readwrite##_CASE(3); \
- PMEVTYPER_##readwrite##_CASE(4); \
- PMEVTYPER_##readwrite##_CASE(5); \
- PMEVTYPER_##readwrite##_CASE(6); \
- PMEVTYPER_##readwrite##_CASE(7); \
- PMEVTYPER_##readwrite##_CASE(8); \
- PMEVTYPER_##readwrite##_CASE(9); \
- PMEVTYPER_##readwrite##_CASE(10); \
- PMEVTYPER_##readwrite##_CASE(11); \
- PMEVTYPER_##readwrite##_CASE(12); \
- PMEVTYPER_##readwrite##_CASE(13); \
- PMEVTYPER_##readwrite##_CASE(14); \
- PMEVTYPER_##readwrite##_CASE(15); \
- PMEVTYPER_##readwrite##_CASE(16); \
- PMEVTYPER_##readwrite##_CASE(17); \
- PMEVTYPER_##readwrite##_CASE(18); \
- PMEVTYPER_##readwrite##_CASE(19); \
- PMEVTYPER_##readwrite##_CASE(20); \
- PMEVTYPER_##readwrite##_CASE(21); \
- PMEVTYPER_##readwrite##_CASE(22); \
- PMEVTYPER_##readwrite##_CASE(23); \
- PMEVTYPER_##readwrite##_CASE(24); \
- PMEVTYPER_##readwrite##_CASE(25); \
- PMEVTYPER_##readwrite##_CASE(26); \
- PMEVTYPER_##readwrite##_CASE(27); \
- PMEVTYPER_##readwrite##_CASE(28); \
- PMEVTYPER_##readwrite##_CASE(29); \
- PMEVTYPER_##readwrite##_CASE(30)
-
/*
* Read a value direct from PMEVTYPER<idx> where idx is 0-30
- * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31).
+ * or PMxCFILTR_EL0 where idx is 31-32.
*/
static u64 kvm_vcpu_pmu_read_evtype_direct(int idx)
{
- switch (idx) {
- PMEVTYPER_CASES(READ);
- case ARMV8_PMU_CYCLE_IDX:
- return read_sysreg(pmccfiltr_el0);
- default:
- WARN_ON(1);
- }
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ return read_pmccfiltr();
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ return read_pmicfiltr();
- return 0;
+ return read_pmevtypern(idx);
}
/*
* Write a value direct to PMEVTYPER<idx> where idx is 0-30
- * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31).
+ * or PMxCFILTR_EL0 where idx is 31-32.
*/
static void kvm_vcpu_pmu_write_evtype_direct(int idx, u32 val)
{
- switch (idx) {
- PMEVTYPER_CASES(WRITE);
- case ARMV8_PMU_CYCLE_IDX:
- write_sysreg(val, pmccfiltr_el0);
- break;
- default:
- WARN_ON(1);
- }
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ write_pmccfiltr(val);
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ write_pmicfiltr(val);
+ else
+ write_pmevtypern(idx, val);
}
/*
@@ -145,7 +100,7 @@ static void kvm_vcpu_pmu_enable_el0(unsigned long events)
u64 typer;
u32 counter;
- for_each_set_bit(counter, &events, 32) {
+ for_each_set_bit(counter, &events, ARMPMU_MAX_HWEVENTS) {
typer = kvm_vcpu_pmu_read_evtype_direct(counter);
typer &= ~ARMV8_PMU_EXCLUDE_EL0;
kvm_vcpu_pmu_write_evtype_direct(counter, typer);
@@ -160,7 +115,7 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
u64 typer;
u32 counter;
- for_each_set_bit(counter, &events, 32) {
+ for_each_set_bit(counter, &events, ARMPMU_MAX_HWEVENTS) {
typer = kvm_vcpu_pmu_read_evtype_direct(counter);
typer |= ARMV8_PMU_EXCLUDE_EL0;
kvm_vcpu_pmu_write_evtype_direct(counter, typer);
@@ -176,7 +131,7 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_events *pmu;
- u32 events_guest, events_host;
+ u64 events_guest, events_host;
if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
@@ -197,7 +152,7 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_events *pmu;
- u32 events_guest, events_host;
+ u64 events_guest, events_host;
if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
new file mode 100644
index 000000000000..e4a342e903e2
--- /dev/null
+++ b/arch/arm64/kvm/ptdump.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Debug helper used to dump the stage-2 pagetables of the system and their
+ * associated permissions.
+ *
+ * Copyright (C) Google, 2024
+ * Author: Sebastian Ene <sebastianene@google.com>
+ */
+#include <linux/debugfs.h>
+#include <linux/kvm_host.h>
+#include <linux/seq_file.h>
+
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_pgtable.h>
+#include <asm/ptdump.h>
+
+#define MARKERS_LEN 2
+#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
+
+struct kvm_ptdump_guest_state {
+ struct kvm *kvm;
+ struct ptdump_pg_state parser_state;
+ struct addr_marker ipa_marker[MARKERS_LEN];
+ struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS];
+ struct ptdump_range range[MARKERS_LEN];
+};
+
+static const struct ptdump_prot_bits stage2_pte_bits[] = {
+ {
+ .mask = PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "F",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
+ .set = "R",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
+ .set = "W",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "X",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
+ .set = "AF",
+ .clear = " ",
+ }, {
+ .mask = PTE_TABLE_BIT | PTE_VALID,
+ .val = PTE_VALID,
+ .set = "BLK",
+ .clear = " ",
+ },
+};
+
+static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit)
+{
+ struct ptdump_pg_state *st = ctx->arg;
+ struct ptdump_state *pt_st = &st->ptdump;
+
+ note_page(pt_st, ctx->addr, ctx->level, ctx->old);
+
+ return 0;
+}
+
+static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
+{
+ u32 i;
+ u64 mask;
+
+ if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL))
+ return -EINVAL;
+
+ mask = 0;
+ for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
+ mask |= stage2_pte_bits[i].mask;
+
+ for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
+ snprintf(level[i].name, sizeof(level[i].name), "%u", i);
+
+ level[i].num = ARRAY_SIZE(stage2_pte_bits);
+ level[i].bits = stage2_pte_bits;
+ level[i].mask = mask;
+ }
+
+ return 0;
+}
+
+static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm)
+{
+ struct kvm_ptdump_guest_state *st;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct kvm_pgtable *pgtable = mmu->pgt;
+ int ret;
+
+ st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
+ if (!st)
+ return ERR_PTR(-ENOMEM);
+
+ ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
+ if (ret) {
+ kfree(st);
+ return ERR_PTR(ret);
+ }
+
+ st->ipa_marker[0].name = "Guest IPA";
+ st->ipa_marker[1].start_address = BIT(pgtable->ia_bits);
+ st->range[0].end = BIT(pgtable->ia_bits);
+
+ st->kvm = kvm;
+ st->parser_state = (struct ptdump_pg_state) {
+ .marker = &st->ipa_marker[0],
+ .level = -1,
+ .pg_level = &st->level[0],
+ .ptdump.range = &st->range[0],
+ .start_address = 0,
+ };
+
+ return st;
+}
+
+static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
+{
+ int ret;
+ struct kvm_ptdump_guest_state *st = m->private;
+ struct kvm *kvm = st->kvm;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct ptdump_pg_state *parser_state = &st->parser_state;
+ struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
+ .cb = kvm_ptdump_visitor,
+ .arg = parser_state,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ parser_state->seq = m;
+
+ write_lock(&kvm->mmu_lock);
+ ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker);
+ write_unlock(&kvm->mmu_lock);
+
+ return ret;
+}
+
+static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+ struct kvm_ptdump_guest_state *st;
+ int ret;
+
+ if (!kvm_get_kvm_safe(kvm))
+ return -ENOENT;
+
+ st = kvm_ptdump_parser_create(kvm);
+ if (IS_ERR(st)) {
+ ret = PTR_ERR(st);
+ goto err_with_kvm_ref;
+ }
+
+ ret = single_open(file, kvm_ptdump_guest_show, st);
+ if (!ret)
+ return 0;
+
+ kfree(st);
+err_with_kvm_ref:
+ kvm_put_kvm(kvm);
+ return ret;
+}
+
+static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+ void *st = ((struct seq_file *)file->private_data)->private;
+
+ kfree(st);
+ kvm_put_kvm(kvm);
+
+ return single_release(m, file);
+}
+
+static const struct file_operations kvm_ptdump_guest_fops = {
+ .open = kvm_ptdump_guest_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_ptdump_guest_close,
+};
+
+static int kvm_pgtable_range_show(struct seq_file *m, void *unused)
+{
+ struct kvm_pgtable *pgtable = m->private;
+
+ seq_printf(m, "%2u\n", pgtable->ia_bits);
+ return 0;
+}
+
+static int kvm_pgtable_levels_show(struct seq_file *m, void *unused)
+{
+ struct kvm_pgtable *pgtable = m->private;
+
+ seq_printf(m, "%1d\n", KVM_PGTABLE_MAX_LEVELS - pgtable->start_level);
+ return 0;
+}
+
+static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file,
+ int (*show)(struct seq_file *, void *))
+{
+ struct kvm *kvm = m->i_private;
+ struct kvm_pgtable *pgtable;
+ int ret;
+
+ if (!kvm_get_kvm_safe(kvm))
+ return -ENOENT;
+
+ pgtable = kvm->arch.mmu.pgt;
+
+ ret = single_open(file, show, pgtable);
+ if (ret < 0)
+ kvm_put_kvm(kvm);
+ return ret;
+}
+
+static int kvm_pgtable_range_open(struct inode *m, struct file *file)
+{
+ return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_range_show);
+}
+
+static int kvm_pgtable_levels_open(struct inode *m, struct file *file)
+{
+ return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_levels_show);
+}
+
+static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+
+ kvm_put_kvm(kvm);
+ return single_release(m, file);
+}
+
+static const struct file_operations kvm_pgtable_range_fops = {
+ .open = kvm_pgtable_range_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_pgtable_debugfs_close,
+};
+
+static const struct file_operations kvm_pgtable_levels_fops = {
+ .open = kvm_pgtable_levels_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_pgtable_debugfs_close,
+};
+
+void kvm_s2_ptdump_create_debugfs(struct kvm *kvm)
+{
+ debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
+ kvm, &kvm_ptdump_guest_fops);
+ debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm,
+ &kvm_pgtable_range_fops);
+ debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry,
+ kvm, &kvm_pgtable_levels_fops);
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c90324060436..dad88e31f953 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -18,6 +18,7 @@
#include <linux/printk.h>
#include <linux/uaccess.h>
+#include <asm/arm_pmuv3.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <asm/debug-monitors.h>
@@ -33,6 +34,7 @@
#include <trace/events/kvm.h>
#include "sys_regs.h"
+#include "vgic/vgic.h"
#include "trace.h"
@@ -46,6 +48,13 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
u64 val);
+static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
static bool bad_trap(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
const struct sys_reg_desc *r,
@@ -53,8 +62,7 @@ static bool bad_trap(struct kvm_vcpu *vcpu,
{
WARN_ONCE(1, "Unexpected %s\n", msg);
print_sys_reg_instr(params);
- kvm_inject_undefined(vcpu);
- return false;
+ return undef_access(vcpu, params, r);
}
static bool read_from_write_only(struct kvm_vcpu *vcpu,
@@ -345,10 +353,8 @@ static bool access_dcgsw(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- if (!kvm_has_mte(vcpu->kvm)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_has_mte(vcpu->kvm))
+ return undef_access(vcpu, p, r);
/* Treat MTE S/W ops as we treat the classic ones: with contempt */
return access_dcsw(vcpu, p, r);
@@ -385,10 +391,8 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
u64 val, mask, shift;
if (reg_to_encoding(r) == SYS_TCR2_EL1 &&
- !kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ !kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
+ return undef_access(vcpu, p, r);
BUG_ON(!p->is_write);
@@ -435,6 +439,9 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
{
bool g1;
+ if (!kvm_has_gicv3(vcpu->kvm))
+ return undef_access(vcpu, p, r);
+
if (!p->is_write)
return read_from_write_only(vcpu, p, r);
@@ -478,6 +485,9 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
+ if (!kvm_has_gicv3(vcpu->kvm))
+ return undef_access(vcpu, p, r);
+
if (p->is_write)
return ignore_write(vcpu, p);
@@ -495,14 +505,6 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
-static bool trap_undef(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- kvm_inject_undefined(vcpu);
- return false;
-}
-
/*
* ARMv8.1 mandates at least a trivial LORegion implementation, where all the
* RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
@@ -515,10 +517,8 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
{
u32 sr = reg_to_encoding(r);
- if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, LO, IMP)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, LO, IMP))
+ return undef_access(vcpu, p, r);
if (p->is_write && sr == SYS_LORID_EL1)
return write_to_read_only(vcpu, p, r);
@@ -887,7 +887,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK;
+ __vcpu_sys_reg(vcpu, r->reg) &= PMSELR_EL0_SEL_MASK;
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -979,7 +979,7 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
else
/* return PMSELR.SEL field */
p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0)
- & ARMV8_PMU_COUNTER_MASK;
+ & PMSELR_EL0_SEL_MASK;
return true;
}
@@ -1047,8 +1047,8 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
- idx = __vcpu_sys_reg(vcpu, PMSELR_EL0)
- & ARMV8_PMU_COUNTER_MASK;
+ idx = SYS_FIELD_GET(PMSELR_EL0, SEL,
+ __vcpu_sys_reg(vcpu, PMSELR_EL0));
} else if (r->Op2 == 0) {
/* PMCCNTR_EL0 */
if (pmu_access_cycle_counter_el0_disabled(vcpu))
@@ -1098,7 +1098,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
/* PMXEVTYPER_EL0 */
- idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
+ idx = SYS_FIELD_GET(PMSELR_EL0, SEL, __vcpu_sys_reg(vcpu, PMSELR_EL0));
reg = PMEVTYPER0_EL0 + idx;
} else if (r->CRn == 14 && (r->CRm & 12) == 12) {
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
@@ -1251,10 +1251,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
- if (!vcpu_mode_priv(vcpu)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!vcpu_mode_priv(vcpu))
+ return undef_access(vcpu, p, r);
__vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
p->regval & ARMV8_PMU_USERENR_MASK;
@@ -1338,14 +1336,6 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
.reset = reset_pmevtyper, \
.access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
-static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- kvm_inject_undefined(vcpu);
-
- return false;
-}
-
/* Macro to expand the AMU counter and type registers*/
#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), undef_access }
#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), undef_access }
@@ -1404,8 +1394,7 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
break;
default:
print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
- kvm_inject_undefined(vcpu);
- return false;
+ return undef_access(vcpu, p, r);
}
if (p->is_write)
@@ -1539,6 +1528,10 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
break;
+ case SYS_ID_AA64PFR2_EL1:
+ /* We only expose FPMR */
+ val &= ID_AA64PFR2_EL1_FPMR;
+ break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
@@ -1556,6 +1549,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
case SYS_ID_AA64MMFR2_EL1:
val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
break;
+ case SYS_ID_AA64MMFR3_EL1:
+ val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE;
+ break;
case SYS_ID_MMFR4_EL1:
val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
break;
@@ -1669,6 +1665,24 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
+static unsigned int sme_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SME, IMP))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int fp8_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_fpmr(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
@@ -2085,26 +2099,6 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
/*
- * EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when
- * HCR_EL2.E2H==1, and only in the sysreg table for convenience of
- * handling traps. Given that, they are always hidden from userspace.
- */
-static unsigned int hidden_user_visibility(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
-{
- return REG_HIDDEN_USER;
-}
-
-#define EL12_REG(name, acc, rst, v) { \
- SYS_DESC(SYS_##name##_EL12), \
- .access = acc, \
- .reset = rst, \
- .reg = name##_EL1, \
- .val = v, \
- .visibility = hidden_user_visibility, \
-}
-
-/*
* Since reset() callback and field val are not used for idregs, they will be
* used for specific purposes for idregs.
* The reset() would return KVM sanitised register value. The value would be the
@@ -2211,6 +2205,18 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_cntkctl_el12(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ __vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval;
+ else
+ p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1);
+
+ return true;
+}
+
static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 val = r->val;
@@ -2255,6 +2261,15 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu,
return true;
}
+static unsigned int s1poe_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -2301,7 +2316,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
// DBGDTR[TR]X_EL0 share the same encoding
{ SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi },
- { SYS_DESC(SYS_DBGVCR32_EL2), trap_undef, reset_val, DBGVCR32_EL2, 0 },
+ { SYS_DESC(SYS_DBGVCR32_EL2), undef_access, reset_val, DBGVCR32_EL2, 0 },
{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
@@ -2359,16 +2374,15 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR0_EL1_MPAM |
ID_AA64PFR0_EL1_SVE |
ID_AA64PFR0_EL1_RAS |
- ID_AA64PFR0_EL1_GIC |
ID_AA64PFR0_EL1_AdvSIMD |
ID_AA64PFR0_EL1_FP), },
ID_SANITISED(ID_AA64PFR1_EL1),
- ID_UNALLOCATED(4,2),
+ ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR),
ID_UNALLOCATED(4,3),
ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
ID_HIDDEN(ID_AA64SMFR0_EL1),
ID_UNALLOCATED(4,6),
- ID_UNALLOCATED(4,7),
+ ID_WRITABLE(ID_AA64FPFR0_EL1, ~ID_AA64FPFR0_EL1_RES0),
/* CRm=5 */
{ SYS_DESC(SYS_ID_AA64DFR0_EL1),
@@ -2418,7 +2432,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR2_EL1_IDS |
ID_AA64MMFR2_EL1_NV |
ID_AA64MMFR2_EL1_CCIDX)),
- ID_SANITISED(ID_AA64MMFR3_EL1),
+ ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_S1POE)),
ID_SANITISED(ID_AA64MMFR4_EL1),
ID_UNALLOCATED(7,5),
ID_UNALLOCATED(7,6),
@@ -2449,6 +2464,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SPSR_EL1), access_spsr},
{ SYS_DESC(SYS_ELR_EL1), access_elr},
+ { SYS_DESC(SYS_ICC_PMR_EL1), undef_access },
+
{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
@@ -2492,6 +2509,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 },
{ SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 },
+ { SYS_DESC(SYS_POR_EL1), NULL, reset_unknown, POR_EL1,
+ .visibility = s1poe_visibility },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
{ SYS_DESC(SYS_LORSA_EL1), trap_loregion },
@@ -2503,18 +2522,31 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
- { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
- { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
- { SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only },
- { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
- { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_IAR0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_EOIR0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_HPPIR0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_BPR0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP0R0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP0R1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP0R2_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP0R3_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP1R0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi },
- { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
- { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
- { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_IAR1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_HPPIR1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_BPR1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_CTLR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
+ { SYS_DESC(SYS_ICC_IGRPEN0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_IGRPEN1_EL1), undef_access },
{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
@@ -2535,7 +2567,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
CTR_EL0_IDC_MASK |
CTR_EL0_DminLine_MASK |
CTR_EL0_IminLine_MASK),
- { SYS_DESC(SYS_SVCR), undef_access },
+ { SYS_DESC(SYS_SVCR), undef_access, reset_val, SVCR, 0, .visibility = sme_visibility },
+ { SYS_DESC(SYS_FPMR), undef_access, reset_val, FPMR, 0, .visibility = fp8_visibility },
{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
.reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
@@ -2578,6 +2611,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.access = access_pmovs, .reg = PMOVSSET_EL0,
.get_user = get_pmreg, .set_user = set_pmreg },
+ { SYS_DESC(SYS_POR_EL0), NULL, reset_unknown, POR_EL0,
+ .visibility = s1poe_visibility },
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
{ SYS_DESC(SYS_TPIDR2_EL0), undef_access },
@@ -2758,7 +2793,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(VTTBR_EL2, reset_val, 0),
EL2_REG_VNCR(VTCR_EL2, reset_val, 0),
- { SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 },
+ { SYS_DESC(SYS_DACR32_EL2), undef_access, reset_unknown, DACR32_EL2 },
EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0),
EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0),
EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0),
@@ -2767,20 +2802,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
/* AArch32 SPSR_* are RES0 if trapped from a NV guest */
- { SYS_DESC(SYS_SPSR_irq), .access = trap_raz_wi,
- .visibility = hidden_user_visibility },
- { SYS_DESC(SYS_SPSR_abt), .access = trap_raz_wi,
- .visibility = hidden_user_visibility },
- { SYS_DESC(SYS_SPSR_und), .access = trap_raz_wi,
- .visibility = hidden_user_visibility },
- { SYS_DESC(SYS_SPSR_fiq), .access = trap_raz_wi,
- .visibility = hidden_user_visibility },
-
- { SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 },
+ { SYS_DESC(SYS_SPSR_irq), .access = trap_raz_wi },
+ { SYS_DESC(SYS_SPSR_abt), .access = trap_raz_wi },
+ { SYS_DESC(SYS_SPSR_und), .access = trap_raz_wi },
+ { SYS_DESC(SYS_SPSR_fiq), .access = trap_raz_wi },
+
+ { SYS_DESC(SYS_IFSR32_EL2), undef_access, reset_unknown, IFSR32_EL2 },
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
EL2_REG_REDIR(ESR_EL2, reset_val, 0),
- { SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 },
+ { SYS_DESC(SYS_FPEXC32_EL2), undef_access, reset_val, FPEXC32_EL2, 0x700 },
EL2_REG_REDIR(FAR_EL2, reset_val, 0),
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
@@ -2790,7 +2821,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
- { SYS_DESC(SYS_RMR_EL2), trap_undef },
+ { SYS_DESC(SYS_RMR_EL2), undef_access },
+
+ EL2_REG_VNCR(ICH_HCR_EL2, reset_val, 0),
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
@@ -2798,11 +2831,48 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0),
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
- EL12_REG(CNTKCTL, access_rw, reset_val, 0),
+ { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },
EL2_REG(SP_EL2, NULL, reset_unknown, 0),
};
+static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ __kvm_at_s1e01(vcpu, op, p->regval);
+
+ return true;
+}
+
+static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ /* There is no FGT associated with AT S1E2A :-( */
+ if (op == OP_AT_S1E2A &&
+ !kvm_has_feat(vcpu->kvm, ID_AA64ISAR2_EL1, ATS1A, IMP)) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+
+ __kvm_at_s1e2(vcpu, op, p->regval);
+
+ return true;
+}
+
+static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ __kvm_at_s12(vcpu, op, p->regval);
+
+ return true;
+}
+
static bool kvm_supported_tlbi_s12_op(struct kvm_vcpu *vpcu, u32 instr)
{
struct kvm *kvm = vpcu->kvm;
@@ -2824,10 +2894,8 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
- if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
write_lock(&vcpu->kvm->mmu_lock);
@@ -2896,10 +2964,8 @@ static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
u64 limit, vttbr;
- if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
limit = BIT_ULL(kvm_get_pa_bits(vcpu->kvm));
@@ -2924,10 +2990,8 @@ static bool handle_ripas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
u64 base, range, tg, num, scale;
int shift;
- if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
/*
* Because the shadow S2 structure doesn't necessarily reflect that
@@ -2995,10 +3059,8 @@ static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
- if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
&(union tlbi_info) {
@@ -3038,10 +3100,8 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
WARN_ON(!vcpu_is_el2(vcpu));
- if (!kvm_supported_tlbi_s1e1_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_s1e1_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
&(union tlbi_info) {
@@ -3065,6 +3125,14 @@ static struct sys_reg_desc sys_insn_descs[] = {
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
{ SYS_DESC(SYS_DC_IGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
+
+ SYS_INSN(AT_S1E1R, handle_at_s1e01),
+ SYS_INSN(AT_S1E1W, handle_at_s1e01),
+ SYS_INSN(AT_S1E0R, handle_at_s1e01),
+ SYS_INSN(AT_S1E0W, handle_at_s1e01),
+ SYS_INSN(AT_S1E1RP, handle_at_s1e01),
+ SYS_INSN(AT_S1E1WP, handle_at_s1e01),
+
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
{ SYS_DESC(SYS_DC_CGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
@@ -3144,19 +3212,27 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_VALE1NXS, handle_tlbi_el1),
SYS_INSN(TLBI_VAALE1NXS, handle_tlbi_el1),
+ SYS_INSN(AT_S1E2R, handle_at_s1e2),
+ SYS_INSN(AT_S1E2W, handle_at_s1e2),
+ SYS_INSN(AT_S12E1R, handle_at_s12),
+ SYS_INSN(AT_S12E1W, handle_at_s12),
+ SYS_INSN(AT_S12E0R, handle_at_s12),
+ SYS_INSN(AT_S12E0W, handle_at_s12),
+ SYS_INSN(AT_S1E2A, handle_at_s1e2),
+
SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2E1IS, handle_ripas2e1is),
SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1IS, handle_ripas2e1is),
- SYS_INSN(TLBI_ALLE2OS, trap_undef),
- SYS_INSN(TLBI_VAE2OS, trap_undef),
+ SYS_INSN(TLBI_ALLE2OS, undef_access),
+ SYS_INSN(TLBI_VAE2OS, undef_access),
SYS_INSN(TLBI_ALLE1OS, handle_alle1is),
- SYS_INSN(TLBI_VALE2OS, trap_undef),
+ SYS_INSN(TLBI_VALE2OS, undef_access),
SYS_INSN(TLBI_VMALLS12E1OS, handle_vmalls12e1is),
- SYS_INSN(TLBI_RVAE2IS, trap_undef),
- SYS_INSN(TLBI_RVALE2IS, trap_undef),
+ SYS_INSN(TLBI_RVAE2IS, undef_access),
+ SYS_INSN(TLBI_RVALE2IS, undef_access),
SYS_INSN(TLBI_ALLE1IS, handle_alle1is),
SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is),
@@ -3168,10 +3244,10 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1, handle_ripas2e1is),
SYS_INSN(TLBI_RIPAS2LE1OS, handle_ripas2e1is),
- SYS_INSN(TLBI_RVAE2OS, trap_undef),
- SYS_INSN(TLBI_RVALE2OS, trap_undef),
- SYS_INSN(TLBI_RVAE2, trap_undef),
- SYS_INSN(TLBI_RVALE2, trap_undef),
+ SYS_INSN(TLBI_RVAE2OS, undef_access),
+ SYS_INSN(TLBI_RVALE2OS, undef_access),
+ SYS_INSN(TLBI_RVAE2, undef_access),
+ SYS_INSN(TLBI_RVALE2, undef_access),
SYS_INSN(TLBI_ALLE1, handle_alle1is),
SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is),
@@ -3180,19 +3256,19 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1ISNXS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1ISNXS, handle_ripas2e1is),
- SYS_INSN(TLBI_ALLE2OSNXS, trap_undef),
- SYS_INSN(TLBI_VAE2OSNXS, trap_undef),
+ SYS_INSN(TLBI_ALLE2OSNXS, undef_access),
+ SYS_INSN(TLBI_VAE2OSNXS, undef_access),
SYS_INSN(TLBI_ALLE1OSNXS, handle_alle1is),
- SYS_INSN(TLBI_VALE2OSNXS, trap_undef),
+ SYS_INSN(TLBI_VALE2OSNXS, undef_access),
SYS_INSN(TLBI_VMALLS12E1OSNXS, handle_vmalls12e1is),
- SYS_INSN(TLBI_RVAE2ISNXS, trap_undef),
- SYS_INSN(TLBI_RVALE2ISNXS, trap_undef),
- SYS_INSN(TLBI_ALLE2ISNXS, trap_undef),
- SYS_INSN(TLBI_VAE2ISNXS, trap_undef),
+ SYS_INSN(TLBI_RVAE2ISNXS, undef_access),
+ SYS_INSN(TLBI_RVALE2ISNXS, undef_access),
+ SYS_INSN(TLBI_ALLE2ISNXS, undef_access),
+ SYS_INSN(TLBI_VAE2ISNXS, undef_access),
SYS_INSN(TLBI_ALLE1ISNXS, handle_alle1is),
- SYS_INSN(TLBI_VALE2ISNXS, trap_undef),
+ SYS_INSN(TLBI_VALE2ISNXS, undef_access),
SYS_INSN(TLBI_VMALLS12E1ISNXS, handle_vmalls12e1is),
SYS_INSN(TLBI_IPAS2E1OSNXS, handle_ipas2e1is),
SYS_INSN(TLBI_IPAS2E1NXS, handle_ipas2e1is),
@@ -3202,14 +3278,14 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1NXS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1NXS, handle_ripas2e1is),
SYS_INSN(TLBI_RIPAS2LE1OSNXS, handle_ripas2e1is),
- SYS_INSN(TLBI_RVAE2OSNXS, trap_undef),
- SYS_INSN(TLBI_RVALE2OSNXS, trap_undef),
- SYS_INSN(TLBI_RVAE2NXS, trap_undef),
- SYS_INSN(TLBI_RVALE2NXS, trap_undef),
- SYS_INSN(TLBI_ALLE2NXS, trap_undef),
- SYS_INSN(TLBI_VAE2NXS, trap_undef),
+ SYS_INSN(TLBI_RVAE2OSNXS, undef_access),
+ SYS_INSN(TLBI_RVALE2OSNXS, undef_access),
+ SYS_INSN(TLBI_RVAE2NXS, undef_access),
+ SYS_INSN(TLBI_RVALE2NXS, undef_access),
+ SYS_INSN(TLBI_ALLE2NXS, undef_access),
+ SYS_INSN(TLBI_VAE2NXS, undef_access),
SYS_INSN(TLBI_ALLE1NXS, handle_alle1is),
- SYS_INSN(TLBI_VALE2NXS, trap_undef),
+ SYS_INSN(TLBI_VALE2NXS, undef_access),
SYS_INSN(TLBI_VMALLS12E1NXS, handle_vmalls12e1is),
};
@@ -3387,6 +3463,7 @@ static const struct sys_reg_desc cp15_regs[] = {
/* TTBCR2 */
{ AA32(HI), Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, TCR_EL1 },
{ Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, DACR32_EL2 },
+ { CP15_SYS_DESC(SYS_ICC_PMR_EL1), undef_access },
/* DFSR */
{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, ESR_EL1 },
{ Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, IFSR32_EL2 },
@@ -3436,8 +3513,28 @@ static const struct sys_reg_desc cp15_regs[] = {
/* AMAIR1 */
{ AA32(HI), Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, AMAIR_EL1 },
- /* ICC_SRE */
- { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
+ { CP15_SYS_DESC(SYS_ICC_IAR0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_EOIR0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_HPPIR0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_BPR0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP0R0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP0R1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP0R2_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP0R3_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP1R0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_IAR1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_HPPIR1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_BPR1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_CTLR_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
+ { CP15_SYS_DESC(SYS_ICC_IGRPEN0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_IGRPEN1_EL1), undef_access },
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, CONTEXTIDR_EL1 },
@@ -4274,7 +4371,7 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
int ret;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
- if (!r || sysreg_hidden_user(vcpu, r))
+ if (!r || sysreg_hidden(vcpu, r))
return -ENOENT;
if (r->get_user) {
@@ -4318,7 +4415,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
return -EFAULT;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
- if (!r || sysreg_hidden_user(vcpu, r))
+ if (!r || sysreg_hidden(vcpu, r))
return -ENOENT;
if (sysreg_user_write_ignore(vcpu, r))
@@ -4404,7 +4501,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
if (!(rd->reg || rd->get_user))
return 0;
- if (sysreg_hidden_user(vcpu, rd))
+ if (sysreg_hidden(vcpu, rd))
return 0;
if (!copy_reg_to_user(rd, uind))
@@ -4545,6 +4642,7 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
mutex_lock(&kvm->arch.config_lock);
vcpu_set_hcr(vcpu);
+ vcpu_set_ich_hcr(vcpu);
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
/*
@@ -4560,6 +4658,9 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
if (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
+
+ if (kvm_has_fpmr(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
}
if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags))
@@ -4568,8 +4669,6 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1 |
HFGxTR_EL2_nMAIR2_EL1 |
HFGxTR_EL2_nS2POR_EL1 |
- HFGxTR_EL2_nPOR_EL1 |
- HFGxTR_EL2_nPOR_EL0 |
HFGxTR_EL2_nACCDATA_EL1 |
HFGxTR_EL2_nSMPRI_EL1_MASK |
HFGxTR_EL2_nTPIDR2_EL0_MASK);
@@ -4600,10 +4699,21 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
HFGITR_EL2_TLBIRVAAE1OS |
HFGITR_EL2_TLBIRVAE1OS);
+ if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP))
+ kvm->arch.fgu[HFGITR_GROUP] |= HFGITR_EL2_ATS1E1A;
+
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2))
+ kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP |
+ HFGITR_EL2_ATS1E1WP);
+
if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 |
HFGxTR_EL2_nPIR_EL1);
+ if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPOR_EL1 |
+ HFGxTR_EL2_nPOR_EL0);
+
if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 |
HAFGRTR_EL2_RES1);
@@ -4613,6 +4723,36 @@ out:
mutex_unlock(&kvm->arch.config_lock);
}
+/*
+ * Perform last adjustments to the ID registers that are implied by the
+ * configuration outside of the ID regs themselves, as well as any
+ * initialisation that directly depend on these ID registers (such as
+ * RES0/RES1 behaviours). This is not the place to configure traps though.
+ *
+ * Because this can be called once per CPU, changes must be idempotent.
+ */
+int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ guard(mutex)(&kvm->arch.config_lock);
+
+ if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) &&
+ irqchip_in_kernel(kvm) &&
+ kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) {
+ kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK;
+ kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK;
+ }
+
+ if (vcpu_has_nv(vcpu)) {
+ int ret = kvm_init_nv_sysregs(kvm);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
int __init kvm_sys_reg_table_init(void)
{
bool valid = true;
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 997eea21ba2a..1d94ed6efad2 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -95,9 +95,8 @@ struct sys_reg_desc {
};
#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */
-#define REG_HIDDEN_USER (1 << 1) /* hidden from userspace only */
-#define REG_RAZ (1 << 2) /* RAZ from userspace and guest */
-#define REG_USER_WI (1 << 3) /* WI from userspace only */
+#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */
+#define REG_USER_WI (1 << 2) /* WI from userspace only */
static __printf(2, 3)
inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -165,15 +164,6 @@ static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
return sysreg_visibility(vcpu, r) & REG_HIDDEN;
}
-static inline bool sysreg_hidden_user(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *r)
-{
- if (likely(!r->visibility))
- return false;
-
- return r->visibility(vcpu, r) & (REG_HIDDEN | REG_HIDDEN_USER);
-}
-
static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
@@ -235,6 +225,8 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index);
+int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu);
+
#define AA32(_x) .aarch32_map = AA32_##_x
#define Op0(_x) .Op0 = _x
#define Op1(_x) .Op1 = _x
@@ -248,4 +240,11 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index);
CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
Op2(sys_reg_Op2(reg))
+#define CP15_SYS_DESC(reg) \
+ .name = #reg, \
+ .aarch32_map = AA32_DIRECT, \
+ Op0(0), Op1(sys_reg_Op1(reg)), \
+ CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
+ Op2(sys_reg_Op2(reg))
+
#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index bc74d06398ef..e1397ab2072a 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -85,7 +85,7 @@ static void iter_unmark_lpis(struct kvm *kvm)
struct vgic_irq *irq;
unsigned long intid;
- xa_for_each(&dist->lpi_xa, intid, irq) {
+ xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) {
xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
vgic_put_irq(kvm, irq);
}
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 41feb858ff9a..e7c53e8af3d1 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -417,10 +417,8 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
kfree(vgic_cpu->private_irqs);
vgic_cpu->private_irqs = NULL;
- if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- vgic_unregister_redist_iodev(vcpu);
+ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
- }
}
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -448,6 +446,11 @@ void kvm_vgic_destroy(struct kvm *kvm)
kvm_vgic_dist_destroy(kvm);
mutex_unlock(&kvm->arch.config_lock);
+
+ if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ vgic_unregister_redist_iodev(vcpu);
+
mutex_unlock(&kvm->slots_lock);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 3eecdd2f4b8f..b217b256853c 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -292,6 +292,18 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/* Get the show on the road... */
vgic_v3->vgic_hcr = ICH_HCR_EN;
+}
+
+void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ /* Hide GICv3 sysreg if necessary */
+ if (!kvm_has_gicv3(vcpu->kvm)) {
+ vgic_v3->vgic_hcr |= ICH_HCR_TALL0 | ICH_HCR_TALL1 | ICH_HCR_TC;
+ return;
+ }
+
if (group0_trap)
vgic_v3->vgic_hcr |= ICH_HCR_TALL0;
if (group1_trap)
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 974849ea7101..f50274fd5581 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -36,6 +36,11 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* we have to disable IRQs before taking this lock and everything lower
* than it.
*
+ * The config_lock has additional ordering requirements:
+ * kvm->slots_lock
+ * kvm->srcu
+ * kvm->arch.config_lock
+ *
* If you need to take multiple locks, always take the upper lock first,
* then the lower ones, e.g. first take the its_lock, then the irq_lock.
* If you are already holding a lock and need to take a higher one, you
@@ -917,10 +922,13 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
- if (unlikely(!vgic_initialized(vcpu->kvm)))
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
+ if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
return;
+ }
- if (kvm_vgic_global_state.type == VGIC_V2)
+ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_load(vcpu);
else
vgic_v3_load(vcpu);
@@ -928,10 +936,13 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu)
void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
- if (unlikely(!vgic_initialized(vcpu->kvm)))
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
+ if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
return;
+ }
- if (kvm_vgic_global_state.type == VGIC_V2)
+ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_put(vcpu);
else
vgic_v3_put(vcpu);
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index ba8f790431bd..f2486b4d9f95 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -346,4 +346,11 @@ void vgic_v4_configure_vsgis(struct kvm *kvm);
void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val);
int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq);
+void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu);
+
+static inline bool kvm_has_gicv3(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
+}
+
#endif
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 60454256945b..2fc8c6dd0407 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
- ioremap.o mmap.o pgd.o mmu.o \
+ ioremap.o mmap.o pgd.o mem_encrypt.o mmu.o \
context.o proc.o pageattr.o fixmap.o
obj-$(CONFIG_ARM64_CONTPTE) += contpte.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index a3edced29ac1..55107d27d3f8 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -421,6 +421,12 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
ptep = contpte_align_down(ptep);
start_addr = addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
+ /*
+ * We are not advancing entry because __ptep_set_access_flags()
+ * only consumes access flags from entry. And since we have checked
+ * for the whole contpte block and returned early, pte_same()
+ * within __ptep_set_access_flags() is likely false.
+ */
for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE)
__ptep_set_access_flags(vma, addr, ptep, entry, 0);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 451ba7cbd5ad..8b281cf308b3 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -23,6 +23,7 @@
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
+#include <linux/pkeys.h>
#include <linux/preempt.h>
#include <linux/hugetlb.h>
@@ -486,6 +487,23 @@ static void do_bad_area(unsigned long far, unsigned long esr,
}
}
+static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
+ unsigned int mm_flags)
+{
+ unsigned long iss2 = ESR_ELx_ISS2(esr);
+
+ if (!system_supports_poe())
+ return false;
+
+ if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay))
+ return true;
+
+ return !arch_vma_access_permitted(vma,
+ mm_flags & FAULT_FLAG_WRITE,
+ mm_flags & FAULT_FLAG_INSTRUCTION,
+ false);
+}
+
static bool is_el0_instruction_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
@@ -511,6 +529,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
unsigned long addr = untagged_addr(far);
struct vm_area_struct *vma;
int si_code;
+ int pkey = -1;
if (kprobe_page_fault(regs, esr))
return 0;
@@ -575,6 +594,16 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
goto bad_area;
}
+
+ if (fault_from_pkey(esr, vma, mm_flags)) {
+ pkey = vma_pkey(vma);
+ vma_end_read(vma);
+ fault = 0;
+ si_code = SEGV_PKUERR;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto bad_area;
+ }
+
fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
vma_end_read(vma);
@@ -610,7 +639,16 @@ retry:
goto bad_area;
}
+ if (fault_from_pkey(esr, vma, mm_flags)) {
+ pkey = vma_pkey(vma);
+ mmap_read_unlock(mm);
+ fault = 0;
+ si_code = SEGV_PKUERR;
+ goto bad_area;
+ }
+
fault = handle_mm_fault(vma, addr, mm_flags, regs);
+
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
@@ -669,8 +707,23 @@ bad_area:
arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
} else {
+ /*
+ * The pkey value that we return to userspace can be different
+ * from the pkey that caused the fault.
+ *
+ * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4);
+ * 2. T1 : set POR_EL0 to deny access to pkey=4, touches, page
+ * 3. T1 : faults...
+ * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+ * 5. T1 : enters fault handler, takes mmap_lock, etc...
+ * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
+ * faulted on a pte with its pkey=4.
+ */
/* Something tried to access memory that out of memory map */
- arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
+ if (si_code == SEGV_PKUERR)
+ arm64_force_sig_fault_pkey(far, inf->name, pkey);
+ else
+ arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
}
return 0;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9b5ab6818f7f..a0400b9aa814 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -414,8 +414,16 @@ void __init mem_init(void)
void free_initmem(void)
{
- free_reserved_area(lm_alias(__init_begin),
- lm_alias(__init_end),
+ void *lm_init_begin = lm_alias(__init_begin);
+ void *lm_init_end = lm_alias(__init_end);
+
+ WARN_ON(!IS_ALIGNED((unsigned long)lm_init_begin, PAGE_SIZE));
+ WARN_ON(!IS_ALIGNED((unsigned long)lm_init_end, PAGE_SIZE));
+
+ /* Delete __init region from memblock.reserved. */
+ memblock_free(lm_init_begin, lm_init_end - lm_init_begin);
+
+ free_reserved_area(lm_init_begin, lm_init_end,
POISON_FREE_INITMEM, "unused kernel");
/*
* Unmap the __init region but leave the VM area in place. This
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 269f2f63ab7d..6cc0b7e7eb03 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -3,10 +3,22 @@
#include <linux/mm.h>
#include <linux/io.h>
+static ioremap_prot_hook_t ioremap_prot_hook;
+
+int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook)
+{
+ if (WARN_ON(ioremap_prot_hook))
+ return -EBUSY;
+
+ ioremap_prot_hook = hook;
+ return 0;
+}
+
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
unsigned long prot)
{
unsigned long last_addr = phys_addr + size - 1;
+ pgprot_t pgprot = __pgprot(prot);
/* Don't allow outside PHYS_MASK */
if (last_addr & ~PHYS_MASK)
@@ -16,7 +28,16 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr))))
return NULL;
- return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
+ /*
+ * If a hook is registered (e.g. for confidential computing
+ * purposes), call that now and barf if it fails.
+ */
+ if (unlikely(ioremap_prot_hook) &&
+ WARN_ON(ioremap_prot_hook(phys_addr, size, &pgprot))) {
+ return NULL;
+ }
+
+ return generic_ioremap_prot(phys_addr, size, pgprot);
}
EXPORT_SYMBOL(ioremap_prot);
diff --git a/arch/arm64/mm/mem_encrypt.c b/arch/arm64/mm/mem_encrypt.c
new file mode 100644
index 000000000000..ee3c0ab04384
--- /dev/null
+++ b/arch/arm64/mm/mem_encrypt.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Implementation of the memory encryption/decryption API.
+ *
+ * Since the low-level details of the operation depend on the
+ * Confidential Computing environment (e.g. pKVM, CCA, ...), this just
+ * acts as a top-level dispatcher to whatever hooks may have been
+ * registered.
+ *
+ * Author: Will Deacon <will@kernel.org>
+ * Copyright (C) 2024 Google LLC
+ *
+ * "Hello, boils and ghouls!"
+ */
+
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+
+#include <asm/mem_encrypt.h>
+
+static const struct arm64_mem_crypt_ops *crypt_ops;
+
+int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops)
+{
+ if (WARN_ON(crypt_ops))
+ return -EBUSY;
+
+ crypt_ops = ops;
+ return 0;
+}
+
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+ if (likely(!crypt_ops) || WARN_ON(!PAGE_ALIGNED(addr)))
+ return 0;
+
+ return crypt_ops->encrypt(addr, numpages);
+}
+EXPORT_SYMBOL_GPL(set_memory_encrypted);
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+ if (likely(!crypt_ops) || WARN_ON(!PAGE_ALIGNED(addr)))
+ return 0;
+
+ return crypt_ops->decrypt(addr, numpages);
+}
+EXPORT_SYMBOL_GPL(set_memory_decrypted);
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 642bdf908b22..7e3ad97e27d8 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -102,6 +102,17 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
if (vm_flags & VM_MTE)
prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
+#ifdef CONFIG_ARCH_HAS_PKEYS
+ if (system_supports_poe()) {
+ if (vm_flags & VM_PKEY_BIT0)
+ prot |= PTE_PO_IDX_0;
+ if (vm_flags & VM_PKEY_BIT1)
+ prot |= PTE_PO_IDX_1;
+ if (vm_flags & VM_PKEY_BIT2)
+ prot |= PTE_PO_IDX_2;
+ }
+#endif
+
return __pgprot(prot);
}
EXPORT_SYMBOL(vm_get_page_prot);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 353ea5dc32b8..e55b02fbddc8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -25,6 +25,7 @@
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/kfence.h>
+#include <linux/pkeys.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -1549,3 +1550,47 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
cpu_uninstall_idmap();
}
+
+#ifdef CONFIG_ARCH_HAS_PKEYS
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val)
+{
+ u64 new_por = POE_RXW;
+ u64 old_por;
+ u64 pkey_shift;
+
+ if (!system_supports_poe())
+ return -ENOSPC;
+
+ /*
+ * This code should only be called with valid 'pkey'
+ * values originating from in-kernel users. Complain
+ * if a bad value is observed.
+ */
+ if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
+ return -EINVAL;
+
+ /* Set the bits we need in POR: */
+ new_por = POE_RXW;
+ if (init_val & PKEY_DISABLE_WRITE)
+ new_por &= ~POE_W;
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_por &= ~POE_RW;
+ if (init_val & PKEY_DISABLE_READ)
+ new_por &= ~POE_R;
+ if (init_val & PKEY_DISABLE_EXECUTE)
+ new_por &= ~POE_X;
+
+ /* Shift the bits in to the correct place in POR for pkey: */
+ pkey_shift = pkey * POR_BITS_PER_PKEY;
+ new_por <<= pkey_shift;
+
+ /* Get old POR and mask off any old bits in place: */
+ old_por = read_sysreg_s(SYS_POR_EL0);
+ old_por &= ~(POE_MASK << pkey_shift);
+
+ /* Write old part along with new part: */
+ write_sysreg_s(old_por | new_por, SYS_POR_EL0);
+
+ return 0;
+}
+#endif
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index f4bc6c5bac06..8abdc7fed321 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -36,8 +36,6 @@
#define TCR_KASLR_FLAGS 0
#endif
-#define TCR_SMP_FLAGS TCR_SHARED
-
/* PTWs cacheable, inner/outer WBWA */
#define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA
@@ -469,7 +467,7 @@ SYM_FUNC_START(__cpu_setup)
tcr .req x16
mov_q mair, MAIR_EL1_SET
mov_q tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS_MIN) | TCR_CACHE_FLAGS | \
- TCR_SMP_FLAGS | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
+ TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
tcr_clear_errata_bits tcr, x9, x5
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 6986827e0d64..264c5f9b97d8 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -38,33 +38,7 @@
seq_printf(m, fmt); \
})
-/*
- * The page dumper groups page table entries of the same type into a single
- * description. It uses pg_state to track the range information while
- * iterating over the pte entries. When the continuity is broken it then
- * dumps out a description of the range.
- */
-struct pg_state {
- struct ptdump_state ptdump;
- struct seq_file *seq;
- const struct addr_marker *marker;
- const struct mm_struct *mm;
- unsigned long start_address;
- int level;
- u64 current_prot;
- bool check_wx;
- unsigned long wx_pages;
- unsigned long uxn_pages;
-};
-
-struct prot_bits {
- u64 mask;
- u64 val;
- const char *set;
- const char *clear;
-};
-
-static const struct prot_bits pte_bits[] = {
+static const struct ptdump_prot_bits pte_bits[] = {
{
.mask = PTE_VALID,
.val = PTE_VALID,
@@ -143,14 +117,7 @@ static const struct prot_bits pte_bits[] = {
}
};
-struct pg_level {
- const struct prot_bits *bits;
- char name[4];
- int num;
- u64 mask;
-};
-
-static struct pg_level pg_level[] __ro_after_init = {
+static struct ptdump_pg_level kernel_pg_levels[] __ro_after_init = {
{ /* pgd */
.name = "PGD",
.bits = pte_bits,
@@ -174,7 +141,7 @@ static struct pg_level pg_level[] __ro_after_init = {
},
};
-static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
+static void dump_prot(struct ptdump_pg_state *st, const struct ptdump_prot_bits *bits,
size_t num)
{
unsigned i;
@@ -192,7 +159,7 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
}
}
-static void note_prot_uxn(struct pg_state *st, unsigned long addr)
+static void note_prot_uxn(struct ptdump_pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
@@ -206,7 +173,7 @@ static void note_prot_uxn(struct pg_state *st, unsigned long addr)
st->uxn_pages += (addr - st->start_address) / PAGE_SIZE;
}
-static void note_prot_wx(struct pg_state *st, unsigned long addr)
+static void note_prot_wx(struct ptdump_pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
@@ -221,16 +188,17 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
-static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- u64 val)
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ u64 val)
{
- struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
+ struct ptdump_pg_level *pg_level = st->pg_level;
static const char units[] = "KMGTPE";
u64 prot = 0;
/* check if the current level has been folded dynamically */
- if ((level == 1 && mm_p4d_folded(st->mm)) ||
- (level == 2 && mm_pud_folded(st->mm)))
+ if (st->mm && ((level == 1 && mm_p4d_folded(st->mm)) ||
+ (level == 2 && mm_pud_folded(st->mm))))
level = 0;
if (level >= 0)
@@ -286,15 +254,16 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
{
unsigned long end = ~0UL;
- struct pg_state st;
+ struct ptdump_pg_state st;
if (info->base_addr < TASK_SIZE_64)
end = TASK_SIZE_64;
- st = (struct pg_state){
+ st = (struct ptdump_pg_state){
.seq = s,
.marker = info->markers,
.mm = info->mm,
+ .pg_level = &kernel_pg_levels[0],
.level = -1,
.ptdump = {
.note_page = note_page,
@@ -312,10 +281,10 @@ static void __init ptdump_initialize(void)
{
unsigned i, j;
- for (i = 0; i < ARRAY_SIZE(pg_level); i++)
- if (pg_level[i].bits)
- for (j = 0; j < pg_level[i].num; j++)
- pg_level[i].mask |= pg_level[i].bits[j].mask;
+ for (i = 0; i < ARRAY_SIZE(kernel_pg_levels); i++)
+ if (kernel_pg_levels[i].bits)
+ for (j = 0; j < kernel_pg_levels[i].num; j++)
+ kernel_pg_levels[i].mask |= kernel_pg_levels[i].bits[j].mask;
}
static struct ptdump_info kernel_ptdump_info __ro_after_init = {
@@ -324,12 +293,13 @@ static struct ptdump_info kernel_ptdump_info __ro_after_init = {
bool ptdump_check_wx(void)
{
- struct pg_state st = {
+ struct ptdump_pg_state st = {
.seq = NULL,
.marker = (struct addr_marker[]) {
{ 0, NULL},
{ -1, NULL},
},
+ .pg_level = &kernel_pg_levels[0],
.level = -1,
.check_wx = true,
.ptdump = {
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index 5139a28130c0..0f7b484cb2ff 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -42,14 +42,16 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
* the temporary mappings we use during restore.
*/
__set_pte(dst_ptep, pte_mkwrite_novma(pte));
- } else if ((debug_pagealloc_enabled() ||
- is_kfence_address((void *)addr)) && !pte_none(pte)) {
+ } else if (!pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
* the page isn't in use by the resume kernel. It may have
* been in use by the original kernel, in which case we need
* to put it back in our copy to do the restore.
*
+ * Other cases include kfence / vmalloc / memfd_secret which
+ * may call `set_direct_map_invalid_noflush()`.
+ *
* Before marking this entry valid, check the pfn should
* be mapped.
*/
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index ac3429d892b9..eedb5acc21ed 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -45,6 +45,7 @@ HAS_MOPS
HAS_NESTED_VIRT
HAS_PAN
HAS_S1PIE
+HAS_S1POE
HAS_RAS_EXTN
HAS_RNG
HAS_SB
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 7ceaa1e0b4bc..8d637ac4b7c6 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -2029,6 +2029,31 @@ Sysreg FAR_EL1 3 0 6 0 0
Field 63:0 ADDR
EndSysreg
+Sysreg PMICNTR_EL0 3 3 9 4 0
+Field 63:0 ICNT
+EndSysreg
+
+Sysreg PMICFILTR_EL0 3 3 9 6 0
+Res0 63:59
+Field 58 SYNC
+Field 57:56 VS
+Res0 55:32
+Field 31 P
+Field 30 U
+Field 29 NSK
+Field 28 NSU
+Field 27 NSH
+Field 26 M
+Res0 25
+Field 24 SH
+Field 23 T
+Field 22 RLK
+Field 21 RLU
+Field 20 RLH
+Res0 19:16
+Field 15:0 evtCount
+EndSysreg
+
Sysreg PMSCR_EL1 3 0 9 9 0
Res0 63:8
Field 7:6 PCT
@@ -2153,6 +2178,11 @@ Field 4 P
Field 3:0 ALIGN
EndSysreg
+Sysreg PMSELR_EL0 3 3 9 12 5
+Res0 63:5
+Field 4:0 SEL
+EndSysreg
+
SysregFields CONTEXTIDR_ELx
Res0 63:32
Field 31:0 PROCID
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 2bb3676429c0..4635b755b2b4 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -6,7 +6,6 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += early_ioremap.h
generic-y += qrwlock.h
-generic-y += qspinlock.h
generic-y += user.h
generic-y += ioctl.h
generic-y += statfs.h
diff --git a/arch/loongarch/include/asm/dma-direct.h b/arch/loongarch/include/asm/dma-direct.h
deleted file mode 100644
index 75ccd808a2af..000000000000
--- a/arch/loongarch/include/asm/dma-direct.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
- */
-#ifndef _LOONGARCH_DMA_DIRECT_H
-#define _LOONGARCH_DMA_DIRECT_H
-
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
-
-#endif /* _LOONGARCH_DMA_DIRECT_H */
diff --git a/arch/loongarch/include/asm/hw_irq.h b/arch/loongarch/include/asm/hw_irq.h
index af4f4e8fbd85..8156ffb67415 100644
--- a/arch/loongarch/include/asm/hw_irq.h
+++ b/arch/loongarch/include/asm/hw_irq.h
@@ -9,6 +9,8 @@
extern atomic_t irq_err_count;
+#define ARCH_IRQ_INIT_FLAGS IRQ_NOPROBE
+
/*
* interrupt-retrigger: NOP for now. This may not be appropriate for all
* machines, we'll see ...
diff --git a/arch/loongarch/include/asm/kvm_csr.h b/arch/loongarch/include/asm/kvm_csr.h
index 724ca8b7b401..4a76ce796f1f 100644
--- a/arch/loongarch/include/asm/kvm_csr.h
+++ b/arch/loongarch/include/asm/kvm_csr.h
@@ -30,6 +30,7 @@
: [val] "+r" (__v) \
: [reg] "i" (csr) \
: "memory"); \
+ __v; \
})
#define gcsr_xchg(v, m, csr) \
@@ -181,6 +182,8 @@ __BUILD_GCSR_OP(tlbidx)
#define kvm_save_hw_gcsr(csr, gid) (csr->csrs[gid] = gcsr_read(gid))
#define kvm_restore_hw_gcsr(csr, gid) (gcsr_write(csr->csrs[gid], gid))
+#define kvm_read_clear_hw_gcsr(csr, gid) (csr->csrs[gid] = gcsr_write(0, gid))
+
int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu);
static __always_inline unsigned long kvm_read_sw_gcsr(struct loongarch_csrs *csr, int gid)
@@ -208,4 +211,7 @@ static __always_inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr,
csr->csrs[gid] |= val & _mask;
}
+#define KVM_PMU_EVENT_ENABLED (CSR_PERFCTRL_PLV0 | CSR_PERFCTRL_PLV1 | \
+ CSR_PERFCTRL_PLV2 | CSR_PERFCTRL_PLV3)
+
#endif /* __ASM_LOONGARCH_KVM_CSR_H__ */
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 5f0677e03817..d6bb72424027 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
#define KVM_HALT_POLL_NS_DEFAULT 500000
#define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(1)
+#define KVM_REQ_PMU KVM_ARCH_REQ(2)
#define KVM_GUESTDBG_SW_BP_MASK \
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
@@ -60,9 +61,13 @@ struct kvm_arch_memory_slot {
unsigned long flags;
};
+#define HOST_MAX_PMNUM 16
struct kvm_context {
unsigned long vpid_cache;
struct kvm_vcpu *last_vcpu;
+ /* Host PMU CSR */
+ u64 perf_ctrl[HOST_MAX_PMNUM];
+ u64 perf_cntr[HOST_MAX_PMNUM];
};
struct kvm_world_switch {
@@ -107,6 +112,8 @@ struct kvm_arch {
unsigned int root_level;
spinlock_t phyid_map_lock;
struct kvm_phyid_map *phyid_map;
+ /* Enabled PV features */
+ unsigned long pv_features;
s64 time_offset;
struct kvm_context __percpu *vmcs;
@@ -133,8 +140,15 @@ enum emulation_result {
#define KVM_LARCH_FPU (0x1 << 0)
#define KVM_LARCH_LSX (0x1 << 1)
#define KVM_LARCH_LASX (0x1 << 2)
-#define KVM_LARCH_SWCSR_LATEST (0x1 << 3)
-#define KVM_LARCH_HWCSR_USABLE (0x1 << 4)
+#define KVM_LARCH_LBT (0x1 << 3)
+#define KVM_LARCH_PMU (0x1 << 4)
+#define KVM_LARCH_SWCSR_LATEST (0x1 << 5)
+#define KVM_LARCH_HWCSR_USABLE (0x1 << 6)
+
+#define LOONGARCH_PV_FEAT_UPDATED BIT_ULL(63)
+#define LOONGARCH_PV_FEAT_MASK (BIT(KVM_FEATURE_IPI) | \
+ BIT(KVM_FEATURE_STEAL_TIME) | \
+ BIT(KVM_FEATURE_VIRT_EXTIOI))
struct kvm_vcpu_arch {
/*
@@ -168,10 +182,14 @@ struct kvm_vcpu_arch {
/* FPU state */
struct loongarch_fpu fpu FPU_ALIGN;
+ struct loongarch_lbt lbt;
/* CSR state */
struct loongarch_csrs *csr;
+ /* Guest max PMU CSR id */
+ int max_pmu_csrid;
+
/* GPR used as IO source/target */
u32 io_gpr;
@@ -239,6 +257,21 @@ static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch)
return arch->cpucfg[2] & CPUCFG2_LASX;
}
+static inline bool kvm_guest_has_lbt(struct kvm_vcpu_arch *arch)
+{
+ return arch->cpucfg[2] & (CPUCFG2_X86BT | CPUCFG2_ARMBT | CPUCFG2_MIPSBT);
+}
+
+static inline bool kvm_guest_has_pmu(struct kvm_vcpu_arch *arch)
+{
+ return arch->cpucfg[6] & CPUCFG6_PMP;
+}
+
+static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch)
+{
+ return (arch->cpucfg[6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT;
+}
+
/* Debug: dump vcpu state */
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 43ec61589e6c..c4e84227280d 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -2,6 +2,8 @@
#ifndef _ASM_LOONGARCH_KVM_PARA_H
#define _ASM_LOONGARCH_KVM_PARA_H
+#include <uapi/asm/kvm_para.h>
+
/*
* Hypercall code field
*/
@@ -154,10 +156,20 @@ static __always_inline long kvm_hypercall5(u64 fid,
return ret;
}
+#ifdef CONFIG_PARAVIRT
+bool kvm_para_available(void);
+unsigned int kvm_arch_para_features(void);
+#else
+static inline bool kvm_para_available(void)
+{
+ return false;
+}
+
static inline unsigned int kvm_arch_para_features(void)
{
return 0;
}
+#endif
static inline unsigned int kvm_arch_para_hints(void)
{
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index c416cb7125c0..d7e8f7d50ee0 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -75,8 +75,13 @@ static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { }
static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { }
#endif
+#ifdef CONFIG_CPU_HAS_LBT
+int kvm_own_lbt(struct kvm_vcpu *vcpu);
+#else
+static inline int kvm_own_lbt(struct kvm_vcpu *vcpu) { return -EINVAL; }
+#endif
+
void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz);
-void kvm_reset_timer(struct kvm_vcpu *vcpu);
void kvm_save_timer(struct kvm_vcpu *vcpu);
void kvm_restore_timer(struct kvm_vcpu *vcpu);
@@ -125,4 +130,9 @@ static inline bool kvm_pvtime_supported(void)
return !!sched_info_on();
}
+static inline bool kvm_guest_has_pv_feature(struct kvm_vcpu *vcpu, unsigned int feature)
+{
+ return vcpu->kvm->arch.pv_features & BIT(feature);
+}
+
#endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 04a78010fc72..24a3f4925cfb 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -119,6 +119,7 @@
#define CPUCFG6_PMP BIT(0)
#define CPUCFG6_PAMVER GENMASK(3, 1)
#define CPUCFG6_PMNUM GENMASK(7, 4)
+#define CPUCFG6_PMNUM_SHIFT 4
#define CPUCFG6_PMBITS GENMASK(13, 8)
#define CPUCFG6_UPM BIT(14)
@@ -160,16 +161,8 @@
/*
* CPUCFG index area: 0x40000000 -- 0x400000ff
- * SW emulation for KVM hypervirsor
+ * SW emulation for KVM hypervirsor, see arch/loongarch/include/uapi/asm/kvm_para.h
*/
-#define CPUCFG_KVM_BASE 0x40000000
-#define CPUCFG_KVM_SIZE 0x100
-
-#define CPUCFG_KVM_SIG (CPUCFG_KVM_BASE + 0)
-#define KVM_SIGNATURE "KVM\0"
-#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
-#define KVM_FEATURE_IPI BIT(1)
-#define KVM_FEATURE_STEAL_TIME BIT(2)
#ifndef __ASSEMBLY__
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
index dddec49671ae..3f4323603e6a 100644
--- a/arch/loongarch/include/asm/paravirt.h
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -19,6 +19,7 @@ static inline u64 paravirt_steal_clock(int cpu)
int __init pv_ipi_init(void);
int __init pv_time_init(void);
+int __init pv_spinlock_init(void);
#else
@@ -31,5 +32,11 @@ static inline int pv_time_init(void)
{
return 0;
}
+
+static inline int pv_spinlock_init(void)
+{
+ return 0;
+}
+
#endif // CONFIG_PARAVIRT
#endif
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
new file mode 100644
index 000000000000..e76d3aa1e1eb
--- /dev/null
+++ b/arch/loongarch/include/asm/qspinlock.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_QSPINLOCK_H
+#define _ASM_LOONGARCH_QSPINLOCK_H
+
+#include <linux/jump_label.h>
+
+#ifdef CONFIG_PARAVIRT
+
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+
+#define virt_spin_lock virt_spin_lock
+
+static inline bool virt_spin_lock(struct qspinlock *lock)
+{
+ int val;
+
+ if (!static_branch_unlikely(&virt_spin_lock_key))
+ return false;
+
+ /*
+ * On hypervisors without PARAVIRT_SPINLOCKS support we fall
+ * back to a Test-and-Set spinlock, because fair locks have
+ * horrible lock 'holder' preemption issues.
+ */
+
+__retry:
+ val = atomic_read(&lock->val);
+
+ if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
+ cpu_relax();
+ goto __retry;
+ }
+
+ return true;
+}
+
+#endif /* CONFIG_PARAVIRT */
+
+#include <asm-generic/qspinlock.h>
+
+#endif // _ASM_LOONGARCH_QSPINLOCK_H
diff --git a/arch/loongarch/include/uapi/asm/Kbuild b/arch/loongarch/include/uapi/asm/Kbuild
index c6d141d7b7d7..517761419999 100644
--- a/arch/loongarch/include/uapi/asm/Kbuild
+++ b/arch/loongarch/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
# SPDX-License-Identifier: GPL-2.0
syscall-y += unistd_64.h
-
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index ddc5cab0ffd0..70d89070bfeb 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -64,6 +64,7 @@ struct kvm_fpu {
#define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL)
#define KVM_REG_LOONGARCH_FPSIMD (KVM_REG_LOONGARCH | 0x30000ULL)
#define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL)
+#define KVM_REG_LOONGARCH_LBT (KVM_REG_LOONGARCH | 0x50000ULL)
#define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL)
#define KVM_CSR_IDX_MASK 0x7fff
#define KVM_CPUCFG_IDX_MASK 0x7fff
@@ -77,11 +78,30 @@ struct kvm_fpu {
/* Debugging: Special instruction for software breakpoint */
#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3)
+/* LBT registers */
+#define KVM_REG_LOONGARCH_LBT_SCR0 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 1)
+#define KVM_REG_LOONGARCH_LBT_SCR1 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 2)
+#define KVM_REG_LOONGARCH_LBT_SCR2 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 3)
+#define KVM_REG_LOONGARCH_LBT_SCR3 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 4)
+#define KVM_REG_LOONGARCH_LBT_EFLAGS (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 5)
+#define KVM_REG_LOONGARCH_LBT_FTOP (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 6)
+
#define LOONGARCH_REG_SHIFT 3
#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+/* Device Control API on vm fd */
+#define KVM_LOONGARCH_VM_FEAT_CTRL 0
+#define KVM_LOONGARCH_VM_FEAT_LSX 0
+#define KVM_LOONGARCH_VM_FEAT_LASX 1
+#define KVM_LOONGARCH_VM_FEAT_X86BT 2
+#define KVM_LOONGARCH_VM_FEAT_ARMBT 3
+#define KVM_LOONGARCH_VM_FEAT_MIPSBT 4
+#define KVM_LOONGARCH_VM_FEAT_PMU 5
+#define KVM_LOONGARCH_VM_FEAT_PV_IPI 6
+#define KVM_LOONGARCH_VM_FEAT_PV_STEALTIME 7
+
/* Device Control API on vcpu fd */
#define KVM_LOONGARCH_VCPU_CPUCFG 0
#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
diff --git a/arch/loongarch/include/uapi/asm/kvm_para.h b/arch/loongarch/include/uapi/asm/kvm_para.h
new file mode 100644
index 000000000000..b0604aa9b4bb
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/kvm_para.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_KVM_PARA_H
+#define _UAPI_ASM_KVM_PARA_H
+
+#include <linux/types.h>
+
+/*
+ * CPUCFG index area: 0x40000000 -- 0x400000ff
+ * SW emulation for KVM hypervirsor
+ */
+#define CPUCFG_KVM_BASE 0x40000000
+#define CPUCFG_KVM_SIZE 0x100
+#define CPUCFG_KVM_SIG (CPUCFG_KVM_BASE + 0)
+#define KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+#define KVM_FEATURE_IPI 1
+#define KVM_FEATURE_STEAL_TIME 2
+/* BIT 24 - 31 are features configurable by user space vmm */
+#define KVM_FEATURE_VIRT_EXTIOI 24
+
+#endif /* _UAPI_ASM_KVM_PARA_H */
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index 69a85f2479fb..6ab640101457 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -530,6 +530,10 @@ SYM_FUNC_END(_restore_lasx_context)
#ifdef CONFIG_CPU_HAS_LBT
STACK_FRAME_NON_STANDARD _restore_fp
+#ifdef CONFIG_CPU_HAS_LSX
STACK_FRAME_NON_STANDARD _restore_lsx
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
STACK_FRAME_NON_STANDARD _restore_lasx
#endif
+#endif
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index f4991c03514f..adac8fcbb2ac 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -102,9 +102,6 @@ void __init init_IRQ(void)
mp_ops.init_ipi();
#endif
- for (i = 0; i < NR_IRQS; i++)
- irq_set_noprobe(i);
-
for_each_possible_cpu(i) {
page = alloc_pages_node(cpu_to_node(i), GFP_KERNEL, order);
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index 9c9b75b76f62..708eda025ed8 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -13,6 +13,7 @@ static int has_steal_clock;
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key);
static u64 native_steal_clock(int cpu)
{
@@ -151,11 +152,14 @@ static void pv_init_ipi(void)
}
#endif
-static bool kvm_para_available(void)
+bool kvm_para_available(void)
{
int config;
static int hypervisor_type;
+ if (!cpu_has_hypervisor)
+ return false;
+
if (!hypervisor_type) {
config = read_cpucfg(CPUCFG_KVM_SIG);
if (!memcmp(&config, KVM_SIGNATURE, 4))
@@ -165,17 +169,22 @@ static bool kvm_para_available(void)
return hypervisor_type == HYPERVISOR_KVM;
}
-int __init pv_ipi_init(void)
+unsigned int kvm_arch_para_features(void)
{
- int feature;
+ static unsigned int feature;
- if (!cpu_has_hypervisor)
- return 0;
if (!kvm_para_available())
return 0;
- feature = read_cpucfg(CPUCFG_KVM_FEATURE);
- if (!(feature & KVM_FEATURE_IPI))
+ if (!feature)
+ feature = read_cpucfg(CPUCFG_KVM_FEATURE);
+
+ return feature;
+}
+
+int __init pv_ipi_init(void)
+{
+ if (!kvm_para_has_feature(KVM_FEATURE_IPI))
return 0;
#ifdef CONFIG_SMP
@@ -206,7 +215,7 @@ static int pv_enable_steal_time(void)
}
addr |= KVM_STEAL_PHYS_VALID;
- kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr);
+ kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), addr);
return 0;
}
@@ -214,7 +223,7 @@ static int pv_enable_steal_time(void)
static void pv_disable_steal_time(void)
{
if (has_steal_clock)
- kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0);
+ kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), 0);
}
#ifdef CONFIG_SMP
@@ -258,15 +267,9 @@ static struct notifier_block pv_reboot_nb = {
int __init pv_time_init(void)
{
- int r, feature;
+ int r;
- if (!cpu_has_hypervisor)
- return 0;
- if (!kvm_para_available())
- return 0;
-
- feature = read_cpucfg(CPUCFG_KVM_FEATURE);
- if (!(feature & KVM_FEATURE_STEAL_TIME))
+ if (!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
return 0;
has_steal_clock = 1;
@@ -300,3 +303,13 @@ int __init pv_time_init(void)
return 0;
}
+
+int __init pv_spinlock_init(void)
+{
+ if (!cpu_has_hypervisor)
+ return 0;
+
+ static_branch_enable(&virt_spin_lock_key);
+
+ return 0;
+}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 0f0740f0be27..00e307203ddb 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -603,6 +603,8 @@ void __init setup_arch(char **cmdline_p)
arch_mem_init(cmdline_p);
resource_init();
+ jump_label_init(); /* Initialise the static keys for paravirtualization */
+
#ifdef CONFIG_SMP
plat_smp_setup();
prefill_possible_map();
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index ca405ab86aae..482b3c7e3042 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -476,7 +476,7 @@ core_initcall(ipi_pm_init);
#endif
/* Preload SMP state for boot cpu */
-void smp_prepare_boot_cpu(void)
+void __init smp_prepare_boot_cpu(void)
{
unsigned int cpu, node, rr_node;
@@ -509,6 +509,8 @@ void smp_prepare_boot_cpu(void)
rr_node = next_node_in(rr_node, node_online_map);
}
}
+
+ pv_spinlock_init();
}
/* called from main before smp_init() */
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ea73f9dc2cc6..90894f70ff4a 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -50,9 +50,7 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
break;
case CPUCFG_KVM_FEATURE:
- ret = KVM_FEATURE_IPI;
- if (kvm_pvtime_supported())
- ret |= KVM_FEATURE_STEAL_TIME;
+ ret = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK;
vcpu->arch.gprs[rd] = ret;
break;
default:
@@ -127,6 +125,14 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst)
rj = inst.reg2csr_format.rj;
csrid = inst.reg2csr_format.csr;
+ if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= vcpu->arch.max_pmu_csrid) {
+ if (kvm_guest_has_pmu(&vcpu->arch)) {
+ vcpu->arch.pc -= 4;
+ kvm_make_request(KVM_REQ_PMU, vcpu);
+ return EMULATE_DONE;
+ }
+ }
+
/* Process CSR ops */
switch (rj) {
case 0: /* process csrrd */
@@ -697,25 +703,22 @@ static long kvm_save_notify(struct kvm_vcpu *vcpu)
id = kvm_read_reg(vcpu, LOONGARCH_GPR_A1);
data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2);
switch (id) {
- case KVM_FEATURE_STEAL_TIME:
- if (!kvm_pvtime_supported())
- return KVM_HCALL_INVALID_CODE;
-
+ case BIT(KVM_FEATURE_STEAL_TIME):
if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
return KVM_HCALL_INVALID_PARAMETER;
vcpu->arch.st.guest_addr = data;
if (!(data & KVM_STEAL_PHYS_VALID))
- break;
+ return 0;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
- break;
+ return 0;
default:
- break;
+ return KVM_HCALL_INVALID_CODE;
};
- return 0;
+ return KVM_HCALL_INVALID_CODE;
};
/*
@@ -748,6 +751,14 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu)
+{
+ if (kvm_own_lbt(vcpu))
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu)
{
unsigned int min, cpu, i;
@@ -781,19 +792,21 @@ static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu)
*/
static void kvm_handle_service(struct kvm_vcpu *vcpu)
{
+ long ret = KVM_HCALL_INVALID_CODE;
unsigned long func = kvm_read_reg(vcpu, LOONGARCH_GPR_A0);
- long ret;
switch (func) {
case KVM_HCALL_FUNC_IPI:
- kvm_send_pv_ipi(vcpu);
- ret = KVM_HCALL_SUCCESS;
+ if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_IPI)) {
+ kvm_send_pv_ipi(vcpu);
+ ret = KVM_HCALL_SUCCESS;
+ }
break;
case KVM_HCALL_FUNC_NOTIFY:
- ret = kvm_save_notify(vcpu);
+ if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME))
+ ret = kvm_save_notify(vcpu);
break;
default:
- ret = KVM_HCALL_INVALID_CODE;
break;
}
@@ -865,6 +878,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
[EXCCODE_FPDIS] = kvm_handle_fpu_disabled,
[EXCCODE_LSXDIS] = kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS] = kvm_handle_lasx_disabled,
+ [EXCCODE_BTDIS] = kvm_handle_lbt_disabled,
[EXCCODE_GSPR] = kvm_handle_gspr,
[EXCCODE_HVC] = kvm_handle_hypercall,
};
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index 80e988985a6a..0c292f818492 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -277,6 +277,10 @@ SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
#ifdef CONFIG_CPU_HAS_LBT
STACK_FRAME_NON_STANDARD kvm_restore_fpu
+#ifdef CONFIG_CPU_HAS_LSX
STACK_FRAME_NON_STANDARD kvm_restore_lsx
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
STACK_FRAME_NON_STANDARD kvm_restore_lasx
#endif
+#endif
diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c
index bcc6b6d063d9..74a4b5c272d6 100644
--- a/arch/loongarch/kvm/timer.c
+++ b/arch/loongarch/kvm/timer.c
@@ -188,10 +188,3 @@ void kvm_save_timer(struct kvm_vcpu *vcpu)
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
preempt_enable();
}
-
-void kvm_reset_timer(struct kvm_vcpu *vcpu)
-{
- write_gcsr_timercfg(0);
- kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG, 0);
- hrtimer_cancel(&vcpu->arch.swtimer);
-}
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 16756ffb55e8..0697b1064251 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -6,6 +6,7 @@
#include <linux/kvm_host.h>
#include <linux/entry-kvm.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/loongarch.h>
#include <asm/setup.h>
#include <asm/time.h>
@@ -31,6 +32,126 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
sizeof(kvm_vcpu_stats_desc),
};
+static inline void kvm_save_host_pmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_context *context;
+
+ context = this_cpu_ptr(vcpu->kvm->arch.vmcs);
+ context->perf_cntr[0] = read_csr_perfcntr0();
+ context->perf_cntr[1] = read_csr_perfcntr1();
+ context->perf_cntr[2] = read_csr_perfcntr2();
+ context->perf_cntr[3] = read_csr_perfcntr3();
+ context->perf_ctrl[0] = write_csr_perfctrl0(0);
+ context->perf_ctrl[1] = write_csr_perfctrl1(0);
+ context->perf_ctrl[2] = write_csr_perfctrl2(0);
+ context->perf_ctrl[3] = write_csr_perfctrl3(0);
+}
+
+static inline void kvm_restore_host_pmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_context *context;
+
+ context = this_cpu_ptr(vcpu->kvm->arch.vmcs);
+ write_csr_perfcntr0(context->perf_cntr[0]);
+ write_csr_perfcntr1(context->perf_cntr[1]);
+ write_csr_perfcntr2(context->perf_cntr[2]);
+ write_csr_perfcntr3(context->perf_cntr[3]);
+ write_csr_perfctrl0(context->perf_ctrl[0]);
+ write_csr_perfctrl1(context->perf_ctrl[1]);
+ write_csr_perfctrl2(context->perf_ctrl[2]);
+ write_csr_perfctrl3(context->perf_ctrl[3]);
+}
+
+
+static inline void kvm_save_guest_pmu(struct kvm_vcpu *vcpu)
+{
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3);
+ kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
+ kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
+ kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
+ kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+}
+
+static inline void kvm_restore_guest_pmu(struct kvm_vcpu *vcpu)
+{
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+}
+
+static int kvm_own_pmu(struct kvm_vcpu *vcpu)
+{
+ unsigned long val;
+
+ if (!kvm_guest_has_pmu(&vcpu->arch))
+ return -EINVAL;
+
+ kvm_save_host_pmu(vcpu);
+
+ /* Set PM0-PM(num) to guest */
+ val = read_csr_gcfg() & ~CSR_GCFG_GPERF;
+ val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT;
+ write_csr_gcfg(val);
+
+ kvm_restore_guest_pmu(vcpu);
+
+ return 0;
+}
+
+static void kvm_lose_pmu(struct kvm_vcpu *vcpu)
+{
+ unsigned long val;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (!(vcpu->arch.aux_inuse & KVM_LARCH_PMU))
+ return;
+
+ kvm_save_guest_pmu(vcpu);
+
+ /* Disable pmu access from guest */
+ write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF);
+
+ /*
+ * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when
+ * exiting the guest, so that the next time trap into the guest.
+ * We don't need to deal with PMU CSRs contexts.
+ */
+ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
+ val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
+ val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
+ val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+ if (!(val & KVM_PMU_EVENT_ENABLED))
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU;
+
+ kvm_restore_host_pmu(vcpu);
+}
+
+static void kvm_restore_pmu(struct kvm_vcpu *vcpu)
+{
+ if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU))
+ kvm_make_request(KVM_REQ_PMU, vcpu);
+}
+
+static void kvm_check_pmu(struct kvm_vcpu *vcpu)
+{
+ if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
+ kvm_own_pmu(vcpu);
+ vcpu->arch.aux_inuse |= KVM_LARCH_PMU;
+ }
+}
+
static void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
{
u32 version;
@@ -158,6 +279,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
/* Make sure the vcpu mode has been written */
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
kvm_check_vpid(vcpu);
+ kvm_check_pmu(vcpu);
/*
* Called after function kvm_check_vpid()
@@ -195,6 +317,8 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Set a default exit reason */
run->exit_reason = KVM_EXIT_UNKNOWN;
+ kvm_lose_pmu(vcpu);
+
guest_timing_exit_irqoff();
guest_state_exit_irqoff();
local_irq_enable();
@@ -468,6 +592,22 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
kvm_write_sw_gcsr(csr, id, val);
+ /*
+ * After modifying the PMU CSR register value of the vcpu.
+ * If the PMU CSRs are used, we need to set KVM_REQ_PMU.
+ */
+ if (id >= LOONGARCH_CSR_PERFCTRL0 && id <= LOONGARCH_CSR_PERFCNTR3) {
+ unsigned long val;
+
+ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) |
+ kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) |
+ kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) |
+ kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+
+ if (val & KVM_PMU_EVENT_ENABLED)
+ kvm_make_request(KVM_REQ_PMU, vcpu);
+ }
+
return ret;
}
@@ -497,6 +637,12 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v)
*v |= CPUCFG2_LSX;
if (cpu_has_lasx)
*v |= CPUCFG2_LASX;
+ if (cpu_has_lbt_x86)
+ *v |= CPUCFG2_X86BT;
+ if (cpu_has_lbt_arm)
+ *v |= CPUCFG2_ARMBT;
+ if (cpu_has_lbt_mips)
+ *v |= CPUCFG2_MIPSBT;
return 0;
case LOONGARCH_CPUCFG3:
@@ -506,6 +652,12 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v)
case LOONGARCH_CPUCFG5:
*v = GENMASK(31, 0);
return 0;
+ case LOONGARCH_CPUCFG6:
+ if (cpu_has_pmp)
+ *v = GENMASK(14, 0);
+ else
+ *v = 0;
+ return 0;
case LOONGARCH_CPUCFG16:
*v = GENMASK(16, 0);
return 0;
@@ -550,6 +702,17 @@ static int kvm_check_cpucfg(int id, u64 val)
/* LASX architecturally implies LSX and FP but val does not satisfy that */
return -EINVAL;
return 0;
+ case LOONGARCH_CPUCFG6:
+ if (val & CPUCFG6_PMP) {
+ u32 host = read_cpucfg(LOONGARCH_CPUCFG6);
+ if ((val & CPUCFG6_PMBITS) != (host & CPUCFG6_PMBITS))
+ return -EINVAL;
+ if ((val & CPUCFG6_PMNUM) > (host & CPUCFG6_PMNUM))
+ return -EINVAL;
+ if ((val & CPUCFG6_UPM) && !(host & CPUCFG6_UPM))
+ return -EINVAL;
+ }
+ return 0;
default:
/*
* Values for the other CPUCFG IDs are not being further validated
@@ -577,6 +740,34 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
else
ret = -EINVAL;
break;
+ case KVM_REG_LOONGARCH_LBT:
+ if (!kvm_guest_has_lbt(&vcpu->arch))
+ return -ENXIO;
+
+ switch (reg->id) {
+ case KVM_REG_LOONGARCH_LBT_SCR0:
+ *v = vcpu->arch.lbt.scr0;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR1:
+ *v = vcpu->arch.lbt.scr1;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR2:
+ *v = vcpu->arch.lbt.scr2;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR3:
+ *v = vcpu->arch.lbt.scr3;
+ break;
+ case KVM_REG_LOONGARCH_LBT_EFLAGS:
+ *v = vcpu->arch.lbt.eflags;
+ break;
+ case KVM_REG_LOONGARCH_LBT_FTOP:
+ *v = vcpu->arch.fpu.ftop;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ break;
case KVM_REG_LOONGARCH_KVM:
switch (reg->id) {
case KVM_REG_LOONGARCH_COUNTER:
@@ -635,6 +826,37 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu,
if (ret)
break;
vcpu->arch.cpucfg[id] = (u32)v;
+ if (id == LOONGARCH_CPUCFG6)
+ vcpu->arch.max_pmu_csrid =
+ LOONGARCH_CSR_PERFCTRL0 + 2 * kvm_get_pmu_num(&vcpu->arch) + 1;
+ break;
+ case KVM_REG_LOONGARCH_LBT:
+ if (!kvm_guest_has_lbt(&vcpu->arch))
+ return -ENXIO;
+
+ switch (reg->id) {
+ case KVM_REG_LOONGARCH_LBT_SCR0:
+ vcpu->arch.lbt.scr0 = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR1:
+ vcpu->arch.lbt.scr1 = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR2:
+ vcpu->arch.lbt.scr2 = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR3:
+ vcpu->arch.lbt.scr3 = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_EFLAGS:
+ vcpu->arch.lbt.eflags = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_FTOP:
+ vcpu->arch.fpu.ftop = v;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
break;
case KVM_REG_LOONGARCH_KVM:
switch (reg->id) {
@@ -647,7 +869,7 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu,
vcpu->kvm->arch.time_offset = (signed long)(v - drdtime());
break;
case KVM_REG_LOONGARCH_VCPU_RESET:
- kvm_reset_timer(vcpu);
+ vcpu->arch.st.guest_addr = 0;
memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending));
memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear));
break;
@@ -728,7 +950,10 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
switch (attr->attr) {
- case 2:
+ case LOONGARCH_CPUCFG2:
+ case LOONGARCH_CPUCFG6:
+ return 0;
+ case CPUCFG_KVM_FEATURE:
return 0;
default:
return -ENXIO;
@@ -740,8 +965,8 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu,
static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- if (!kvm_pvtime_supported() ||
- attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+ if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)
+ || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
return -ENXIO;
return 0;
@@ -773,9 +998,18 @@ static int kvm_loongarch_cpucfg_get_attr(struct kvm_vcpu *vcpu,
uint64_t val;
uint64_t __user *uaddr = (uint64_t __user *)attr->addr;
- ret = _kvm_get_cpucfg_mask(attr->attr, &val);
- if (ret)
- return ret;
+ switch (attr->attr) {
+ case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+ ret = _kvm_get_cpucfg_mask(attr->attr, &val);
+ if (ret)
+ return ret;
+ break;
+ case CPUCFG_KVM_FEATURE:
+ val = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK;
+ break;
+ default:
+ return -ENXIO;
+ }
put_user(val, uaddr);
@@ -788,8 +1022,8 @@ static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu,
u64 gpa;
u64 __user *user = (u64 __user *)attr->addr;
- if (!kvm_pvtime_supported() ||
- attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+ if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)
+ || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
return -ENXIO;
gpa = vcpu->arch.st.guest_addr;
@@ -821,7 +1055,28 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- return -ENXIO;
+ u64 val, valid;
+ u64 __user *user = (u64 __user *)attr->addr;
+ struct kvm *kvm = vcpu->kvm;
+
+ switch (attr->attr) {
+ case CPUCFG_KVM_FEATURE:
+ if (get_user(val, user))
+ return -EFAULT;
+
+ valid = LOONGARCH_PV_FEAT_MASK;
+ if (val & ~valid)
+ return -EINVAL;
+
+ /* All vCPUs need set the same PV features */
+ if ((kvm->arch.pv_features & LOONGARCH_PV_FEAT_UPDATED)
+ && ((kvm->arch.pv_features & valid) != val))
+ return -EINVAL;
+ kvm->arch.pv_features = val | LOONGARCH_PV_FEAT_UPDATED;
+ return 0;
+ default:
+ return -ENXIO;
+ }
}
static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu,
@@ -831,8 +1086,8 @@ static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu,
u64 gpa, __user *user = (u64 __user *)attr->addr;
struct kvm *kvm = vcpu->kvm;
- if (!kvm_pvtime_supported() ||
- attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+ if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)
+ || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
return -ENXIO;
if (get_user(gpa, user))
@@ -977,12 +1232,66 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
+#ifdef CONFIG_CPU_HAS_LBT
+int kvm_own_lbt(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_guest_has_lbt(&vcpu->arch))
+ return -EINVAL;
+
+ preempt_disable();
+ set_csr_euen(CSR_EUEN_LBTEN);
+ _restore_lbt(&vcpu->arch.lbt);
+ vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
+ preempt_enable();
+
+ return 0;
+}
+
+static void kvm_lose_lbt(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) {
+ _save_lbt(&vcpu->arch.lbt);
+ clear_csr_euen(CSR_EUEN_LBTEN);
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_LBT;
+ }
+ preempt_enable();
+}
+
+static void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr)
+{
+ /*
+ * If TM is enabled, top register save/restore will
+ * cause lbt exception, here enable lbt in advance
+ */
+ if (fcsr & FPU_CSR_TM)
+ kvm_own_lbt(vcpu);
+}
+
+static void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+ if (vcpu->arch.aux_inuse & KVM_LARCH_LBT)
+ return;
+ kvm_check_fcsr(vcpu, read_fcsr(LOONGARCH_FCSR0));
+ }
+}
+#else
+static inline void kvm_lose_lbt(struct kvm_vcpu *vcpu) { }
+static inline void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr) { }
+static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
+#endif
+
/* Enable FPU and restore context */
void kvm_own_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
- /* Enable FPU */
+ /*
+ * Enable FPU for guest
+ * Set FR and FRE according to guest context
+ */
+ kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
set_csr_euen(CSR_EUEN_FPEN);
kvm_restore_fpu(&vcpu->arch.fpu);
@@ -1002,6 +1311,7 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
preempt_disable();
/* Enable LSX for guest */
+ kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
case KVM_LARCH_FPU:
@@ -1036,6 +1346,7 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
preempt_disable();
+ kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
case KVM_LARCH_LSX:
@@ -1067,6 +1378,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
+ kvm_check_fcsr_alive(vcpu);
if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) {
kvm_save_lasx(&vcpu->arch.fpu);
vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX);
@@ -1089,6 +1401,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
/* Disable FPU */
clear_csr_euen(CSR_EUEN_FPEN);
}
+ kvm_lose_lbt(vcpu);
preempt_enable();
}
@@ -1235,6 +1548,9 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+ /* Restore hardware PMU CSRs */
+ kvm_restore_pmu(vcpu);
+
/* Don't bother restoring registers multiple times unless necessary */
if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
return 0;
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
index 6b2e4f66ad26..4ba734aaef87 100644
--- a/arch/loongarch/kvm/vm.c
+++ b/arch/loongarch/kvm/vm.c
@@ -5,6 +5,7 @@
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_vcpu.h>
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
@@ -39,6 +40,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
spin_lock_init(&kvm->arch.phyid_map_lock);
kvm_init_vmcs(kvm);
+
+ /* Enable all PV features by default */
+ kvm->arch.pv_features = BIT(KVM_FEATURE_IPI);
+ if (kvm_pvtime_supported())
+ kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME);
+
kvm->arch.gpa_size = BIT(cpu_vabits - 1);
kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
kvm->arch.invalid_ptes[0] = 0;
@@ -99,7 +106,67 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
return r;
}
+static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_LOONGARCH_VM_FEAT_LSX:
+ if (cpu_has_lsx)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_LASX:
+ if (cpu_has_lasx)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_X86BT:
+ if (cpu_has_lbt_x86)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_ARMBT:
+ if (cpu_has_lbt_arm)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_MIPSBT:
+ if (cpu_has_lbt_mips)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_PMU:
+ if (cpu_has_pmp)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_PV_IPI:
+ return 0;
+ case KVM_LOONGARCH_VM_FEAT_PV_STEALTIME:
+ if (kvm_pvtime_supported())
+ return 0;
+ return -ENXIO;
+ default:
+ return -ENXIO;
+ }
+}
+
+static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_LOONGARCH_VM_FEAT_CTRL:
+ return kvm_vm_feature_has_attr(kvm, attr);
+ default:
+ return -ENXIO;
+ }
+}
+
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
- return -ENOIOCTLCMD;
+ void __user *argp = (void __user *)arg;
+ struct kvm *kvm = filp->private_data;
+ struct kvm_device_attr attr;
+
+ switch (ioctl) {
+ case KVM_HAS_DEVICE_ATTR:
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+
+ return kvm_vm_has_attr(kvm, &attr);
+ default:
+ return -ENOIOCTLCMD;
+ }
}
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 3827dc76edd8..4520c5741579 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -193,11 +193,6 @@ asmlinkage void __init mmu_init(void)
{
unsigned int kstart, ksize;
- if (!memblock.reserved.cnt) {
- pr_emerg("Error memory count\n");
- machine_restart(NULL);
- }
-
if ((u32) memblock.memory.regions[0].size < 0x400000) {
pr_emerg("Memory must be greater than 4MB\n");
machine_restart(NULL);
diff --git a/arch/mips/alchemy/common/dma.c b/arch/mips/alchemy/common/dma.c
index 973049b5bd61..44d8433b1f45 100644
--- a/arch/mips/alchemy/common/dma.c
+++ b/arch/mips/alchemy/common/dma.c
@@ -131,29 +131,6 @@ static const struct dma_dev dma_dev_table_bank2[DMA_NUM_DEV_BANK2] = {
{ AU1100_SD1_PHYS_ADDR + 0x04, DMA_DS | DMA_DW8 | DMA_DR } /* coherent */
};
-void dump_au1000_dma_channel(unsigned int dmanr)
-{
- struct dma_chan *chan;
-
- if (dmanr >= NUM_AU1000_DMA_CHANNELS)
- return;
- chan = &au1000_dma_table[dmanr];
-
- printk(KERN_INFO "Au1000 DMA%d Register Dump:\n", dmanr);
- printk(KERN_INFO " mode = 0x%08x\n",
- __raw_readl(chan->io + DMA_MODE_SET));
- printk(KERN_INFO " addr = 0x%08x\n",
- __raw_readl(chan->io + DMA_PERIPHERAL_ADDR));
- printk(KERN_INFO " start0 = 0x%08x\n",
- __raw_readl(chan->io + DMA_BUFFER0_START));
- printk(KERN_INFO " start1 = 0x%08x\n",
- __raw_readl(chan->io + DMA_BUFFER1_START));
- printk(KERN_INFO " count0 = 0x%08x\n",
- __raw_readl(chan->io + DMA_BUFFER0_COUNT));
- printk(KERN_INFO " count1 = 0x%08x\n",
- __raw_readl(chan->io + DMA_BUFFER1_COUNT));
-}
-
/*
* Finds a free channel, and binds the requested device to it.
* Returns the allocated channel number, or negative on error.
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
index ec6d58008f8e..2a59b85f88aa 100644
--- a/arch/mips/crypto/crc32-mips.c
+++ b/arch/mips/crypto/crc32-mips.c
@@ -77,24 +77,26 @@ static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
{
u32 crc = crc_;
-#ifdef CONFIG_64BIT
- while (len >= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
-
- CRC32(crc, value, d);
- p += sizeof(u64);
- len -= sizeof(u64);
- }
-
- if (len & sizeof(u32)) {
-#else /* !CONFIG_64BIT */
- while (len >= sizeof(u32)) {
-#endif
- u32 value = get_unaligned_le32(p);
-
- CRC32(crc, value, w);
- p += sizeof(u32);
- len -= sizeof(u32);
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32(crc, value, d);
+ }
+
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ }
+ } else {
+ for (; len >= sizeof(u32); len -= sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ }
}
if (len & sizeof(u16)) {
@@ -117,24 +119,26 @@ static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
{
u32 crc = crc_;
-#ifdef CONFIG_64BIT
- while (len >= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
- CRC32C(crc, value, d);
- p += sizeof(u64);
- len -= sizeof(u64);
- }
+ CRC32(crc, value, d);
+ }
- if (len & sizeof(u32)) {
-#else /* !CONFIG_64BIT */
- while (len >= sizeof(u32)) {
-#endif
- u32 value = get_unaligned_le32(p);
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ }
+ } else {
+ for (; len >= sizeof(u32); len -= sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
- CRC32C(crc, value, w);
- p += sizeof(u32);
- len -= sizeof(u32);
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ }
}
if (len & sizeof(u16)) {
diff --git a/arch/mips/include/asm/cmp.h b/arch/mips/include/asm/cmp.h
index e9e87504bb0c..71e20e6cd38d 100644
--- a/arch/mips/include/asm/cmp.h
+++ b/arch/mips/include/asm/cmp.h
@@ -7,12 +7,4 @@
*/
struct task_struct;
-extern void cmp_smp_setup(void);
-extern void cmp_smp_finish(void);
-extern void cmp_boot_secondary(int cpu, struct task_struct *t);
-extern void cmp_init_secondary(void);
-extern void cmp_prepare_cpus(unsigned int max_cpus);
-
-/* This is platform specific */
-extern void cmp_send_ipi(int cpu, unsigned int action);
#endif /* _ASM_CMP_H */
diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h
index 908e96e3a311..8fcad6984389 100644
--- a/arch/mips/include/asm/dec/prom.h
+++ b/arch/mips/include/asm/dec/prom.h
@@ -160,6 +160,5 @@ extern void prom_identify_arch(u32);
extern void prom_init_cmdline(s32, s32 *, u32);
extern void register_prom_console(void);
-extern void unregister_prom_console(void);
#endif /* _ASM_DEC_PROM_H */
diff --git a/arch/mips/include/asm/mach-au1x00/au1000_dma.h b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
index b82e513c8523..18c24051a1f2 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000_dma.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
@@ -124,7 +124,6 @@ extern int request_au1000_dma(int dev_id,
extern void free_au1000_dma(unsigned int dmanr);
extern int au1000_dma_read_proc(char *buf, char **start, off_t fpos,
int length, int *eof, void *data);
-extern void dump_au1000_dma_channel(unsigned int dmanr);
extern spinlock_t au1000_dma_spin_lock;
static inline struct dma_chan *get_dma_chan(unsigned int dmanr)
diff --git a/arch/mips/include/asm/mips-boards/generic.h b/arch/mips/include/asm/mips-boards/generic.h
index c904c24550f6..5befba569c9f 100644
--- a/arch/mips/include/asm/mips-boards/generic.h
+++ b/arch/mips/include/asm/mips-boards/generic.h
@@ -73,7 +73,4 @@ extern void mips_pcibios_init(void);
#define mips_pcibios_init() do { } while (0)
#endif
-extern void mips_scroll_message(void);
-extern void mips_display_message(const char *str);
-
#endif /* __ASM_MIPS_BOARDS_GENERIC_H */
diff --git a/arch/mips/include/asm/mips_mt.h b/arch/mips/include/asm/mips_mt.h
index 28917f1582b3..6ea02af29876 100644
--- a/arch/mips/include/asm/mips_mt.h
+++ b/arch/mips/include/asm/mips_mt.h
@@ -17,8 +17,6 @@ extern int vpelimit;
extern cpumask_t mt_fpu_cpumask;
extern unsigned long mt_fpemul_threshold;
-extern void mips_mt_regdump(unsigned long previous_mvpcontrol_value);
-
#ifdef CONFIG_MIPS_MT
extern void mips_mt_set_cpuoptions(void);
#else
diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h
index d0a540e88bb4..d10afd13ee5b 100644
--- a/arch/mips/include/uapi/asm/sigcontext.h
+++ b/arch/mips/include/uapi/asm/sigcontext.h
@@ -56,7 +56,6 @@ struct sigcontext {
#if _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32
-#include <linux/posix_types.h>
/*
* Keep this struct definition in sync with the sigcontext fragment
* in arch/mips/kernel/asm-offsets.c
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 60ebaed28a4c..8ab7582291ab 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -151,6 +151,12 @@
#define SO_PASSPIDFD 76
#define SO_PEERPIDFD 77
+#define SO_DEVMEM_LINEAR 78
+#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF 79
+#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED 80
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/mips/jazz/setup.c b/arch/mips/jazz/setup.c
index e318ea11c858..d21e5d441f53 100644
--- a/arch/mips/jazz/setup.c
+++ b/arch/mips/jazz/setup.c
@@ -23,8 +23,6 @@
#include <asm/reboot.h>
#include <asm/tlbmisc.h>
-extern asmlinkage void jazz_handle_int(void);
-
extern void jazz_machine_restart(char *command);
static struct resource jazz_io_resources[] = {
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 368e8475870f..5f6e9e2ebbdb 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -303,13 +303,6 @@ int r4k_clockevent_init(void)
if (!c0_compare_int_usable())
return -ENXIO;
- /*
- * With vectored interrupts things are getting platform specific.
- * get_c0_compare_int is a hook to allow a platform to return the
- * interrupt number of its liking.
- */
- irq = get_c0_compare_int();
-
cd = &per_cpu(mips_clockevent_device, cpu);
cd->name = "MIPS";
@@ -320,7 +313,6 @@ int r4k_clockevent_init(void)
min_delta = calculate_min_delta();
cd->rating = 300;
- cd->irq = irq;
cd->cpumask = cpumask_of(cpu);
cd->set_next_event = mips_next_event;
cd->event_handler = mips_event_handler;
@@ -332,6 +324,13 @@ int r4k_clockevent_init(void)
cp0_timer_irq_installed = 1;
+ /*
+ * With vectored interrupts things are getting platform specific.
+ * get_c0_compare_int is a hook to allow a platform to return the
+ * interrupt number of its liking.
+ */
+ irq = get_c0_compare_int();
+
if (request_irq(irq, c0_compare_interrupt, flags, "timer",
c0_compare_interrupt))
pr_err("Failed to request irq %d (timer)\n", irq);
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index bda7f193baab..af7412549e6e 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1724,12 +1724,16 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2);
c->ases &= ~MIPS_ASE_VZ; /* VZ of Loongson-3A2000/3000 is incomplete */
+ change_c0_config6(LOONGSON_CONF6_EXTIMER | LOONGSON_CONF6_INTIMER,
+ LOONGSON_CONF6_INTIMER);
break;
case PRID_IMP_LOONGSON_64G:
__cpu_name[cpu] = "ICT Loongson-3";
set_elf_platform(cpu, "loongson3a");
set_isa(c, MIPS_CPU_ISA_M64R2);
decode_cpucfg(c);
+ change_c0_config6(LOONGSON_CONF6_EXTIMER | LOONGSON_CONF6_INTIMER,
+ LOONGSON_CONF6_INTIMER);
break;
default:
panic("Unknown Loongson Processor ID!");
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index bdb1fa8931f4..59eca397f297 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -21,9 +21,7 @@ static struct clocksource clocksource_mips = {
.name = "MIPS",
.read = c0_hpt_read,
.mask = CLOCKSOURCE_MASK(32),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS |
- CLOCK_SOURCE_MUST_VERIFY |
- CLOCK_SOURCE_VERIFY_PERCPU,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static u64 __maybe_unused notrace r4k_read_sched_clock(void)
diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c
index c938ba208fc0..37676a44fefb 100644
--- a/arch/mips/kernel/mips-mt.c
+++ b/arch/mips/kernel/mips-mt.c
@@ -43,83 +43,6 @@ static int __init maxtcs(char *str)
__setup("maxtcs=", maxtcs);
-/*
- * Dump new MIPS MT state for the core. Does not leave TCs halted.
- * Takes an argument which taken to be a pre-call MVPControl value.
- */
-
-void mips_mt_regdump(unsigned long mvpctl)
-{
- unsigned long flags;
- unsigned long vpflags;
- unsigned long mvpconf0;
- int nvpe;
- int ntc;
- int i;
- int tc;
- unsigned long haltval;
- unsigned long tcstatval;
-
- local_irq_save(flags);
- vpflags = dvpe();
- printk("=== MIPS MT State Dump ===\n");
- printk("-- Global State --\n");
- printk(" MVPControl Passed: %08lx\n", mvpctl);
- printk(" MVPControl Read: %08lx\n", vpflags);
- printk(" MVPConf0 : %08lx\n", (mvpconf0 = read_c0_mvpconf0()));
- nvpe = ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
- ntc = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
- printk("-- per-VPE State --\n");
- for (i = 0; i < nvpe; i++) {
- for (tc = 0; tc < ntc; tc++) {
- settc(tc);
- if ((read_tc_c0_tcbind() & TCBIND_CURVPE) == i) {
- printk(" VPE %d\n", i);
- printk(" VPEControl : %08lx\n",
- read_vpe_c0_vpecontrol());
- printk(" VPEConf0 : %08lx\n",
- read_vpe_c0_vpeconf0());
- printk(" VPE%d.Status : %08lx\n",
- i, read_vpe_c0_status());
- printk(" VPE%d.EPC : %08lx %pS\n",
- i, read_vpe_c0_epc(),
- (void *) read_vpe_c0_epc());
- printk(" VPE%d.Cause : %08lx\n",
- i, read_vpe_c0_cause());
- printk(" VPE%d.Config7 : %08lx\n",
- i, read_vpe_c0_config7());
- break; /* Next VPE */
- }
- }
- }
- printk("-- per-TC State --\n");
- for (tc = 0; tc < ntc; tc++) {
- settc(tc);
- if (read_tc_c0_tcbind() == read_c0_tcbind()) {
- /* Are we dumping ourself? */
- haltval = 0; /* Then we're not halted, and mustn't be */
- tcstatval = flags; /* And pre-dump TCStatus is flags */
- printk(" TC %d (current TC with VPE EPC above)\n", tc);
- } else {
- haltval = read_tc_c0_tchalt();
- write_tc_c0_tchalt(1);
- tcstatval = read_tc_c0_tcstatus();
- printk(" TC %d\n", tc);
- }
- printk(" TCStatus : %08lx\n", tcstatval);
- printk(" TCBind : %08lx\n", read_tc_c0_tcbind());
- printk(" TCRestart : %08lx %pS\n",
- read_tc_c0_tcrestart(), (void *) read_tc_c0_tcrestart());
- printk(" TCHalt : %08lx\n", haltval);
- printk(" TCContext : %08lx\n", read_tc_c0_tccontext());
- if (!haltval)
- write_tc_c0_tchalt(0);
- }
- printk("===========================\n");
- evpe(vpflags);
- local_irq_restore(flags);
-}
-
static int mt_opt_rpsctl = -1;
static int mt_opt_nblsu = -1;
static int mt_opt_forceconfig7;
diff --git a/arch/mips/ralink/irq-gic.c b/arch/mips/ralink/irq-gic.c
index 3bab51a5fb4c..8bc566ea00e5 100644
--- a/arch/mips/ralink/irq-gic.c
+++ b/arch/mips/ralink/irq-gic.c
@@ -10,6 +10,7 @@
#include <linux/of.h>
#include <linux/irqchip.h>
#include <asm/mips-cps.h>
+#include <asm/time.h>
int get_c0_perfcount_int(void)
{
diff --git a/arch/mips/ralink/timer-gic.c b/arch/mips/ralink/timer-gic.c
index dcf2a44ac51e..926082655a78 100644
--- a/arch/mips/ralink/timer-gic.c
+++ b/arch/mips/ralink/timer-gic.c
@@ -11,6 +11,8 @@
#include <linux/of_clk.h>
#include <linux/clocksource.h>
+#include <asm/time.h>
+
#include "common.h"
void __init plat_time_init(void)
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index be264c2b1a11..38fc0b188e08 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -132,6 +132,12 @@
#define SO_PASSPIDFD 0x404A
#define SO_PEERPIDFD 0x404B
+#define SO_DEVMEM_LINEAR 78
+#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF 79
+#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED 80
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 34d91cb8b259..96970fa75e4a 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -459,7 +459,6 @@ void free_initmem(void)
unsigned long kernel_end = (unsigned long)&_end;
/* Remap kernel text and data, but do not touch init section yet. */
- kernel_set_to_readonly = true;
map_pages(init_end, __pa(init_end), kernel_end - init_end,
PAGE_KERNEL, 0);
@@ -493,11 +492,18 @@ void free_initmem(void)
#ifdef CONFIG_STRICT_KERNEL_RWX
void mark_rodata_ro(void)
{
- /* rodata memory was already mapped with KERNEL_RO access rights by
- pagetable_init() and map_pages(). No need to do additional stuff here */
- unsigned long roai_size = __end_ro_after_init - __start_ro_after_init;
+ unsigned long start = (unsigned long) &__start_rodata;
+ unsigned long end = (unsigned long) &__end_rodata;
+
+ pr_info("Write protecting the kernel read-only data: %luk\n",
+ (end - start) >> 10);
+
+ kernel_set_to_readonly = true;
+ map_pages(start, __pa(start), end - start, PAGE_KERNEL, 0);
- pr_info("Write protected read-only-after-init data: %luk\n", roai_size >> 10);
+ /* force the kernel to see the new page table entries */
+ flush_cache_all();
+ flush_tlb_all();
}
#endif
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d7b09b064a8a..8a4ee57cd4ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1026,6 +1026,10 @@ config PPC_MEM_KEYS
If unsure, say y.
+config ARCH_PKEY_BITS
+ int
+ default 5
+
config PPC_SECURE_BOOT
prompt "Enable secure boot support"
bool
diff --git a/arch/powerpc/crypto/curve25519-ppc64le-core.c b/arch/powerpc/crypto/curve25519-ppc64le-core.c
index 4e3e44ea4484..f7810be0b292 100644
--- a/arch/powerpc/crypto/curve25519-ppc64le-core.c
+++ b/arch/powerpc/crypto/curve25519-ppc64le-core.c
@@ -295,5 +295,6 @@ module_exit(curve25519_mod_exit);
MODULE_ALIAS_CRYPTO("curve25519");
MODULE_ALIAS_CRYPTO("curve25519-ppc64le");
+MODULE_DESCRIPTION("PPC64le Curve25519 scalar multiplication with 51 bits limbs");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Danny Tsen <dtsen@us.ibm.com>");
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 9508399dd036..b481738c4bb5 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -52,7 +52,7 @@
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
#define pgd_ERROR(e) \
- pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+ pr_err("%s:%d: bad pgd %08llx.\n", __FILE__, __LINE__, (unsigned long long)pgd_val(e))
/*
* This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
@@ -170,7 +170,7 @@ static inline void pmd_clear(pmd_t *pmdp)
#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#else
#define pmd_page_vaddr(pmd) \
- ((const void *)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
+ ((const void *)((unsigned long)pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
#define pmd_pfn(pmd) (__pa(pmd_val(pmd)) >> PAGE_SHIFT)
#endif
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index 7b3d4c592a10..f3086e39e7d2 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -49,16 +49,22 @@ static inline unsigned long pud_val(pud_t x)
#endif /* CONFIG_PPC64 */
/* PGD level */
-#if defined(CONFIG_PPC_E500) && defined(CONFIG_PTE_64BIT)
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT)
typedef struct { unsigned long long pgd; } pgd_t;
+
+static inline unsigned long long pgd_val(pgd_t x)
+{
+ return x.pgd;
+}
#else
typedef struct { unsigned long pgd; } pgd_t;
-#endif
-#define __pgd(x) ((pgd_t) { (x) })
+
static inline unsigned long pgd_val(pgd_t x)
{
return x.pgd;
}
+#endif
+#define __pgd(x) ((pgd_t) { (x) })
/* Page protection bits */
typedef struct { unsigned long pgprot; } pgprot_t;
diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S
index 426e1ccc6971..8f57107000a2 100644
--- a/arch/powerpc/kernel/vdso/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso32.lds.S
@@ -74,6 +74,8 @@ SECTIONS
.got : { *(.got) } :text
.plt : { *(.plt) }
+ .rela.dyn : { *(.rela .rela*) }
+
_end = .;
__end = .;
PROVIDE(end = .);
@@ -87,7 +89,7 @@ SECTIONS
*(.branch_lt)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
- *(.got1 .glink .iplt .rela*)
+ *(.got1 .glink .iplt)
}
}
diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S
index bda6c8cdd459..400819258c06 100644
--- a/arch/powerpc/kernel/vdso/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso64.lds.S
@@ -69,7 +69,7 @@ SECTIONS
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
.eh_frame : { KEEP (*(.eh_frame)) } :text
.gcc_except_table : { *(.gcc_except_table) }
- .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+ .rela.dyn ALIGN(8) : { *(.rela .rela*) }
.got ALIGN(8) : { *(.got .toc) }
@@ -86,7 +86,7 @@ SECTIONS
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
*(.opd)
- *(.glink .iplt .plt .rela*)
+ *(.glink .iplt .plt)
}
}
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 5de4dd549f6e..bcc7e4dff8c3 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -697,7 +697,15 @@ again:
}
release:
- qnodesp->count--; /* release the node */
+ /*
+ * Clear the lock before releasing the node, as another CPU might see stale
+ * values if an interrupt occurs after we increment qnodesp->count
+ * but before node->lock is initialized. The barrier ensures that
+ * there are no further stores to the node after it has been released.
+ */
+ node->lock = NULL;
+ barrier();
+ qnodesp->count--;
}
void queued_spin_lock_slowpath(struct qspinlock *lock)
diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c
index 113edf76d3ce..d26656b07b72 100644
--- a/arch/powerpc/mm/nohash/tlb_64e.c
+++ b/arch/powerpc/mm/nohash/tlb_64e.c
@@ -33,7 +33,7 @@
* though this will probably be made common with other nohash
* implementations at some point
*/
-int mmu_pte_psize; /* Page size used for PTE pages */
+static int mmu_pte_psize; /* Page size used for PTE pages */
int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
unsigned long linear_map_top; /* Top of linear mapping */
diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c
index 5c4f1a9ca154..6f4a41a9352a 100644
--- a/arch/powerpc/platforms/chrp/pegasos_eth.c
+++ b/arch/powerpc/platforms/chrp/pegasos_eth.c
@@ -14,7 +14,7 @@
#include <linux/ioport.h>
#include <linux/device.h>
#include <linux/platform_device.h>
-#include <linux/mv643xx.h>
+#include <linux/mv643xx_eth.h>
#include <linux/pci.h>
#define PEGASOS2_MARVELL_REGBASE (0xf1000000)
@@ -25,12 +25,15 @@
#define PEGASOS2_SRAM_BASE_ETH_PORT0 (PEGASOS2_SRAM_BASE)
#define PEGASOS2_SRAM_BASE_ETH_PORT1 (PEGASOS2_SRAM_BASE_ETH_PORT0 + (PEGASOS2_SRAM_SIZE / 2) )
-
#define PEGASOS2_SRAM_RXRING_SIZE (PEGASOS2_SRAM_SIZE/4)
#define PEGASOS2_SRAM_TXRING_SIZE (PEGASOS2_SRAM_SIZE/4)
#undef BE_VERBOSE
+#define MV64340_BASE_ADDR_ENABLE 0x278
+#define MV64340_INTEGRATED_SRAM_BASE_ADDR 0x268
+#define MV64340_SRAM_CONFIG 0x380
+
static struct resource mv643xx_eth_shared_resources[] = {
[0] = {
.name = "ethernet shared base",
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 0f3cd7c3a436..86d1f1cea571 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -13,6 +13,7 @@ config 32BIT
config RISCV
def_bool y
select ACPI_GENERIC_GSI if ACPI
+ select ACPI_MCFG if (ACPI && PCI)
select ACPI_PPTT if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ACPI_SPCR_TABLE if ACPI
@@ -188,6 +189,7 @@ config RISCV
select OF_EARLY_FLATTREE
select OF_IRQ
select PCI_DOMAINS_GENERIC if PCI
+ select PCI_ECAM if (ACPI && PCI)
select PCI_MSI if PCI
select RISCV_ALTERNATIVE if !XIP_KERNEL
select RISCV_APLIC
@@ -552,8 +554,8 @@ config RISCV_ISA_SVPBMT
config TOOLCHAIN_HAS_V
bool
default y
- depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64iv)
- depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32iv)
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64imv)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32imv)
depends on LLD_VERSION >= 140000 || LD_VERSION >= 23800
depends on AS_HAS_OPTION_ARCH
diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
index ca2d44d59d48..c7771b3b6475 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
@@ -365,6 +365,12 @@
};
};
+&syscrg {
+ assigned-clocks = <&syscrg JH7110_SYSCLK_CPU_CORE>,
+ <&pllclk JH7110_PLLCLK_PLL0_OUT>;
+ assigned-clock-rates = <500000000>, <1500000000>;
+};
+
&sysgpio {
i2c0_pins: i2c0-0 {
i2c-pins {
diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h
index 8e10a94430a2..7e9a84a005ed 100644
--- a/arch/riscv/include/asm/irq.h
+++ b/arch/riscv/include/asm/irq.h
@@ -12,8 +12,63 @@
#include <asm-generic/irq.h>
+#define INVALID_CONTEXT UINT_MAX
+
void riscv_set_intc_hwnode_fn(struct fwnode_handle *(*fn)(void));
struct fwnode_handle *riscv_get_intc_hwnode(void);
+#ifdef CONFIG_ACPI
+
+enum riscv_irqchip_type {
+ ACPI_RISCV_IRQCHIP_INTC = 0x00,
+ ACPI_RISCV_IRQCHIP_IMSIC = 0x01,
+ ACPI_RISCV_IRQCHIP_PLIC = 0x02,
+ ACPI_RISCV_IRQCHIP_APLIC = 0x03,
+};
+
+int riscv_acpi_get_gsi_info(struct fwnode_handle *fwnode, u32 *gsi_base,
+ u32 *id, u32 *nr_irqs, u32 *nr_idcs);
+struct fwnode_handle *riscv_acpi_get_gsi_domain_id(u32 gsi);
+unsigned long acpi_rintc_index_to_hartid(u32 index);
+unsigned long acpi_rintc_ext_parent_to_hartid(unsigned int plic_id, unsigned int ctxt_idx);
+unsigned int acpi_rintc_get_plic_nr_contexts(unsigned int plic_id);
+unsigned int acpi_rintc_get_plic_context(unsigned int plic_id, unsigned int ctxt_idx);
+int __init acpi_rintc_get_imsic_mmio_info(u32 index, struct resource *res);
+
+#else
+static inline int riscv_acpi_get_gsi_info(struct fwnode_handle *fwnode, u32 *gsi_base,
+ u32 *id, u32 *nr_irqs, u32 *nr_idcs)
+{
+ return 0;
+}
+
+static inline unsigned long acpi_rintc_index_to_hartid(u32 index)
+{
+ return INVALID_HARTID;
+}
+
+static inline unsigned long acpi_rintc_ext_parent_to_hartid(unsigned int plic_id,
+ unsigned int ctxt_idx)
+{
+ return INVALID_HARTID;
+}
+
+static inline unsigned int acpi_rintc_get_plic_nr_contexts(unsigned int plic_id)
+{
+ return INVALID_CONTEXT;
+}
+
+static inline unsigned int acpi_rintc_get_plic_context(unsigned int plic_id, unsigned int ctxt_idx)
+{
+ return INVALID_CONTEXT;
+}
+
+static inline int __init acpi_rintc_get_imsic_mmio_info(u32 index, struct resource *res)
+{
+ return 0;
+}
+
+#endif /* CONFIG_ACPI */
+
#endif /* _ASM_RISCV_IRQ_H */
diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
index fa0f535bbbf0..1d85b6617508 100644
--- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
+++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
@@ -10,6 +10,7 @@
#define __KVM_VCPU_RISCV_PMU_H
#include <linux/perf/riscv_pmu.h>
+#include <asm/kvm_vcpu_insn.h>
#include <asm/sbi.h>
#ifdef CONFIG_RISCV_PMU_SBI
@@ -64,11 +65,11 @@ struct kvm_pmu {
#if defined(CONFIG_32BIT)
#define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \
-{.base = CSR_CYCLEH, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm }, \
-{.base = CSR_CYCLE, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm },
+{.base = CSR_CYCLEH, .count = 32, .func = kvm_riscv_vcpu_pmu_read_hpm }, \
+{.base = CSR_CYCLE, .count = 32, .func = kvm_riscv_vcpu_pmu_read_hpm },
#else
#define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \
-{.base = CSR_CYCLE, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm },
+{.base = CSR_CYCLE, .count = 32, .func = kvm_riscv_vcpu_pmu_read_hpm },
#endif
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid);
@@ -104,8 +105,20 @@ void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);
struct kvm_pmu {
};
+static inline int kvm_riscv_vcpu_pmu_read_legacy(struct kvm_vcpu *vcpu, unsigned int csr_num,
+ unsigned long *val, unsigned long new_val,
+ unsigned long wr_mask)
+{
+ if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
+ *val = 0;
+ return KVM_INSN_CONTINUE_NEXT_SEPC;
+ } else {
+ return KVM_INSN_ILLEGAL_TRAP;
+ }
+}
+
#define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \
-{.base = 0, .count = 0, .func = NULL },
+{.base = CSR_CYCLE, .count = 3, .func = kvm_riscv_vcpu_pmu_read_legacy },
static inline void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) {}
static inline int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 8702b8721a27..efa1b3519b23 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -14,36 +14,14 @@
#include <asm/ptrace.h>
-/*
- * addr is a hint to the maximum userspace address that mmap should provide, so
- * this macro needs to return the largest address space available so that
- * mmap_end < addr, being mmap_end the top of that address space.
- * See Documentation/arch/riscv/vm-layout.rst for more details.
- */
#define arch_get_mmap_end(addr, len, flags) \
({ \
- unsigned long mmap_end; \
- typeof(addr) _addr = (addr); \
- if ((_addr) == 0 || is_compat_task() || \
- ((_addr + len) > BIT(VA_BITS - 1))) \
- mmap_end = STACK_TOP_MAX; \
- else \
- mmap_end = (_addr + len); \
- mmap_end; \
+ STACK_TOP_MAX; \
})
#define arch_get_mmap_base(addr, base) \
({ \
- unsigned long mmap_base; \
- typeof(addr) _addr = (addr); \
- typeof(base) _base = (base); \
- unsigned long rnd_gap = DEFAULT_MAP_WINDOW - (_base); \
- if ((_addr) == 0 || is_compat_task() || \
- ((_addr + len) > BIT(VA_BITS - 1))) \
- mmap_base = (_base); \
- else \
- mmap_base = (_addr + len) - rnd_gap; \
- mmap_base; \
+ base; \
})
#ifdef CONFIG_64BIT
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 7cffd4ffecd0..7bd3746028c9 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/cpumask.h>
+#include <linux/jump_label.h>
#ifdef CONFIG_RISCV_SBI
enum sbi_ext_id {
@@ -304,6 +305,7 @@ struct sbiret {
};
void sbi_init(void);
+long __sbi_base_ecall(int fid);
struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1,
unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5,
@@ -373,7 +375,23 @@ static inline unsigned long sbi_mk_version(unsigned long major,
| (minor & SBI_SPEC_VERSION_MINOR_MASK);
}
-int sbi_err_map_linux_errno(int err);
+static inline int sbi_err_map_linux_errno(int err)
+{
+ switch (err) {
+ case SBI_SUCCESS:
+ return 0;
+ case SBI_ERR_DENIED:
+ return -EPERM;
+ case SBI_ERR_INVALID_PARAM:
+ return -EINVAL;
+ case SBI_ERR_INVALID_ADDRESS:
+ return -EFAULT;
+ case SBI_ERR_NOT_SUPPORTED:
+ case SBI_ERR_FAILURE:
+ default:
+ return -ENOTSUPP;
+ };
+}
extern bool sbi_debug_console_available;
int sbi_debug_console_write(const char *bytes, unsigned int num_bytes);
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 06d407f1b30b..7f88cc4931f5 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -20,17 +20,21 @@ endif
ifdef CONFIG_RISCV_ALTERNATIVE_EARLY
CFLAGS_alternative.o := -mcmodel=medany
CFLAGS_cpufeature.o := -mcmodel=medany
+CFLAGS_sbi_ecall.o := -mcmodel=medany
ifdef CONFIG_FTRACE
CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_sbi_ecall.o = $(CC_FLAGS_FTRACE)
endif
ifdef CONFIG_RELOCATABLE
CFLAGS_alternative.o += -fno-pie
CFLAGS_cpufeature.o += -fno-pie
+CFLAGS_sbi_ecall.o += -fno-pie
endif
ifdef CONFIG_KASAN
KASAN_SANITIZE_alternative.o := n
KASAN_SANITIZE_cpufeature.o := n
+KASAN_SANITIZE_sbi_ecall.o := n
endif
endif
@@ -88,7 +92,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
-obj-$(CONFIG_RISCV_SBI) += sbi.o
+obj-$(CONFIG_RISCV_SBI) += sbi.o sbi_ecall.o
ifeq ($(CONFIG_RISCV_SBI), y)
obj-$(CONFIG_SMP) += sbi-ipi.o
obj-$(CONFIG_SMP) += cpu_ops_sbi.o
diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
index ba957aaca5cb..6e0d333f57e5 100644
--- a/arch/riscv/kernel/acpi.c
+++ b/arch/riscv/kernel/acpi.c
@@ -311,29 +311,26 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
#ifdef CONFIG_PCI
/*
- * These interfaces are defined just to enable building ACPI core.
- * TODO: Update it with actual implementation when external interrupt
- * controller support is added in RISC-V ACPI.
+ * raw_pci_read/write - Platform-specific PCI config space access.
*/
-int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
- int reg, int len, u32 *val)
+int raw_pci_read(unsigned int domain, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *val)
{
- return PCIBIOS_DEVICE_NOT_FOUND;
-}
+ struct pci_bus *b = pci_find_bus(domain, bus);
-int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
- int reg, int len, u32 val)
-{
- return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!b)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ return b->ops->read(b, devfn, reg, len, val);
}
-int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
+int raw_pci_write(unsigned int domain, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 val)
{
- return -1;
-}
+ struct pci_bus *b = pci_find_bus(domain, bus);
-struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
-{
- return NULL;
+ if (!b)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ return b->ops->write(b, devfn, reg, len, val);
}
+
#endif /* CONFIG_PCI */
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 837bdab2601b..1989b8cade1b 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -14,9 +14,6 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
-#define CREATE_TRACE_POINTS
-#include <asm/trace.h>
-
/* default SBI version is 0.1 */
unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
EXPORT_SYMBOL(sbi_spec_version);
@@ -27,55 +24,6 @@ static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5) __ro_after_init;
-struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1,
- unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5,
- int fid, int ext)
-{
- struct sbiret ret;
-
- trace_sbi_call(ext, fid);
-
- register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
- register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
- register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
- register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
- register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
- register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
- register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
- register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
- asm volatile ("ecall"
- : "+r" (a0), "+r" (a1)
- : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
- : "memory");
- ret.error = a0;
- ret.value = a1;
-
- trace_sbi_return(ext, ret.error, ret.value);
-
- return ret;
-}
-EXPORT_SYMBOL(__sbi_ecall);
-
-int sbi_err_map_linux_errno(int err)
-{
- switch (err) {
- case SBI_SUCCESS:
- return 0;
- case SBI_ERR_DENIED:
- return -EPERM;
- case SBI_ERR_INVALID_PARAM:
- return -EINVAL;
- case SBI_ERR_INVALID_ADDRESS:
- return -EFAULT;
- case SBI_ERR_NOT_SUPPORTED:
- case SBI_ERR_FAILURE:
- default:
- return -ENOTSUPP;
- };
-}
-EXPORT_SYMBOL(sbi_err_map_linux_errno);
-
#ifdef CONFIG_RISCV_SBI_V01
static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask)
{
@@ -535,17 +483,6 @@ long sbi_probe_extension(int extid)
}
EXPORT_SYMBOL(sbi_probe_extension);
-static long __sbi_base_ecall(int fid)
-{
- struct sbiret ret;
-
- ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
- if (!ret.error)
- return ret.value;
- else
- return sbi_err_map_linux_errno(ret.error);
-}
-
static inline long sbi_get_spec_version(void)
{
return __sbi_base_ecall(SBI_EXT_BASE_GET_SPEC_VERSION);
diff --git a/arch/riscv/kernel/sbi_ecall.c b/arch/riscv/kernel/sbi_ecall.c
new file mode 100644
index 000000000000..24aabb4fbde3
--- /dev/null
+++ b/arch/riscv/kernel/sbi_ecall.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Rivos Inc. */
+
+#include <asm/sbi.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
+
+long __sbi_base_ecall(int fid)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
+ if (!ret.error)
+ return ret.value;
+ else
+ return sbi_err_map_linux_errno(ret.error);
+}
+EXPORT_SYMBOL(__sbi_base_ecall);
+
+struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5,
+ int fid, int ext)
+{
+ struct sbiret ret;
+
+ trace_sbi_call(ext, fid);
+
+ register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
+ register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
+ register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
+ register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
+ register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
+ register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
+ register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
+ register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
+ asm volatile ("ecall"
+ : "+r" (a0), "+r" (a1)
+ : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
+ : "memory");
+ ret.error = a0;
+ ret.value = a1;
+
+ trace_sbi_return(ext, ret.error, ret.value);
+
+ return ret;
+}
+EXPORT_SYMBOL(__sbi_ecall);
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 192cd5603e95..d4fd8af7aaf5 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -417,7 +417,7 @@ int handle_misaligned_load(struct pt_regs *regs)
val.data_u64 = 0;
if (user_mode(regs)) {
- if (raw_copy_from_user(&val, (u8 __user *)addr, len))
+ if (copy_from_user(&val, (u8 __user *)addr, len))
return -1;
} else {
memcpy(&val, (u8 *)addr, len);
@@ -515,7 +515,7 @@ int handle_misaligned_store(struct pt_regs *regs)
return -EOPNOTSUPP;
if (user_mode(regs)) {
- if (raw_copy_to_user((u8 __user *)addr, &val, len))
+ if (copy_to_user((u8 __user *)addr, &val, len))
return -1;
} else {
memcpy((u8 *)addr, &val, len);
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index bcf41d6e0df0..2707a51b082c 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -391,19 +391,9 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
- int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
- if (kvpmu->sdata) {
- if (kvpmu->snapshot_addr != INVALID_GPA) {
- memset(kvpmu->sdata, 0, snapshot_area_size);
- kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr,
- kvpmu->sdata, snapshot_area_size);
- } else {
- pr_warn("snapshot address invalid\n");
- }
- kfree(kvpmu->sdata);
- kvpmu->sdata = NULL;
- }
+ kfree(kvpmu->sdata);
+ kvpmu->sdata = NULL;
kvpmu->snapshot_addr = INVALID_GPA;
}
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 62f409d4176e..7de128be8db9 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -127,8 +127,8 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->riscv_sbi.args[3] = cp->a3;
run->riscv_sbi.args[4] = cp->a4;
run->riscv_sbi.args[5] = cp->a5;
- run->riscv_sbi.ret[0] = cp->a0;
- run->riscv_sbi.ret[1] = cp->a1;
+ run->riscv_sbi.ret[0] = SBI_ERR_NOT_SUPPORTED;
+ run->riscv_sbi.ret[1] = 0;
}
void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index a03c994eed3b..b81672729887 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -158,6 +158,7 @@ void __init riscv_init_cbo_blocksizes(void)
#ifdef CONFIG_SMP
static void set_icache_stale_mask(void)
{
+ int cpu = get_cpu();
cpumask_t *mask;
bool stale_cpu;
@@ -168,10 +169,11 @@ static void set_icache_stale_mask(void)
* concurrently on different harts.
*/
mask = &current->mm->context.icache_stale_mask;
- stale_cpu = cpumask_test_cpu(smp_processor_id(), mask);
+ stale_cpu = cpumask_test_cpu(cpu, mask);
cpumask_setall(mask);
- cpumask_assign_cpu(smp_processor_id(), mask, stale_cpu);
+ cpumask_assign_cpu(cpu, mask, stale_cpu);
+ put_cpu();
}
#endif
@@ -239,14 +241,12 @@ int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long scope)
case PR_RISCV_CTX_SW_FENCEI_OFF:
switch (scope) {
case PR_RISCV_SCOPE_PER_PROCESS:
- current->mm->context.force_icache_flush = false;
-
set_icache_stale_mask();
+ current->mm->context.force_icache_flush = false;
break;
case PR_RISCV_SCOPE_PER_THREAD:
- current->thread.force_icache_flush = false;
-
set_icache_stale_mask();
+ current->thread.force_icache_flush = false;
break;
default:
return -EINVAL;
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index eb0649a61b4c..1785782c2e55 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -252,7 +252,7 @@ static void __init setup_bootmem(void)
* The size of the linear page mapping may restrict the amount of
* usable RAM.
*/
- if (IS_ENABLED(CONFIG_64BIT)) {
+ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) {
max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE;
memblock_cap_memory_range(phys_ram_base,
max_mapped_addr - phys_ram_base);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a822f952f64a..c60e699e99f5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -604,6 +604,19 @@ config RANDOMIZE_BASE
as a security feature that deters exploit attempts relying on
knowledge of the location of kernel internals.
+config RANDOMIZE_IDENTITY_BASE
+ bool "Randomize the address of the identity mapping base"
+ depends on RANDOMIZE_BASE
+ default DEBUG_VM
+ help
+ The identity mapping base address is pinned to zero by default.
+ Allow randomization of that base to expose otherwise missed
+ notion of physical and virtual addresses of data structures.
+ That does not have any impact on the base address at which the
+ kernel image is loaded.
+
+ If unsure, say N
+
config KERNEL_IMAGE_BASE
hex "Kernel image base address"
range 0x100000 0x1FFFFFE0000000 if !KASAN
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index ce232552bc1c..c73b5118ad42 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -162,7 +162,7 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
loc = (long)*reloc + phys_offset;
if (loc < min_addr || loc > max_addr)
error("64-bit relocation outside of kernel!\n");
- *(u64 *)loc += offset - __START_KERNEL;
+ *(u64 *)loc += offset;
}
}
@@ -177,7 +177,7 @@ static void kaslr_adjust_got(unsigned long offset)
*/
for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
if (*entry)
- *entry += offset - __START_KERNEL;
+ *entry += offset;
}
}
@@ -252,7 +252,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
/* choose kernel address space layout: 4 or 3 levels. */
- BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
@@ -341,7 +341,8 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
max_mappable = min(max_mappable, vmemmap_start);
- __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE))
+ __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
return asce_limit;
}
@@ -388,31 +389,25 @@ static void kaslr_adjust_vmlinux_info(long offset)
#endif
}
-static void fixup_vmlinux_info(void)
-{
- vmlinux.entry -= __START_KERNEL;
- kaslr_adjust_vmlinux_info(-__START_KERNEL);
-}
-
void startup_kernel(void)
{
- unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size;
- unsigned long nokaslr_offset_phys, kaslr_large_page_offset;
- unsigned long amode31_lma = 0;
+ unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size;
+ unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0;
+ unsigned long kernel_size = TEXT_OFFSET + vmlinux_size;
+ unsigned long kaslr_large_page_offset;
unsigned long max_physmem_end;
unsigned long asce_limit;
unsigned long safe_addr;
psw_t psw;
- fixup_vmlinux_info();
setup_lpp();
/*
* Non-randomized kernel physical start address must be _SEGMENT_SIZE
* aligned (see blow).
*/
- nokaslr_offset_phys = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
- safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size);
+ nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
+ safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size);
/*
* Reserve decompressor memory together with decompression heap,
@@ -456,16 +451,27 @@ void startup_kernel(void)
*/
kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK;
if (kaslr_enabled()) {
- unsigned long end = ident_map_size - kaslr_large_page_offset;
+ unsigned long size = vmlinux_size + kaslr_large_page_offset;
- __kaslr_offset_phys = randomize_within_range(kernel_size, _SEGMENT_SIZE, 0, end);
+ text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size);
}
- if (!__kaslr_offset_phys)
- __kaslr_offset_phys = nokaslr_offset_phys;
- __kaslr_offset_phys |= kaslr_large_page_offset;
+ if (!text_lma)
+ text_lma = nokaslr_text_lma;
+ text_lma |= kaslr_large_page_offset;
+
+ /*
+ * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is
+ * never accessed via the kernel image mapping as per the linker script:
+ *
+ * . = TEXT_OFFSET;
+ *
+ * Therefore, this region could be used for something else and does
+ * not need to be reserved. See how it is skipped in setup_vmem().
+ */
+ __kaslr_offset_phys = text_lma - TEXT_OFFSET;
kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
- physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size);
- deploy_kernel((void *)__kaslr_offset_phys);
+ physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size);
+ deploy_kernel((void *)text_lma);
/* vmlinux decompression is done, shrink reserved low memory */
physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
@@ -488,7 +494,7 @@ void startup_kernel(void)
amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G);
}
if (!amode31_lma)
- amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size;
+ amode31_lma = text_lma - vmlinux.amode31_size;
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
/*
@@ -504,8 +510,8 @@ void startup_kernel(void)
* - copy_bootdata() must follow setup_vmem() to propagate changes
* to bootdata made by setup_vmem()
*/
- clear_bss_section(__kaslr_offset_phys);
- kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size,
+ clear_bss_section(text_lma);
+ kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size,
__kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset);
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 2847cc059ab7..145035f84a0e 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -90,7 +90,7 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern
}
memgap_start = end;
}
- kasan_populate(kernel_start, kernel_end, POPULATE_KASAN_MAP_SHADOW);
+ kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
@@ -475,7 +475,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
(unsigned long)__identity_va(end),
POPULATE_IDENTITY);
}
- pgtable_populate(kernel_start, kernel_end, POPULATE_KERNEL);
+
+ /*
+ * [kernel_start..kernel_start + TEXT_OFFSET] region is never
+ * accessed as per the linker script:
+ *
+ * . = TEXT_OFFSET;
+ *
+ * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
+ * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
+ */
+ pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
POPULATE_ABS_LOWCORE);
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index a750711d44c8..66670212a361 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -109,7 +109,12 @@ SECTIONS
#ifdef CONFIG_KERNEL_UNCOMPRESSED
. = ALIGN(PAGE_SIZE);
. += AMODE31_SIZE; /* .amode31 section */
- . = ALIGN(1 << 20); /* _SEGMENT_SIZE */
+
+ /*
+ * Make sure the location counter is not less than TEXT_OFFSET.
+ * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead.
+ */
+ . = MAX(TEXT_OFFSET, ALIGN(1 << 20));
#else
. = ALIGN(8);
#endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 06416b3f94f5..16e4caa931f1 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -279,8 +279,9 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#define AMODE31_SIZE (3 * PAGE_SIZE)
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
-#define __START_KERNEL 0x100000
#define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE
#define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE)
+#define TEXT_OFFSET 0x100000
+
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 4ec99f73fa27..a3fea683b227 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -734,7 +734,23 @@ static void __init memblock_add_physmem_info(void)
}
/*
- * Reserve memory used for lowcore/command line/kernel image.
+ * Reserve memory used for lowcore.
+ */
+static void __init reserve_lowcore(void)
+{
+ void *lowcore_start = get_lowcore();
+ void *lowcore_end = lowcore_start + sizeof(struct lowcore);
+ void *start, *end;
+
+ if ((void *)__identity_base < lowcore_end) {
+ start = max(lowcore_start, (void *)__identity_base);
+ end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
+ memblock_reserve(__pa(start), __pa(end));
+ }
+}
+
+/*
+ * Reserve memory used for absolute lowcore/command line/kernel image.
*/
static void __init reserve_kernel(void)
{
@@ -918,6 +934,7 @@ void __init setup_arch(char **cmdline_p)
/* Do some memory reservations *before* memory is added to memblock */
reserve_pgtables();
+ reserve_lowcore();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index e67cd409b858..ae5d0a9d6911 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -39,7 +39,7 @@ PHDRS {
SECTIONS
{
- . = __START_KERNEL;
+ . = TEXT_OFFSET;
.text : {
_stext = .; /* Start of text section */
_text = .; /* Text and read-only data */
diff --git a/arch/s390/tools/relocs.c b/arch/s390/tools/relocs.c
index a74dbd5c9896..30a732c808f3 100644
--- a/arch/s390/tools/relocs.c
+++ b/arch/s390/tools/relocs.c
@@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel)
case R_390_GOTOFF64:
break;
case R_390_64:
- add_reloc(&relocs64, offset - ehdr.e_entry);
+ add_reloc(&relocs64, offset);
break;
default:
die("Unsupported relocation type: %d\n", r_type);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 1aa3c4a0c5b2..e9103998cca9 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -14,6 +14,7 @@ config SUPERH
select ARCH_HIBERNATION_POSSIBLE if MMU
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_WANT_IPC_PARSE_VERSION
+ select ARCH_NEED_CMPXCHG_1_EMU
select CPU_NO_EFFICIENT_FFS
select DMA_DECLARE_COHERENT
select GENERIC_ATOMIC64
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index 5d617b3ef78f..1e5dc5ccf7bf 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -9,6 +9,7 @@
#include <linux/compiler.h>
#include <linux/types.h>
+#include <linux/cmpxchg-emu.h>
#if defined(CONFIG_GUSA_RB)
#include <asm/cmpxchg-grb.h>
@@ -56,6 +57,8 @@ static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
unsigned long new, int size)
{
switch (size) {
+ case 1:
+ return cmpxchg_emu_u8(ptr, old, new);
case 4:
return __cmpxchg_u32(ptr, old, new);
}
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 682da3714686..57084ed2f3c4 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -133,6 +133,12 @@
#define SO_PASSPIDFD 0x0055
#define SO_PEERPIDFD 0x0056
+#define SO_DEVMEM_LINEAR 0x0057
+#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF 0x0058
+#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED 0x0059
+
#if !defined(__KERNEL__)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 007bab9f2a0e..6f1c31f4b9ab 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1889,6 +1889,10 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
If unsure, say y.
+config ARCH_PKEY_BITS
+ int
+ default 4
+
choice
prompt "TSX enable mode"
depends on CPU_SUP_INTEL
@@ -2610,24 +2614,15 @@ config MITIGATION_SLS
against straight line speculation. The kernel image might be slightly
larger.
-config MITIGATION_GDS_FORCE
- bool "Force GDS Mitigation"
+config MITIGATION_GDS
+ bool "Mitigate Gather Data Sampling"
depends on CPU_SUP_INTEL
- default n
+ default y
help
- Gather Data Sampling (GDS) is a hardware vulnerability which allows
- unprivileged speculative access to data which was previously stored in
- vector registers.
-
- This option is equivalent to setting gather_data_sampling=force on the
- command line. The microcode mitigation is used if present, otherwise
- AVX is disabled as a mitigation. On affected systems that are missing
- the microcode any userspace code that unconditionally uses AVX will
- break with this option set.
-
- Setting this option on systems not vulnerable to GDS has no effect.
-
- If in doubt, say N.
+ Enable mitigation for Gather Data Sampling (GDS). GDS is a hardware
+ vulnerability which allows unprivileged speculative access to data
+ which was previously stored in vector registers. The attacker uses gather
+ instructions to infer the stale vector register data.
config MITIGATION_RFDS
bool "RFDS Mitigation"
@@ -2650,6 +2645,107 @@ config MITIGATION_SPECTRE_BHI
indirect branches.
See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+config MITIGATION_MDS
+ bool "Mitigate Microarchitectural Data Sampling (MDS) hardware bug"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable mitigation for Microarchitectural Data Sampling (MDS). MDS is
+ a hardware vulnerability which allows unprivileged speculative access
+ to data which is available in various CPU internal buffers.
+ See also <file:Documentation/admin-guide/hw-vuln/mds.rst>
+
+config MITIGATION_TAA
+ bool "Mitigate TSX Asynchronous Abort (TAA) hardware bug"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable mitigation for TSX Asynchronous Abort (TAA). TAA is a hardware
+ vulnerability that allows unprivileged speculative access to data
+ which is available in various CPU internal buffers by using
+ asynchronous aborts within an Intel TSX transactional region.
+ See also <file:Documentation/admin-guide/hw-vuln/tsx_async_abort.rst>
+
+config MITIGATION_MMIO_STALE_DATA
+ bool "Mitigate MMIO Stale Data hardware bug"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable mitigation for MMIO Stale Data hardware bugs. Processor MMIO
+ Stale Data Vulnerabilities are a class of memory-mapped I/O (MMIO)
+ vulnerabilities that can expose data. The vulnerabilities require the
+ attacker to have access to MMIO.
+ See also
+ <file:Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst>
+
+config MITIGATION_L1TF
+ bool "Mitigate L1 Terminal Fault (L1TF) hardware bug"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Mitigate L1 Terminal Fault (L1TF) hardware bug. L1 Terminal Fault is a
+ hardware vulnerability which allows unprivileged speculative access to data
+ available in the Level 1 Data Cache.
+ See <file:Documentation/admin-guide/hw-vuln/l1tf.rst
+
+config MITIGATION_RETBLEED
+ bool "Mitigate RETBleed hardware bug"
+ depends on (CPU_SUP_INTEL && MITIGATION_SPECTRE_V2) || MITIGATION_UNRET_ENTRY || MITIGATION_IBPB_ENTRY
+ default y
+ help
+ Enable mitigation for RETBleed (Arbitrary Speculative Code Execution
+ with Return Instructions) vulnerability. RETBleed is a speculative
+ execution attack which takes advantage of microarchitectural behavior
+ in many modern microprocessors, similar to Spectre v2. An
+ unprivileged attacker can use these flaws to bypass conventional
+ memory security restrictions to gain read access to privileged memory
+ that would otherwise be inaccessible.
+
+config MITIGATION_SPECTRE_V1
+ bool "Mitigate SPECTRE V1 hardware bug"
+ default y
+ help
+ Enable mitigation for Spectre V1 (Bounds Check Bypass). Spectre V1 is a
+ class of side channel attacks that takes advantage of speculative
+ execution that bypasses conditional branch instructions used for
+ memory access bounds check.
+ See also <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
+config MITIGATION_SPECTRE_V2
+ bool "Mitigate SPECTRE V2 hardware bug"
+ default y
+ help
+ Enable mitigation for Spectre V2 (Branch Target Injection). Spectre
+ V2 is a class of side channel attacks that takes advantage of
+ indirect branch predictors inside the processor. In Spectre variant 2
+ attacks, the attacker can steer speculative indirect branches in the
+ victim to gadget code by poisoning the branch target buffer of a CPU
+ used for predicting indirect branch addresses.
+ See also <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
+config MITIGATION_SRBDS
+ bool "Mitigate Special Register Buffer Data Sampling (SRBDS) hardware bug"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable mitigation for Special Register Buffer Data Sampling (SRBDS).
+ SRBDS is a hardware vulnerability that allows Microarchitectural Data
+ Sampling (MDS) techniques to infer values returned from special
+ register accesses. An unprivileged user can extract values returned
+ from RDRAND and RDSEED executed on another core or sibling thread
+ using MDS techniques.
+ See also
+ <file:Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst>
+
+config MITIGATION_SSB
+ bool "Mitigate Speculative Store Bypass (SSB) hardware bug"
+ default y
+ help
+ Enable mitigation for Speculative Store Bypass (SSB). SSB is a
+ hardware security vulnerability and its exploitation takes advantage
+ of speculative execution in a similar way to the Meltdown and Spectre
+ security vulnerabilities.
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 078e2bac2553..da8b66dce0da 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -389,7 +389,6 @@ static bool mmio_read(int size, unsigned long addr, unsigned long *val)
.r12 = size,
.r13 = EPT_READ,
.r14 = addr,
- .r15 = *val,
};
if (__tdx_hypercall(&args))
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 24875e6295f2..7b1bebed879d 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -14,7 +14,7 @@ config CRYPTO_CURVE25519_X86
- ADX (large integer arithmetic)
config CRYPTO_AES_NI_INTEL
- tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XTR, XTS, GCM (AES-NI)"
+ tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XCTR, XTS, GCM (AES-NI/VAES)"
depends on X86
select CRYPTO_AEAD
select CRYPTO_LIB_AES
@@ -25,10 +25,14 @@ config CRYPTO_AES_NI_INTEL
help
Block cipher: AES cipher algorithms
AEAD cipher: AES with GCM
- Length-preserving ciphers: AES with ECB, CBC, CTS, CTR, XTR, XTS
+ Length-preserving ciphers: AES with ECB, CBC, CTS, CTR, XCTR, XTS
Architecture: x86 (32-bit and 64-bit) using:
- AES-NI (AES new instructions)
+ - VAES (Vector AES)
+
+ Some algorithm implementations are supported only in 64-bit builds,
+ and some have additional prerequisites such as AVX2 or AVX512.
config CRYPTO_BLOWFISH_X86_64
tristate "Ciphers: Blowfish, modes: ECB, CBC"
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index cd37de5ec404..b0dd83555499 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1366,6 +1366,8 @@ gcm_crypt(struct aead_request *req, int flags)
err = skcipher_walk_aead_encrypt(&walk, req, false);
else
err = skcipher_walk_aead_decrypt(&walk, req, false);
+ if (err)
+ return err;
/*
* Since the AES-GCM assembly code requires that at least three assembly
@@ -1381,37 +1383,31 @@ gcm_crypt(struct aead_request *req, int flags)
gcm_process_assoc(key, ghash_acc, req->src, assoclen, flags);
/* En/decrypt the data and pass the ciphertext through GHASH. */
- while ((nbytes = walk.nbytes) != 0) {
- if (unlikely(nbytes < walk.total)) {
- /*
- * Non-last segment. In this case, the assembly
- * function requires that the length be a multiple of 16
- * (AES_BLOCK_SIZE) bytes. The needed buffering of up
- * to 16 bytes is handled by the skcipher_walk. Here we
- * just need to round down to a multiple of 16.
- */
- nbytes = round_down(nbytes, AES_BLOCK_SIZE);
- aes_gcm_update(key, le_ctr, ghash_acc,
- walk.src.virt.addr, walk.dst.virt.addr,
- nbytes, flags);
- le_ctr[0] += nbytes / AES_BLOCK_SIZE;
- kernel_fpu_end();
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- kernel_fpu_begin();
- } else {
- /* Last segment: process all remaining data. */
- aes_gcm_update(key, le_ctr, ghash_acc,
- walk.src.virt.addr, walk.dst.virt.addr,
- nbytes, flags);
- err = skcipher_walk_done(&walk, 0);
- /*
- * The low word of the counter isn't used by the
- * finalize, so there's no need to increment it here.
- */
- }
+ while (unlikely((nbytes = walk.nbytes) < walk.total)) {
+ /*
+ * Non-last segment. In this case, the assembly function
+ * requires that the length be a multiple of 16 (AES_BLOCK_SIZE)
+ * bytes. The needed buffering of up to 16 bytes is handled by
+ * the skcipher_walk. Here we just need to round down to a
+ * multiple of 16.
+ */
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+ aes_gcm_update(key, le_ctr, ghash_acc, walk.src.virt.addr,
+ walk.dst.virt.addr, nbytes, flags);
+ le_ctr[0] += nbytes / AES_BLOCK_SIZE;
+ kernel_fpu_end();
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ if (err)
+ return err;
+ kernel_fpu_begin();
}
- if (err)
- goto out;
+ /* Last segment: process all remaining data. */
+ aes_gcm_update(key, le_ctr, ghash_acc, walk.src.virt.addr,
+ walk.dst.virt.addr, nbytes, flags);
+ /*
+ * The low word of the counter isn't used by the finalize, so there's no
+ * need to increment it here.
+ */
/* Finalize */
taglen = crypto_aead_authsize(tfm);
@@ -1439,8 +1435,9 @@ gcm_crypt(struct aead_request *req, int flags)
datalen, tag, taglen, flags))
err = -EBADMSG;
}
-out:
kernel_fpu_end();
+ if (nbytes)
+ skcipher_walk_done(&walk, 0);
return err;
}
@@ -1753,6 +1750,6 @@ static void __exit aesni_exit(void)
late_initcall(aesni_init);
module_exit(aesni_exit);
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
+MODULE_DESCRIPTION("AES cipher and modes, optimized with AES-NI or VAES instructions");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("aes");
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 0ffb072be956..0bbec1c75cd0 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -592,22 +592,22 @@ SYM_TYPED_FUNC_START(sha256_transform_rorx)
leaq K256+0*32(%rip), INP ## reuse INP as scratch reg
vpaddd (INP, SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
- FOUR_ROUNDS_AND_SCHED _XFER + 0*32
+ FOUR_ROUNDS_AND_SCHED (_XFER + 0*32)
leaq K256+1*32(%rip), INP
vpaddd (INP, SRND), X0, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
- FOUR_ROUNDS_AND_SCHED _XFER + 1*32
+ FOUR_ROUNDS_AND_SCHED (_XFER + 1*32)
leaq K256+2*32(%rip), INP
vpaddd (INP, SRND), X0, XFER
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
- FOUR_ROUNDS_AND_SCHED _XFER + 2*32
+ FOUR_ROUNDS_AND_SCHED (_XFER + 2*32)
leaq K256+3*32(%rip), INP
vpaddd (INP, SRND), X0, XFER
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
- FOUR_ROUNDS_AND_SCHED _XFER + 3*32
+ FOUR_ROUNDS_AND_SCHED (_XFER + 3*32)
add $4*32, SRND
cmp $3*4*32, SRND
@@ -618,12 +618,12 @@ SYM_TYPED_FUNC_START(sha256_transform_rorx)
leaq K256+0*32(%rip), INP
vpaddd (INP, SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
- DO_4ROUNDS _XFER + 0*32
+ DO_4ROUNDS (_XFER + 0*32)
leaq K256+1*32(%rip), INP
vpaddd (INP, SRND), X1, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
- DO_4ROUNDS _XFER + 1*32
+ DO_4ROUNDS (_XFER + 1*32)
add $2*32, SRND
vmovdqa X2, X0
@@ -651,8 +651,8 @@ SYM_TYPED_FUNC_START(sha256_transform_rorx)
xor SRND, SRND
.align 16
.Lloop3:
- DO_4ROUNDS _XFER + 0*32 + 16
- DO_4ROUNDS _XFER + 1*32 + 16
+ DO_4ROUNDS (_XFER + 0*32 + 16)
+ DO_4ROUNDS (_XFER + 1*32 + 16)
add $2*32, SRND
cmp $4*4*32, SRND
jb .Lloop3
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0c9c2706d4ec..9e519d8a810a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4589,6 +4589,25 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void)
return HYBRID_INTEL_CORE;
}
+static inline bool erratum_hsw11(struct perf_event *event)
+{
+ return (event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+ X86_CONFIG(.event=0xc0, .umask=0x01);
+}
+
+/*
+ * The HSW11 requires a period larger than 100 which is the same as the BDM11.
+ * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
+ *
+ * The message 'interrupt took too long' can be observed on any counter which
+ * was armed with a period < 32 and two events expired in the same NMI.
+ * A minimum period of 32 is enforced for the rest of the events.
+ */
+static void hsw_limit_period(struct perf_event *event, s64 *left)
+{
+ *left = max(*left, erratum_hsw11(event) ? 128 : 32);
+}
+
/*
* Broadwell:
*
@@ -4606,8 +4625,7 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void)
*/
static void bdw_limit_period(struct perf_event *event, s64 *left)
{
- if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
- X86_CONFIG(.event=0xc0, .umask=0x01)) {
+ if (erratum_hsw11(event)) {
if (*left < 128)
*left = 128;
*left &= ~0x3fULL;
@@ -6766,6 +6784,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.limit_period = hsw_limit_period;
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 17a71e92a343..95eada2994e1 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -35,7 +35,6 @@
#include <clocksource/hyperv_timer.h>
#include <linux/highmem.h>
-int hyperv_init_cpuhp;
u64 hv_current_partition_id = ~0ull;
EXPORT_SYMBOL_GPL(hv_current_partition_id);
@@ -607,8 +606,6 @@ skip_hypercall_pg_init:
register_syscore_ops(&hv_syscore_ops);
- hyperv_init_cpuhp = cpuhp;
-
if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID)
hv_get_partition_id();
@@ -637,7 +634,7 @@ skip_hypercall_pg_init:
clean_guest_os_id:
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
- cpuhp_remove_state(cpuhp);
+ cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE);
free_ghcb_page:
free_percpu(hv_ghcb_pg);
free_vp_assist_page:
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index 3831f612e89c..e4121d9aa9e1 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -193,26 +193,6 @@
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data)
/**
- * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model
- * @model: The model name without the INTEL_FAM6_ prefix or ANY
- * The model name is expanded to INTEL_FAM6_@model internally
- * @data: Driver specific data or NULL. The internal storage
- * format is unsigned long. The supplied value, pointer
- * etc. is casted to unsigned long internally.
- *
- * The vendor is set to INTEL, the family to 6 and all other missing
- * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards.
- *
- * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information.
- */
-#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \
- X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data)
-
-#define X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(model, steppings, data) \
- X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
- steppings, X86_FEATURE_ANY, data)
-
-/**
* X86_MATCH_VFM - Match encoded vendor/family/model
* @vfm: Encoded 8-bits each for vendor, family, model
* @data: Driver specific data or NULL. The internal storage
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index eb17f31b06d2..de16862bf230 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -591,6 +591,13 @@ struct fpu_state_config {
* even without XSAVE support, i.e. legacy features FP + SSE
*/
u64 legacy_features;
+ /*
+ * @independent_features:
+ *
+ * Features that are supported by XSAVES, but not managed as part of
+ * the FPU core, such as LBR
+ */
+ u64 independent_features;
};
/* FPU state configuration information */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index f81a851c46dc..44949f972826 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -10,7 +10,7 @@
* that group keep the CPUID for the variants sorted by model number.
*
* The defined symbol names have the following form:
- * INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF}
+ * INTEL_{OPTFAMILY}_{MICROARCH}{OPTDIFF}
* where:
* OPTFAMILY Describes the family of CPUs that this belongs to. Default
* is assumed to be "_CORE" (and should be omitted). Other values
@@ -42,215 +42,136 @@
#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model)
-/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
-#define INTEL_FAM6_ANY X86_MODEL_ANY
-/* Wildcard match for FAM6 so X86_MATCH_VFM(ANY) works */
+/* Wildcard match so X86_MATCH_VFM(ANY) works */
#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY)
-#define INTEL_FAM6_CORE_YONAH 0x0E
+#define INTEL_PENTIUM_PRO IFM(6, 0x01)
+
#define INTEL_CORE_YONAH IFM(6, 0x0E)
-#define INTEL_FAM6_CORE2_MEROM 0x0F
#define INTEL_CORE2_MEROM IFM(6, 0x0F)
-#define INTEL_FAM6_CORE2_MEROM_L 0x16
#define INTEL_CORE2_MEROM_L IFM(6, 0x16)
-#define INTEL_FAM6_CORE2_PENRYN 0x17
#define INTEL_CORE2_PENRYN IFM(6, 0x17)
-#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D
#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D)
-#define INTEL_FAM6_NEHALEM 0x1E
#define INTEL_NEHALEM IFM(6, 0x1E)
-#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */
-#define INTEL_FAM6_NEHALEM_EP 0x1A
#define INTEL_NEHALEM_EP IFM(6, 0x1A)
-#define INTEL_FAM6_NEHALEM_EX 0x2E
#define INTEL_NEHALEM_EX IFM(6, 0x2E)
-#define INTEL_FAM6_WESTMERE 0x25
#define INTEL_WESTMERE IFM(6, 0x25)
-#define INTEL_FAM6_WESTMERE_EP 0x2C
#define INTEL_WESTMERE_EP IFM(6, 0x2C)
-#define INTEL_FAM6_WESTMERE_EX 0x2F
#define INTEL_WESTMERE_EX IFM(6, 0x2F)
-#define INTEL_FAM6_SANDYBRIDGE 0x2A
#define INTEL_SANDYBRIDGE IFM(6, 0x2A)
-#define INTEL_FAM6_SANDYBRIDGE_X 0x2D
#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D)
-#define INTEL_FAM6_IVYBRIDGE 0x3A
#define INTEL_IVYBRIDGE IFM(6, 0x3A)
-#define INTEL_FAM6_IVYBRIDGE_X 0x3E
#define INTEL_IVYBRIDGE_X IFM(6, 0x3E)
-#define INTEL_FAM6_HASWELL 0x3C
#define INTEL_HASWELL IFM(6, 0x3C)
-#define INTEL_FAM6_HASWELL_X 0x3F
#define INTEL_HASWELL_X IFM(6, 0x3F)
-#define INTEL_FAM6_HASWELL_L 0x45
#define INTEL_HASWELL_L IFM(6, 0x45)
-#define INTEL_FAM6_HASWELL_G 0x46
#define INTEL_HASWELL_G IFM(6, 0x46)
-#define INTEL_FAM6_BROADWELL 0x3D
#define INTEL_BROADWELL IFM(6, 0x3D)
-#define INTEL_FAM6_BROADWELL_G 0x47
#define INTEL_BROADWELL_G IFM(6, 0x47)
-#define INTEL_FAM6_BROADWELL_X 0x4F
#define INTEL_BROADWELL_X IFM(6, 0x4F)
-#define INTEL_FAM6_BROADWELL_D 0x56
#define INTEL_BROADWELL_D IFM(6, 0x56)
-#define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */
#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */
-#define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */
#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */
-#define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */
#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */
/* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */
/* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */
-#define INTEL_FAM6_KABYLAKE_L 0x8E /* Sky Lake */
#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */
/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */
/* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */
/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */
-#define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */
#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */
/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */
-#define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */
#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */
-#define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */
#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */
-#define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */
#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */
-#define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */
#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */
#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */
#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */
#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */
#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */
-#define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */
#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */
-#define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */
#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */
-#define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */
#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */
-#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */
#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */
-#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF
#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF)
-#define INTEL_FAM6_GRANITERAPIDS_X 0xAD
#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD)
-#define INTEL_FAM6_GRANITERAPIDS_D 0xAE
#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE)
/* "Hybrid" Processors (P-Core/E-Core) */
-#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */
#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */
-#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */
-#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */
-#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */
#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */
-#define INTEL_FAM6_RAPTORLAKE_P 0xBA
#define INTEL_RAPTORLAKE_P IFM(6, 0xBA)
-#define INTEL_FAM6_RAPTORLAKE_S 0xBF
#define INTEL_RAPTORLAKE_S IFM(6, 0xBF)
-#define INTEL_FAM6_METEORLAKE 0xAC
#define INTEL_METEORLAKE IFM(6, 0xAC)
-#define INTEL_FAM6_METEORLAKE_L 0xAA
#define INTEL_METEORLAKE_L IFM(6, 0xAA)
-#define INTEL_FAM6_ARROWLAKE_H 0xC5
#define INTEL_ARROWLAKE_H IFM(6, 0xC5)
-#define INTEL_FAM6_ARROWLAKE 0xC6
#define INTEL_ARROWLAKE IFM(6, 0xC6)
-#define INTEL_FAM6_ARROWLAKE_U 0xB5
#define INTEL_ARROWLAKE_U IFM(6, 0xB5)
-#define INTEL_FAM6_LUNARLAKE_M 0xBD
#define INTEL_LUNARLAKE_M IFM(6, 0xBD)
/* "Small Core" Processors (Atom/E-Core) */
-#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */
-#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */
-#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */
-#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */
-#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
#define INTEL_ATOM_SALTWELL_TABLET IFM(6, 0x35) /* Cloverview */
-#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
#define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */
-#define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */
#define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */
-#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */
#define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */
-#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
#define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */
-#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
#define INTEL_ATOM_AIRMONT_MID IFM(6, 0x5A) /* Moorefield */
-#define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */
#define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */
-#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
#define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */
-#define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */
#define INTEL_ATOM_GOLDMONT_D IFM(6, 0x5F) /* Denverton */
/* Note: the micro-architecture is "Goldmont Plus" */
-#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
#define INTEL_ATOM_GOLDMONT_PLUS IFM(6, 0x7A) /* Gemini Lake */
-#define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */
#define INTEL_ATOM_TREMONT_D IFM(6, 0x86) /* Jacobsville */
-#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
#define INTEL_ATOM_TREMONT IFM(6, 0x96) /* Elkhart Lake */
-#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
#define INTEL_ATOM_TREMONT_L IFM(6, 0x9C) /* Jasper Lake */
-#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */
#define INTEL_ATOM_GRACEMONT IFM(6, 0xBE) /* Alderlake N */
-#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
#define INTEL_ATOM_CRESTMONT_X IFM(6, 0xAF) /* Sierra Forest */
-#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
#define INTEL_ATOM_CRESTMONT IFM(6, 0xB6) /* Grand Ridge */
-#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */
#define INTEL_ATOM_DARKMONT_X IFM(6, 0xDD) /* Clearwater Forest */
/* Xeon Phi */
-#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
#define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */
-#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */
/* Family 5 */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 3ad29b128943..3b9970117a0f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -221,7 +221,7 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
u64 lapic_id) { return -EINVAL; }
#endif
-void mce_setup(struct mce *m);
+void mce_prep_record(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct device *, mce_device);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 390c4d13956d..5f0bc6a6d025 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -40,7 +40,6 @@ static inline unsigned char hv_get_nmi_reason(void)
}
#if IS_ENABLED(CONFIG_HYPERV)
-extern int hyperv_init_cpuhp;
extern bool hyperv_paravisor_present;
extern void *hv_hypercall_pg;
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index af4302d79b59..f3d257c45225 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -17,6 +17,7 @@ extern unsigned long phys_base;
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
+extern unsigned long physmem_end;
static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 9053dfe9fa03..a98e53491a4e 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -140,6 +140,10 @@ extern unsigned int ptrs_per_p4d;
# define VMEMMAP_START __VMEMMAP_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
+#ifdef CONFIG_RANDOMIZE_MEMORY
+# define PHYSMEM_END physmem_end
+#endif
+
/*
* End of the region for which vmalloc page tables are pre-allocated.
* For non-KMSAN builds, this is the same as VMALLOC_END.
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a75a07f4931f..775acbdea1a9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -691,8 +691,6 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
}
#ifdef CONFIG_CPU_SUP_AMD
-extern u32 amd_get_highest_perf(void);
-
/*
* Issue a DIV 0/1 insn to clear any division data from previous DIV
* operations.
@@ -705,7 +703,6 @@ static __always_inline void amd_clear_divider(void)
extern void amd_check_microcode(void);
#else
-static inline u32 amd_get_highest_perf(void) { return 0; }
static inline void amd_clear_divider(void) { }
static inline void amd_check_microcode(void) { }
#endif
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 12dbd2588ca7..8b1b6ce1e51b 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -156,12 +156,6 @@ static inline void resctrl_sched_in(struct task_struct *tsk)
__resctrl_sched_in(tsk);
}
-static inline u32 resctrl_arch_system_num_rmid_idx(void)
-{
- /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
- return boot_cpu_data.x86_cache_max_rmid + 1;
-}
-
static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
{
*rmid = idx;
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 79bbe2be900e..ee34ab00a8d6 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -164,7 +164,7 @@ struct snp_guest_msg_hdr {
struct snp_guest_msg {
struct snp_guest_msg_hdr hdr;
- u8 payload[4000];
+ u8 payload[PAGE_SIZE - sizeof(struct snp_guest_msg_hdr)];
} __packed;
struct sev_guest_platform_data {
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index abe3a8f22cbd..aef70336d624 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -282,9 +282,22 @@ static inline long arch_scale_freq_capacity(int cpu)
}
#define arch_scale_freq_capacity arch_scale_freq_capacity
+bool arch_enable_hybrid_capacity_scale(void);
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
+ unsigned long cap_freq, unsigned long base_freq);
+
+unsigned long arch_scale_cpu_capacity(int cpu);
+#define arch_scale_cpu_capacity arch_scale_cpu_capacity
+
extern void arch_set_max_freq_ratio(bool turbo_disabled);
extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
#else
+static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
+static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
+ unsigned long max_cap,
+ unsigned long cap_freq,
+ unsigned long base_freq) { }
+
static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
#endif
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index ff8f25faca3d..956984054bf3 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -9,6 +9,17 @@
#include <asm/processor.h>
#include <asm/topology.h>
+#define CPPC_HIGHEST_PERF_PERFORMANCE 196
+#define CPPC_HIGHEST_PERF_PREFCORE 166
+
+enum amd_pref_core {
+ AMD_PREF_CORE_UNKNOWN = 0,
+ AMD_PREF_CORE_SUPPORTED,
+ AMD_PREF_CORE_UNSUPPORTED,
+};
+static enum amd_pref_core amd_pref_core_detected;
+static u64 boost_numerator;
+
/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
bool cpc_supported_by_cpu(void)
@@ -69,31 +80,30 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
static void amd_set_max_freq_ratio(void)
{
struct cppc_perf_caps perf_caps;
- u64 highest_perf, nominal_perf;
+ u64 numerator, nominal_perf;
u64 perf_ratio;
int rc;
rc = cppc_get_perf_caps(0, &perf_caps);
if (rc) {
- pr_debug("Could not retrieve perf counters (%d)\n", rc);
+ pr_warn("Could not retrieve perf counters (%d)\n", rc);
return;
}
- highest_perf = amd_get_highest_perf();
+ rc = amd_get_boost_ratio_numerator(0, &numerator);
+ if (rc) {
+ pr_warn("Could not retrieve highest performance (%d)\n", rc);
+ return;
+ }
nominal_perf = perf_caps.nominal_perf;
- if (!highest_perf || !nominal_perf) {
- pr_debug("Could not retrieve highest or nominal performance\n");
+ if (!nominal_perf) {
+ pr_warn("Could not retrieve nominal performance\n");
return;
}
- perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
/* midpoint between max_boost and max_P */
- perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
- if (!perf_ratio) {
- pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
- return;
- }
+ perf_ratio = (div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf) + SCHED_CAPACITY_SCALE) >> 1;
freq_invariance_set_perf_ratio(perf_ratio, false);
}
@@ -116,3 +126,143 @@ void init_freq_invariance_cppc(void)
init_done = true;
mutex_unlock(&freq_invariance_lock);
}
+
+/*
+ * Get the highest performance register value.
+ * @cpu: CPU from which to get highest performance.
+ * @highest_perf: Return address for highest performance value.
+ *
+ * Return: 0 for success, negative error code otherwise.
+ */
+int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf)
+{
+ u64 val;
+ int ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
+ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &val);
+ if (ret)
+ goto out;
+
+ val = AMD_CPPC_HIGHEST_PERF(val);
+ } else {
+ ret = cppc_get_highest_perf(cpu, &val);
+ if (ret)
+ goto out;
+ }
+
+ WRITE_ONCE(*highest_perf, (u32)val);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
+
+/**
+ * amd_detect_prefcore: Detect if CPUs in the system support preferred cores
+ * @detected: Output variable for the result of the detection.
+ *
+ * Determine whether CPUs in the system support preferred cores. On systems
+ * that support preferred cores, different highest perf values will be found
+ * on different cores. On other systems, the highest perf value will be the
+ * same on all cores.
+ *
+ * The result of the detection will be stored in the 'detected' parameter.
+ *
+ * Return: 0 for success, negative error code otherwise
+ */
+int amd_detect_prefcore(bool *detected)
+{
+ int cpu, count = 0;
+ u64 highest_perf[2] = {0};
+
+ if (WARN_ON(!detected))
+ return -EINVAL;
+
+ switch (amd_pref_core_detected) {
+ case AMD_PREF_CORE_SUPPORTED:
+ *detected = true;
+ return 0;
+ case AMD_PREF_CORE_UNSUPPORTED:
+ *detected = false;
+ return 0;
+ default:
+ break;
+ }
+
+ for_each_present_cpu(cpu) {
+ u32 tmp;
+ int ret;
+
+ ret = amd_get_highest_perf(cpu, &tmp);
+ if (ret)
+ return ret;
+
+ if (!count || (count == 1 && tmp != highest_perf[0]))
+ highest_perf[count++] = tmp;
+
+ if (count == 2)
+ break;
+ }
+
+ *detected = (count == 2);
+ boost_numerator = highest_perf[0];
+
+ amd_pref_core_detected = *detected ? AMD_PREF_CORE_SUPPORTED :
+ AMD_PREF_CORE_UNSUPPORTED;
+
+ pr_debug("AMD CPPC preferred core is %ssupported (highest perf: 0x%llx)\n",
+ *detected ? "" : "un", highest_perf[0]);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(amd_detect_prefcore);
+
+/**
+ * amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation
+ * @cpu: CPU to get numerator for.
+ * @numerator: Output variable for numerator.
+ *
+ * Determine the numerator to use for calculating the boost ratio on
+ * a CPU. On systems that support preferred cores, this will be a hardcoded
+ * value. On other systems this will the highest performance register value.
+ *
+ * If booting the system with amd-pstate enabled but preferred cores disabled then
+ * the correct boost numerator will be returned to match hardware capabilities
+ * even if the preferred cores scheduling hints are not enabled.
+ *
+ * Return: 0 for success, negative error code otherwise.
+ */
+int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
+{
+ bool prefcore;
+ int ret;
+
+ ret = amd_detect_prefcore(&prefcore);
+ if (ret)
+ return ret;
+
+ /* without preferred cores, return the highest perf register value */
+ if (!prefcore) {
+ *numerator = boost_numerator;
+ return 0;
+ }
+
+ /*
+ * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
+ * the highest performance level is set to 196.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=218759
+ */
+ if (cpu_feature_enabled(X86_FEATURE_ZEN4)) {
+ switch (boot_cpu_data.x86_model) {
+ case 0x70 ... 0x7f:
+ *numerator = CPPC_HIGHEST_PERF_PERFORMANCE;
+ return 0;
+ default:
+ break;
+ }
+ }
+ *numerator = CPPC_HIGHEST_PERF_PREFCORE;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(amd_get_boost_ratio_numerator);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 66fd4b2a37a3..373638691cd4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1775,12 +1775,9 @@ static __init void apic_set_fixmap(bool read_apic);
static __init void x2apic_disable(void)
{
- u32 x2apic_id, state = x2apic_state;
+ u32 x2apic_id;
- x2apic_mode = 0;
- x2apic_state = X2APIC_DISABLED;
-
- if (state != X2APIC_ON)
+ if (x2apic_state < X2APIC_ON)
return;
x2apic_id = read_apic_id();
@@ -1793,6 +1790,10 @@ static __init void x2apic_disable(void)
}
__x2apic_disable();
+
+ x2apic_mode = 0;
+ x2apic_state = X2APIC_DISABLED;
+
/*
* Don't reread the APIC ID as it was already done from
* check_x2apic() and the APIC driver still is a x2APIC variant,
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1e0fe5f8ab84..015971adadfc 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1190,22 +1190,6 @@ unsigned long amd_get_dr_addr_mask(unsigned int dr)
}
EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask);
-u32 amd_get_highest_perf(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
- (c->x86_model >= 0x70 && c->x86_model < 0x80)))
- return 166;
-
- if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
- (c->x86_model >= 0x40 && c->x86_model < 0x70)))
- return 166;
-
- return 255;
-}
-EXPORT_SYMBOL_GPL(amd_get_highest_perf);
-
static void zenbleed_check_cpu(void *unused)
{
struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 0b69bfbf345d..f642de2ebdac 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -349,9 +349,89 @@ static DECLARE_WORK(disable_freq_invariance_work,
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
+static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key);
+
+struct arch_hybrid_cpu_scale {
+ unsigned long capacity;
+ unsigned long freq_ratio;
+};
+
+static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale;
+
+/**
+ * arch_enable_hybrid_capacity_scale() - Enable hybrid CPU capacity scaling
+ *
+ * Allocate memory for per-CPU data used by hybrid CPU capacity scaling,
+ * initialize it and set the static key controlling its code paths.
+ *
+ * Must be called before arch_set_cpu_capacity().
+ */
+bool arch_enable_hybrid_capacity_scale(void)
+{
+ int cpu;
+
+ if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) {
+ WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled");
+ return true;
+ }
+
+ arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale);
+ if (!arch_cpu_scale)
+ return false;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE;
+ per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio;
+ }
+
+ static_branch_enable(&arch_hybrid_cap_scale_key);
+
+ pr_info("Hybrid CPU capacity scaling enabled\n");
+
+ return true;
+}
+
+/**
+ * arch_set_cpu_capacity() - Set scale-invariance parameters for a CPU
+ * @cpu: Target CPU.
+ * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap.
+ * @max_cap: System-wide maximum CPU capacity.
+ * @cap_freq: Frequency of @cpu corresponding to @cap.
+ * @base_freq: Frequency of @cpu at which MPERF counts.
+ *
+ * The units in which @cap and @max_cap are expressed do not matter, so long
+ * as they are consistent, because the former is effectively divided by the
+ * latter. Analogously for @cap_freq and @base_freq.
+ *
+ * After calling this function for all CPUs, call arch_rebuild_sched_domains()
+ * to let the scheduler know that capacity-aware scheduling can be used going
+ * forward.
+ */
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
+ unsigned long cap_freq, unsigned long base_freq)
+{
+ if (static_branch_likely(&arch_hybrid_cap_scale_key)) {
+ WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity,
+ div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap));
+ WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio,
+ div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq));
+ } else {
+ WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled");
+ }
+}
+
+unsigned long arch_scale_cpu_capacity(int cpu)
+{
+ if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
+ return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity);
+
+ return SCHED_CAPACITY_SCALE;
+}
+EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity);
+
static void scale_freq_tick(u64 acnt, u64 mcnt)
{
- u64 freq_scale;
+ u64 freq_scale, freq_ratio;
if (!arch_scale_freq_invariant())
return;
@@ -359,7 +439,12 @@ static void scale_freq_tick(u64 acnt, u64 mcnt)
if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
goto error;
- if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
+ if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
+ freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio);
+ else
+ freq_ratio = arch_max_freq_ratio;
+
+ if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt)
goto error;
freq_scale = div64_u64(acnt, mcnt);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 45675da354f3..d1915427b4ff 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -233,7 +233,8 @@ static void x86_amd_ssb_disable(void)
#define pr_fmt(fmt) "MDS: " fmt
/* Default mitigation for MDS-affected CPUs */
-static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL;
+static enum mds_mitigations mds_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_MDS) ? MDS_MITIGATION_FULL : MDS_MITIGATION_OFF;
static bool mds_nosmt __ro_after_init = false;
static const char * const mds_strings[] = {
@@ -293,7 +294,8 @@ enum taa_mitigations {
};
/* Default mitigation for TAA-affected CPUs */
-static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
+static enum taa_mitigations taa_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_TAA) ? TAA_MITIGATION_VERW : TAA_MITIGATION_OFF;
static bool taa_nosmt __ro_after_init;
static const char * const taa_strings[] = {
@@ -391,7 +393,8 @@ enum mmio_mitigations {
};
/* Default mitigation for Processor MMIO Stale Data vulnerabilities */
-static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW;
+static enum mmio_mitigations mmio_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_MMIO_STALE_DATA) ? MMIO_MITIGATION_VERW : MMIO_MITIGATION_OFF;
static bool mmio_nosmt __ro_after_init = false;
static const char * const mmio_strings[] = {
@@ -605,7 +608,8 @@ enum srbds_mitigations {
SRBDS_MITIGATION_HYPERVISOR,
};
-static enum srbds_mitigations srbds_mitigation __ro_after_init = SRBDS_MITIGATION_FULL;
+static enum srbds_mitigations srbds_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_SRBDS) ? SRBDS_MITIGATION_FULL : SRBDS_MITIGATION_OFF;
static const char * const srbds_strings[] = {
[SRBDS_MITIGATION_OFF] = "Vulnerable",
@@ -731,11 +735,8 @@ enum gds_mitigations {
GDS_MITIGATION_HYPERVISOR,
};
-#if IS_ENABLED(CONFIG_MITIGATION_GDS_FORCE)
-static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE;
-#else
-static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL;
-#endif
+static enum gds_mitigations gds_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_GDS) ? GDS_MITIGATION_FULL : GDS_MITIGATION_OFF;
static const char * const gds_strings[] = {
[GDS_MITIGATION_OFF] = "Vulnerable",
@@ -871,7 +872,8 @@ enum spectre_v1_mitigation {
};
static enum spectre_v1_mitigation spectre_v1_mitigation __ro_after_init =
- SPECTRE_V1_MITIGATION_AUTO;
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V1) ?
+ SPECTRE_V1_MITIGATION_AUTO : SPECTRE_V1_MITIGATION_NONE;
static const char * const spectre_v1_strings[] = {
[SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers",
@@ -986,7 +988,7 @@ static const char * const retbleed_strings[] = {
static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
RETBLEED_MITIGATION_NONE;
static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
- RETBLEED_CMD_AUTO;
+ IS_ENABLED(CONFIG_MITIGATION_RETBLEED) ? RETBLEED_CMD_AUTO : RETBLEED_CMD_OFF;
static int __ro_after_init retbleed_nosmt = false;
@@ -1447,17 +1449,18 @@ static void __init spec_v2_print_cond(const char *reason, bool secure)
static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
{
- enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
+ enum spectre_v2_mitigation_cmd cmd;
char arg[20];
int ret, i;
+ cmd = IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ? SPECTRE_V2_CMD_AUTO : SPECTRE_V2_CMD_NONE;
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") ||
cpu_mitigations_off())
return SPECTRE_V2_CMD_NONE;
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
if (ret < 0)
- return SPECTRE_V2_CMD_AUTO;
+ return cmd;
for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
if (!match_option(arg, ret, mitigation_options[i].option))
@@ -1467,8 +1470,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
}
if (i >= ARRAY_SIZE(mitigation_options)) {
- pr_err("unknown option (%s). Switching to AUTO select\n", arg);
- return SPECTRE_V2_CMD_AUTO;
+ pr_err("unknown option (%s). Switching to default mode\n", arg);
+ return cmd;
}
if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
@@ -2021,10 +2024,12 @@ static const struct {
static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
{
- enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
+ enum ssb_mitigation_cmd cmd;
char arg[20];
int ret, i;
+ cmd = IS_ENABLED(CONFIG_MITIGATION_SSB) ?
+ SPEC_STORE_BYPASS_CMD_AUTO : SPEC_STORE_BYPASS_CMD_NONE;
if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
cpu_mitigations_off()) {
return SPEC_STORE_BYPASS_CMD_NONE;
@@ -2032,7 +2037,7 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
arg, sizeof(arg));
if (ret < 0)
- return SPEC_STORE_BYPASS_CMD_AUTO;
+ return cmd;
for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
if (!match_option(arg, ret, ssb_mitigation_options[i].option))
@@ -2043,8 +2048,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
}
if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
- pr_err("unknown option (%s). Switching to AUTO select\n", arg);
- return SPEC_STORE_BYPASS_CMD_AUTO;
+ pr_err("unknown option (%s). Switching to default mode\n", arg);
+ return cmd;
}
}
@@ -2371,7 +2376,8 @@ EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
#define pr_fmt(fmt) "L1TF: " fmt
/* Default mitigation for L1TF-affected CPUs */
-enum l1tf_mitigations l1tf_mitigation __ro_after_init = L1TF_MITIGATION_FLUSH;
+enum l1tf_mitigations l1tf_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_L1TF) ? L1TF_MITIGATION_FLUSH : L1TF_MITIGATION_OFF;
#if IS_ENABLED(CONFIG_KVM_INTEL)
EXPORT_SYMBOL_GPL(l1tf_mitigation);
#endif
@@ -2551,10 +2557,9 @@ static void __init srso_select_mitigation(void)
{
bool has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE);
- if (cpu_mitigations_off())
- return;
-
- if (!boot_cpu_has_bug(X86_BUG_SRSO)) {
+ if (!boot_cpu_has_bug(X86_BUG_SRSO) ||
+ cpu_mitigations_off() ||
+ srso_cmd == SRSO_CMD_OFF) {
if (boot_cpu_has(X86_FEATURE_SBPB))
x86_pred_cmd = PRED_CMD_SBPB;
return;
@@ -2585,11 +2590,6 @@ static void __init srso_select_mitigation(void)
}
switch (srso_cmd) {
- case SRSO_CMD_OFF:
- if (boot_cpu_has(X86_FEATURE_SBPB))
- x86_pred_cmd = PRED_CMD_SBPB;
- return;
-
case SRSO_CMD_MICROCODE:
if (has_microcode) {
srso_mitigation = SRSO_MITIGATION_MICROCODE;
@@ -2643,6 +2643,8 @@ static void __init srso_select_mitigation(void)
pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
}
break;
+ default:
+ break;
}
out:
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d4e539d4e158..be307c9ef263 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1165,8 +1165,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(INTEL_CORE_YONAH, NO_SSB),
- VULNWL_INTEL(INTEL_ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_INTEL(INTEL_ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(INTEL_ATOM_AIRMONT_MID, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY),
+ VULNWL_INTEL(INTEL_ATOM_AIRMONT_NP, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(INTEL_ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
VULNWL_INTEL(INTEL_ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 08b95a35b5cb..e7656cbef68d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -311,16 +311,18 @@ static void early_init_intel(struct cpuinfo_x86 *c)
}
/*
- * There is a known erratum on Pentium III and Core Solo
- * and Core Duo CPUs.
- * " Page with PAT set to WC while associated MTRR is UC
- * may consolidate to UC "
- * Because of this erratum, it is better to stick with
- * setting WC in MTRR rather than using PAT on these CPUs.
+ * PAT is broken on early family 6 CPUs, the last of which
+ * is "Yonah" where the erratum is named "AN7":
*
- * Enable PAT WC only on P4, Core 2 or later CPUs.
+ * Page with PAT (Page Attribute Table) Set to USWC
+ * (Uncacheable Speculative Write Combine) While
+ * Associated MTRR (Memory Type Range Register) Is UC
+ * (Uncacheable) May Consolidate to UC
+ *
+ * Disable PAT and fall back to MTRR on these CPUs.
*/
- if (c->x86 == 6 && c->x86_model < 15)
+ if (c->x86_vfm >= INTEL_PENTIUM_PRO &&
+ c->x86_vfm <= INTEL_CORE_YONAH)
clear_cpu_cap(c, X86_FEATURE_PAT);
/*
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 9a0133ef7e20..14bf8c232e45 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -780,7 +780,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
struct mce m;
- mce_setup(&m);
+ mce_prep_record(&m);
m.status = status;
m.misc = misc;
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 7f7309ff67d0..3885fe05f01e 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -44,7 +44,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
else
lsb = PAGE_SHIFT;
- mce_setup(&m);
+ mce_prep_record(&m);
m.bank = -1;
/* Fake a memory read error with unknown channel */
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
@@ -66,6 +66,7 @@ EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{
const u64 *i_mce = ((const u64 *) (ctx_info + 1));
+ bool apicid_found = false;
unsigned int cpu;
struct mce m;
@@ -97,20 +98,19 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
if (ctx_info->reg_arr_size < 48)
return -EINVAL;
- mce_setup(&m);
-
- m.extcpu = -1;
- m.socketid = -1;
-
for_each_possible_cpu(cpu) {
if (cpu_data(cpu).topo.initial_apicid == lapic_id) {
- m.extcpu = cpu;
- m.socketid = cpu_data(m.extcpu).topo.pkg_id;
+ apicid_found = true;
break;
}
}
- m.apicid = lapic_id;
+ if (!apicid_found)
+ return -EINVAL;
+
+ mce_prep_record_common(&m);
+ mce_prep_record_per_cpu(cpu, &m);
+
m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
m.status = *i_mce;
m.addr = *(i_mce + 1);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b85ec7a4ec9e..2a938f429c4d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -117,20 +117,32 @@ static struct irq_work mce_irq_work;
*/
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
-/* Do initial initialization of a struct mce */
-void mce_setup(struct mce *m)
+void mce_prep_record_common(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
- m->cpu = m->extcpu = smp_processor_id();
+
+ m->cpuid = cpuid_eax(1);
+ m->cpuvendor = boot_cpu_data.x86_vendor;
+ m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
/* need the internal __ version to avoid deadlocks */
- m->time = __ktime_get_real_seconds();
- m->cpuvendor = boot_cpu_data.x86_vendor;
- m->cpuid = cpuid_eax(1);
- m->socketid = cpu_data(m->extcpu).topo.pkg_id;
- m->apicid = cpu_data(m->extcpu).topo.initial_apicid;
- m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
- m->ppin = cpu_data(m->extcpu).ppin;
- m->microcode = boot_cpu_data.microcode;
+ m->time = __ktime_get_real_seconds();
+}
+
+void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m)
+{
+ m->cpu = cpu;
+ m->extcpu = cpu;
+ m->apicid = cpu_data(cpu).topo.initial_apicid;
+ m->microcode = cpu_data(cpu).microcode;
+ m->ppin = topology_ppin(cpu);
+ m->socketid = topology_physical_package_id(cpu);
+}
+
+/* Do initial initialization of a struct mce */
+void mce_prep_record(struct mce *m)
+{
+ mce_prep_record_common(m);
+ mce_prep_record_per_cpu(smp_processor_id(), m);
}
DEFINE_PER_CPU(struct mce, injectm);
@@ -436,11 +448,11 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs)
{
/*
- * Enable instrumentation around mce_setup() which calls external
+ * Enable instrumentation around mce_prep_record() which calls external
* facilities.
*/
instrumentation_begin();
- mce_setup(m);
+ mce_prep_record(m);
instrumentation_end();
m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 01f8f03969e6..43c7f3b71df5 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -261,6 +261,8 @@ enum mca_msr {
/* Decide whether to add MCE record to MCE event pool or filter it out. */
extern bool filter_mce(struct mce *m);
+void mce_prep_record_common(struct mce *m);
+void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c0d56c02b8da..f63b051f25a0 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -89,6 +89,31 @@ static struct equiv_cpu_table {
struct equiv_cpu_entry *entry;
} equiv_table;
+union zen_patch_rev {
+ struct {
+ __u32 rev : 8,
+ stepping : 4,
+ model : 4,
+ __reserved : 4,
+ ext_model : 4,
+ ext_fam : 8;
+ };
+ __u32 ucode_rev;
+};
+
+union cpuid_1_eax {
+ struct {
+ __u32 stepping : 4,
+ model : 4,
+ family : 4,
+ __reserved0 : 4,
+ ext_model : 4,
+ ext_fam : 8,
+ __reserved1 : 4;
+ };
+ __u32 full;
+};
+
/*
* This points to the current valid container of microcode patches which we will
* save from the initrd/builtin before jettisoning its contents. @mc is the
@@ -96,7 +121,6 @@ static struct equiv_cpu_table {
*/
struct cont_desc {
struct microcode_amd *mc;
- u32 cpuid_1_eax;
u32 psize;
u8 *data;
size_t size;
@@ -109,10 +133,42 @@ struct cont_desc {
static const char
ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin";
+/*
+ * This is CPUID(1).EAX on the BSP. It is used in two ways:
+ *
+ * 1. To ignore the equivalence table on Zen1 and newer.
+ *
+ * 2. To match which patches to load because the patch revision ID
+ * already contains the f/m/s for which the microcode is destined
+ * for.
+ */
+static u32 bsp_cpuid_1_eax __ro_after_init;
+
+static union cpuid_1_eax ucode_rev_to_cpuid(unsigned int val)
+{
+ union zen_patch_rev p;
+ union cpuid_1_eax c;
+
+ p.ucode_rev = val;
+ c.full = 0;
+
+ c.stepping = p.stepping;
+ c.model = p.model;
+ c.ext_model = p.ext_model;
+ c.family = 0xf;
+ c.ext_fam = p.ext_fam;
+
+ return c;
+}
+
static u16 find_equiv_id(struct equiv_cpu_table *et, u32 sig)
{
unsigned int i;
+ /* Zen and newer do not need an equivalence table. */
+ if (x86_family(bsp_cpuid_1_eax) >= 0x17)
+ return 0;
+
if (!et || !et->num_entries)
return 0;
@@ -159,6 +215,10 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size)
if (!verify_container(buf, buf_size))
return false;
+ /* Zen and newer do not need an equivalence table. */
+ if (x86_family(bsp_cpuid_1_eax) >= 0x17)
+ return true;
+
cont_type = hdr[1];
if (cont_type != UCODE_EQUIV_CPU_TABLE_TYPE) {
pr_debug("Wrong microcode container equivalence table type: %u.\n",
@@ -222,8 +282,9 @@ __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize)
* exceed the per-family maximum). @sh_psize is the size read from the section
* header.
*/
-static unsigned int __verify_patch_size(u8 family, u32 sh_psize, size_t buf_size)
+static unsigned int __verify_patch_size(u32 sh_psize, size_t buf_size)
{
+ u8 family = x86_family(bsp_cpuid_1_eax);
u32 max_size;
if (family >= 0x15)
@@ -258,9 +319,9 @@ static unsigned int __verify_patch_size(u8 family, u32 sh_psize, size_t buf_size
* positive: patch is not for this family, skip it
* 0: success
*/
-static int
-verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size)
+static int verify_patch(const u8 *buf, size_t buf_size, u32 *patch_size)
{
+ u8 family = x86_family(bsp_cpuid_1_eax);
struct microcode_header_amd *mc_hdr;
unsigned int ret;
u32 sh_psize;
@@ -286,7 +347,7 @@ verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size)
return -1;
}
- ret = __verify_patch_size(family, sh_psize, buf_size);
+ ret = __verify_patch_size(sh_psize, buf_size);
if (!ret) {
pr_debug("Per-family patch size mismatch.\n");
return -1;
@@ -308,6 +369,15 @@ verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size)
return 0;
}
+static bool mc_patch_matches(struct microcode_amd *mc, u16 eq_id)
+{
+ /* Zen and newer do not need an equivalence table. */
+ if (x86_family(bsp_cpuid_1_eax) >= 0x17)
+ return ucode_rev_to_cpuid(mc->hdr.patch_id).full == bsp_cpuid_1_eax;
+ else
+ return eq_id == mc->hdr.processor_rev_id;
+}
+
/*
* This scans the ucode blob for the proper container as we can have multiple
* containers glued together. Returns the equivalence ID from the equivalence
@@ -336,7 +406,7 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc)
* doesn't contain a patch for the CPU, scan through the whole container
* so that it can be skipped in case there are other containers appended.
*/
- eq_id = find_equiv_id(&table, desc->cpuid_1_eax);
+ eq_id = find_equiv_id(&table, bsp_cpuid_1_eax);
buf += hdr[2] + CONTAINER_HDR_SZ;
size -= hdr[2] + CONTAINER_HDR_SZ;
@@ -350,7 +420,7 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc)
u32 patch_size;
int ret;
- ret = verify_patch(x86_family(desc->cpuid_1_eax), buf, size, &patch_size);
+ ret = verify_patch(buf, size, &patch_size);
if (ret < 0) {
/*
* Patch verification failed, skip to the next container, if
@@ -363,7 +433,7 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc)
}
mc = (struct microcode_amd *)(buf + SECTION_HDR_SIZE);
- if (eq_id == mc->hdr.processor_rev_id) {
+ if (mc_patch_matches(mc, eq_id)) {
desc->psize = patch_size;
desc->mc = mc;
}
@@ -421,6 +491,7 @@ static int __apply_microcode_amd(struct microcode_amd *mc)
/* verify patch application was successful */
native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
if (rev != mc->hdr.patch_id)
return -1;
@@ -438,14 +509,12 @@ static int __apply_microcode_amd(struct microcode_amd *mc)
*
* Returns true if container found (sets @desc), false otherwise.
*/
-static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, size_t size)
+static bool early_apply_microcode(u32 old_rev, void *ucode, size_t size)
{
struct cont_desc desc = { 0 };
struct microcode_amd *mc;
bool ret = false;
- desc.cpuid_1_eax = cpuid_1_eax;
-
scan_containers(ucode, size, &desc);
mc = desc.mc;
@@ -463,9 +532,10 @@ static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, siz
return !__apply_microcode_amd(mc);
}
-static bool get_builtin_microcode(struct cpio_data *cp, u8 family)
+static bool get_builtin_microcode(struct cpio_data *cp)
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
+ u8 family = x86_family(bsp_cpuid_1_eax);
struct firmware fw;
if (IS_ENABLED(CONFIG_X86_32))
@@ -484,11 +554,11 @@ static bool get_builtin_microcode(struct cpio_data *cp, u8 family)
return false;
}
-static void __init find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpio_data *ret)
+static void __init find_blobs_in_containers(struct cpio_data *ret)
{
struct cpio_data cp;
- if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax)))
+ if (!get_builtin_microcode(&cp))
cp = find_microcode_in_initrd(ucode_path);
*ret = cp;
@@ -499,16 +569,18 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_
struct cpio_data cp = { };
u32 dummy;
+ bsp_cpuid_1_eax = cpuid_1_eax;
+
native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy);
/* Needed in load_microcode_amd() */
ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax;
- find_blobs_in_containers(cpuid_1_eax, &cp);
+ find_blobs_in_containers(&cp);
if (!(cp.data && cp.size))
return;
- if (early_apply_microcode(cpuid_1_eax, ed->old_rev, cp.data, cp.size))
+ if (early_apply_microcode(ed->old_rev, cp.data, cp.size))
native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy);
}
@@ -525,12 +597,10 @@ static int __init save_microcode_in_initrd(void)
if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10)
return 0;
- find_blobs_in_containers(cpuid_1_eax, &cp);
+ find_blobs_in_containers(&cp);
if (!(cp.data && cp.size))
return -EINVAL;
- desc.cpuid_1_eax = cpuid_1_eax;
-
scan_containers(cp.data, cp.size, &desc);
if (!desc.mc)
return -EINVAL;
@@ -543,26 +613,65 @@ static int __init save_microcode_in_initrd(void)
}
early_initcall(save_microcode_in_initrd);
+static inline bool patch_cpus_equivalent(struct ucode_patch *p, struct ucode_patch *n)
+{
+ /* Zen and newer hardcode the f/m/s in the patch ID */
+ if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
+ union cpuid_1_eax p_cid = ucode_rev_to_cpuid(p->patch_id);
+ union cpuid_1_eax n_cid = ucode_rev_to_cpuid(n->patch_id);
+
+ /* Zap stepping */
+ p_cid.stepping = 0;
+ n_cid.stepping = 0;
+
+ return p_cid.full == n_cid.full;
+ } else {
+ return p->equiv_cpu == n->equiv_cpu;
+ }
+}
+
/*
* a small, trivial cache of per-family ucode patches
*/
-static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
+static struct ucode_patch *cache_find_patch(struct ucode_cpu_info *uci, u16 equiv_cpu)
{
struct ucode_patch *p;
+ struct ucode_patch n;
+
+ n.equiv_cpu = equiv_cpu;
+ n.patch_id = uci->cpu_sig.rev;
+
+ WARN_ON_ONCE(!n.patch_id);
list_for_each_entry(p, &microcode_cache, plist)
- if (p->equiv_cpu == equiv_cpu)
+ if (patch_cpus_equivalent(p, &n))
return p;
+
return NULL;
}
+static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
+{
+ /* Zen and newer hardcode the f/m/s in the patch ID */
+ if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
+ union zen_patch_rev zp, zn;
+
+ zp.ucode_rev = p->patch_id;
+ zn.ucode_rev = n->patch_id;
+
+ return zn.rev > zp.rev;
+ } else {
+ return n->patch_id > p->patch_id;
+ }
+}
+
static void update_cache(struct ucode_patch *new_patch)
{
struct ucode_patch *p;
list_for_each_entry(p, &microcode_cache, plist) {
- if (p->equiv_cpu == new_patch->equiv_cpu) {
- if (p->patch_id >= new_patch->patch_id) {
+ if (patch_cpus_equivalent(p, new_patch)) {
+ if (!patch_newer(p, new_patch)) {
/* we already have the latest patch */
kfree(new_patch->data);
kfree(new_patch);
@@ -593,13 +702,22 @@ static void free_cache(void)
static struct ucode_patch *find_patch(unsigned int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- u16 equiv_id;
+ u32 rev, dummy __always_unused;
+ u16 equiv_id = 0;
- equiv_id = find_equiv_id(&equiv_table, uci->cpu_sig.sig);
- if (!equiv_id)
- return NULL;
+ /* fetch rev if not populated yet: */
+ if (!uci->cpu_sig.rev) {
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+ uci->cpu_sig.rev = rev;
+ }
+
+ if (x86_family(bsp_cpuid_1_eax) < 0x17) {
+ equiv_id = find_equiv_id(&equiv_table, uci->cpu_sig.sig);
+ if (!equiv_id)
+ return NULL;
+ }
- return cache_find_patch(equiv_id);
+ return cache_find_patch(uci, equiv_id);
}
void reload_ucode_amd(unsigned int cpu)
@@ -649,7 +767,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
struct ucode_cpu_info *uci;
struct ucode_patch *p;
enum ucode_state ret;
- u32 rev, dummy __always_unused;
+ u32 rev;
BUG_ON(raw_smp_processor_id() != cpu);
@@ -659,11 +777,11 @@ static enum ucode_state apply_microcode_amd(int cpu)
if (!p)
return UCODE_NFOUND;
+ rev = uci->cpu_sig.rev;
+
mc_amd = p->data;
uci->mc = p->data;
- rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
/* need to apply patch? */
if (rev > mc_amd->hdr.patch_id) {
ret = UCODE_OK;
@@ -709,6 +827,10 @@ static size_t install_equiv_cpu_table(const u8 *buf, size_t buf_size)
hdr = (const u32 *)buf;
equiv_tbl_len = hdr[2];
+ /* Zen and newer do not need an equivalence table. */
+ if (x86_family(bsp_cpuid_1_eax) >= 0x17)
+ goto out;
+
equiv_table.entry = vmalloc(equiv_tbl_len);
if (!equiv_table.entry) {
pr_err("failed to allocate equivalent CPU table\n");
@@ -718,12 +840,16 @@ static size_t install_equiv_cpu_table(const u8 *buf, size_t buf_size)
memcpy(equiv_table.entry, buf + CONTAINER_HDR_SZ, equiv_tbl_len);
equiv_table.num_entries = equiv_tbl_len / sizeof(struct equiv_cpu_entry);
+out:
/* add header length */
return equiv_tbl_len + CONTAINER_HDR_SZ;
}
static void free_equiv_cpu_table(void)
{
+ if (x86_family(bsp_cpuid_1_eax) >= 0x17)
+ return;
+
vfree(equiv_table.entry);
memset(&equiv_table, 0, sizeof(equiv_table));
}
@@ -749,7 +875,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover,
u16 proc_id;
int ret;
- ret = verify_patch(family, fw, leftover, patch_size);
+ ret = verify_patch(fw, leftover, patch_size);
if (ret)
return ret;
@@ -774,7 +900,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover,
patch->patch_id = mc_hdr->patch_id;
patch->equiv_cpu = proc_id;
- pr_debug("%s: Added patch_id: 0x%08x, proc_id: 0x%04x\n",
+ pr_debug("%s: Adding patch_id: 0x%08x, proc_id: 0x%04x\n",
__func__, patch->patch_id, proc_id);
/* ... and add to cache. */
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e0fd57a8ba84..ead967479fa6 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -199,8 +199,8 @@ static void hv_machine_shutdown(void)
* Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor
* corrupts the old VP Assist Pages and can crash the kexec kernel.
*/
- if (kexec_in_progress && hyperv_init_cpuhp > 0)
- cpuhp_remove_state(hyperv_init_cpuhp);
+ if (kexec_in_progress)
+ cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE);
/* The function calls stop_other_cpus(). */
native_machine_shutdown();
@@ -424,6 +424,7 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
x86_platform.calibrate_tsc = hv_get_tsc_khz;
x86_platform.calibrate_cpu = hv_get_tsc_khz;
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
}
if (ms_hyperv.priv_high & HV_ISOLATION) {
@@ -449,9 +450,23 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
if (!ms_hyperv.paravisor_present) {
- /* To be supported: more work is required. */
+ /*
+ * Mark the Hyper-V TSC page feature as disabled
+ * in a TDX VM without paravisor so that the
+ * Invariant TSC, which is a better clocksource
+ * anyway, is used instead.
+ */
ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE;
+ /*
+ * The Invariant TSC is expected to be available
+ * in a TDX VM without paravisor, but if not,
+ * print a warning message. The slower Hyper-V MSR-based
+ * Ref Counter should end up being the clocksource.
+ */
+ if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
+ pr_warn("Hyper-V: Invariant TSC is unavailable\n");
+
/* HV_MSR_CRASH_CTL is unsupported. */
ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 1930fce9dfe9..8591d53c144b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -119,6 +119,14 @@ struct rdt_hw_resource rdt_resources_all[] = {
},
};
+u32 resctrl_arch_system_num_rmid_idx(void)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
+ /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
+ return r->num_rmid;
+}
+
/*
* cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
* as they do not have CPUID enumeration support for Cache allocation.
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 27892e57c4ef..f3f1461273ee 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -475,24 +475,25 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
{
struct sgx_epc_page *page;
int nid_of_current = numa_node_id();
- int nid = nid_of_current;
+ int nid_start, nid;
- if (node_isset(nid_of_current, sgx_numa_mask)) {
- page = __sgx_alloc_epc_page_from_node(nid_of_current);
- if (page)
- return page;
- }
-
- /* Fall back to the non-local NUMA nodes: */
- while (true) {
- nid = next_node_in(nid, sgx_numa_mask);
- if (nid == nid_of_current)
- break;
+ /*
+ * Try local node first. If it doesn't have an EPC section,
+ * fall back to the non-local NUMA nodes.
+ */
+ if (node_isset(nid_of_current, sgx_numa_mask))
+ nid_start = nid_of_current;
+ else
+ nid_start = next_node_in(nid_of_current, sgx_numa_mask);
+ nid = nid_start;
+ do {
page = __sgx_alloc_epc_page_from_node(nid);
if (page)
return page;
- }
+
+ nid = next_node_in(nid, sgx_numa_mask);
+ } while (nid != nid_start);
return ERR_PTR(-ENOMEM);
}
@@ -847,6 +848,13 @@ static bool __init sgx_page_cache_init(void)
return false;
}
+ for_each_online_node(nid) {
+ if (!node_isset(nid, sgx_numa_mask) &&
+ node_state(nid, N_MEMORY) && node_state(nid, N_CPU))
+ pr_info("node%d has both CPUs and memory but doesn't have an EPC section\n",
+ nid);
+ }
+
return true;
}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c5a026fee5e0..1339f8328db5 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -788,6 +788,9 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
goto out_disable;
}
+ fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
+ XFEATURE_MASK_INDEPENDENT;
+
/*
* Clear XSAVE features that are disabled in the normal CPUID.
*/
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 2ee0b9c53dcc..afb404cd2059 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -62,9 +62,9 @@ static inline u64 xfeatures_mask_supervisor(void)
static inline u64 xfeatures_mask_independent(void)
{
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR))
- return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
+ return fpu_kernel_cfg.independent_features & ~XFEATURE_MASK_LBR;
- return XFEATURE_MASK_INDEPENDENT;
+ return fpu_kernel_cfg.independent_features;
}
/* XSAVE/XRSTOR wrapper functions */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 472a1537b7a9..730c2f34d347 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -19,7 +19,6 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
- depends on HIGH_RES_TIMERS
depends on X86_LOCAL_APIC
select KVM_COMMON
select KVM_GENERIC_MMU_NOTIFIER
@@ -144,8 +143,10 @@ config KVM_AMD_SEV
select HAVE_KVM_ARCH_GMEM_PREPARE
select HAVE_KVM_ARCH_GMEM_INVALIDATE
help
- Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
- with Encrypted State (SEV-ES) on AMD processors.
+ Provides support for launching encrypted VMs which use Secure
+ Encrypted Virtualization (SEV), Secure Encrypted Virtualization with
+ Encrypted State (SEV-ES), and Secure Encrypted Virtualization with
+ Secure Nested Paging (SEV-SNP) technologies on AMD processors.
config KVM_SMM
bool "System Management Mode emulation"
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 928cf84778b0..7813d28b082f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4674,16 +4674,14 @@ out_unlock:
bool kvm_mmu_may_ignore_guest_pat(void)
{
/*
- * When EPT is enabled (shadow_memtype_mask is non-zero), the CPU does
- * not support self-snoop (or is affected by an erratum), and the VM
+ * When EPT is enabled (shadow_memtype_mask is non-zero), and the VM
* has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is to
* honor the memtype from the guest's PAT so that guest accesses to
* memory that is DMA'd aren't cached against the guest's wishes. As a
* result, KVM _may_ ignore guest PAT, whereas without non-coherent DMA,
- * KVM _always_ ignores or honors guest PAT, i.e. doesn't toggle SPTE
- * bits in response to non-coherent device (un)registration.
+ * KVM _always_ ignores guest PAT (when EPT is enabled).
*/
- return !static_cpu_has(X86_FEATURE_SELFSNOOP) && shadow_memtype_mask;
+ return shadow_memtype_mask;
}
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -4750,7 +4748,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
* reload is efficient when called repeatedly, so we can do it on
* every iteration.
*/
- kvm_mmu_reload(vcpu);
+ r = kvm_mmu_reload(vcpu);
+ if (r)
+ return r;
if (kvm_arch_has_private_mem(vcpu->kvm) &&
kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa)))
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index d4527965e48c..8f7eb3ad88fc 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -391,9 +391,9 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
mmio_value = 0;
/*
- * The masked MMIO value must obviously match itself and a removed SPTE
- * must not get a false positive. Removed SPTEs and MMIO SPTEs should
- * never collide as MMIO must set some RWX bits, and removed SPTEs must
+ * The masked MMIO value must obviously match itself and a frozen SPTE
+ * must not get a false positive. Frozen SPTEs and MMIO SPTEs should
+ * never collide as MMIO must set some RWX bits, and frozen SPTEs must
* not set any RWX bits.
*/
if (WARN_ON((mmio_value & mmio_mask) != mmio_value) ||
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index ef793c459b05..2cb816ea2430 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -214,7 +214,7 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
*/
#define FROZEN_SPTE (SHADOW_NONPRESENT_VALUE | 0x5a0ULL)
-/* Removed SPTEs must not be misconstrued as shadow present PTEs. */
+/* Frozen SPTEs must not be misconstrued as shadow present PTEs. */
static_assert(!(FROZEN_SPTE & SPTE_MMU_PRESENT_MASK));
static inline bool is_frozen_spte(u64 spte)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c7dc49ee7388..3c55955bcaf8 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -359,10 +359,10 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
/*
* Set the SPTE to a nonpresent value that other
* threads will not overwrite. If the SPTE was
- * already marked as removed then another thread
+ * already marked as frozen then another thread
* handling a page fault could overwrite it, so
* set the SPTE until it is set from some other
- * value to the removed SPTE value.
+ * value to the frozen SPTE value.
*/
for (;;) {
old_spte = kvm_tdp_mmu_write_spte_atomic(sptep, FROZEN_SPTE);
@@ -536,8 +536,8 @@ static inline int __must_check __tdp_mmu_set_spte_atomic(struct tdp_iter *iter,
u64 *sptep = rcu_dereference(iter->sptep);
/*
- * The caller is responsible for ensuring the old SPTE is not a REMOVED
- * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
+ * The caller is responsible for ensuring the old SPTE is not a FROZEN
+ * SPTE. KVM should never attempt to zap or manipulate a FROZEN SPTE,
* and pre-checking before inserting a new SPTE is advantageous as it
* avoids unnecessary work.
*/
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d6f252555ab3..5ab2c92c7331 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2876,6 +2876,12 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_CSTAR:
msr_info->data = svm->vmcb01.ptr->save.cstar;
break;
+ case MSR_GS_BASE:
+ msr_info->data = svm->vmcb01.ptr->save.gs.base;
+ break;
+ case MSR_FS_BASE:
+ msr_info->data = svm->vmcb01.ptr->save.fs.base;
+ break;
case MSR_KERNEL_GS_BASE:
msr_info->data = svm->vmcb01.ptr->save.kernel_gs_base;
break;
@@ -3101,6 +3107,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_CSTAR:
svm->vmcb01.ptr->save.cstar = data;
break;
+ case MSR_GS_BASE:
+ svm->vmcb01.ptr->save.gs.base = data;
+ break;
+ case MSR_FS_BASE:
+ svm->vmcb01.ptr->save.fs.base = data;
+ break;
case MSR_KERNEL_GS_BASE:
svm->vmcb01.ptr->save.kernel_gs_base = data;
break;
@@ -5224,6 +5236,9 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x8000001F (SME/SEV features) */
sev_set_cpu_caps();
+
+ /* Don't advertise Bus Lock Detect to guest if SVM support is absent */
+ kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
}
static __init int svm_hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f18c2d8c7476..733a0c45d1a6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7659,13 +7659,11 @@ u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
/*
* Force WB and ignore guest PAT if the VM does NOT have a non-coherent
- * device attached and the CPU doesn't support self-snoop. Letting the
- * guest control memory types on Intel CPUs without self-snoop may
- * result in unexpected behavior, and so KVM's (historical) ABI is to
- * trust the guest to behave only as a last resort.
+ * device attached. Letting the guest control memory types on Intel
+ * CPUs may result in unexpected behavior, and so KVM's ABI is to trust
+ * the guest to behave only as a last resort.
*/
- if (!static_cpu_has(X86_FEATURE_SELFSNOOP) &&
- !kvm_arch_has_noncoherent_dma(vcpu->kvm))
+ if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 70219e406987..c983c8e434b8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4656,7 +4656,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ASYNC_PF_INT:
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_KVMCLOCK_CTRL:
- case KVM_CAP_READONLY_MEM:
case KVM_CAP_IOAPIC_POLARITY_IGNORED:
case KVM_CAP_TSC_DEADLINE_TIMER:
case KVM_CAP_DISABLE_QUIRKS:
@@ -4815,6 +4814,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VM_TYPES:
r = kvm_caps.supported_vm_types;
break;
+ case KVM_CAP_READONLY_MEM:
+ r = kvm ? kvm_arch_has_readonly_mem(kvm) : 1;
+ break;
default:
break;
}
@@ -6040,7 +6042,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
break;
+ kvm_vcpu_srcu_read_lock(vcpu);
r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
+ kvm_vcpu_srcu_read_unlock(vcpu);
break;
}
case KVM_GET_DEBUGREGS: {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d8dbeac8b206..ff253648706f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -958,8 +958,12 @@ static void update_end_of_memory_vars(u64 start, u64 size)
int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
struct mhp_params *params)
{
+ unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1;
int ret;
+ if (WARN_ON_ONCE(end > PHYSMEM_END))
+ return -ERANGE;
+
ret = __add_pages(nid, start_pfn, nr_pages, params);
WARN_ON_ONCE(ret);
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 37db264866b6..230f1dee4f09 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -47,13 +47,24 @@ static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
*/
static __initdata struct kaslr_memory_region {
unsigned long *base;
+ unsigned long *end;
unsigned long size_tb;
} kaslr_regions[] = {
- { &page_offset_base, 0 },
- { &vmalloc_base, 0 },
- { &vmemmap_base, 0 },
+ {
+ .base = &page_offset_base,
+ .end = &physmem_end,
+ },
+ {
+ .base = &vmalloc_base,
+ },
+ {
+ .base = &vmemmap_base,
+ },
};
+/* The end of the possible address space for physical memory */
+unsigned long physmem_end __ro_after_init;
+
/* Get size in bytes used by the memory region */
static inline unsigned long get_padding(struct kaslr_memory_region *region)
{
@@ -82,6 +93,8 @@ void __init kernel_randomize_memory(void)
BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
+ /* Preset the end of the possible address space for physical memory */
+ physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1);
if (!kaslr_memory_enabled())
return;
@@ -128,11 +141,18 @@ void __init kernel_randomize_memory(void)
vaddr += entropy;
*kaslr_regions[i].base = vaddr;
+ /* Calculate the end of the region */
+ vaddr += get_padding(&kaslr_regions[i]);
/*
- * Jump the region and add a minimum padding based on
- * randomization alignment.
+ * KASLR trims the maximum possible size of the
+ * direct-map. Update the physmem_end boundary.
+ * No rounding required as the region starts
+ * PUD aligned and size is in units of TB.
*/
- vaddr += get_padding(&kaslr_regions[i]);
+ if (kaslr_regions[i].end)
+ *kaslr_regions[i].end = __pa_nodebug(vaddr - 1);
+
+ /* Add a minimum padding based on randomization alignment. */
vaddr = round_up(vaddr + 1, PUD_SIZE);
remain_entropy -= entropy;
}
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index f200a4ec044e..d3db28f2f811 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -14,6 +14,7 @@ config XTENSA
select ARCH_HAS_DMA_SET_UNCACHED if MMU
select ARCH_HAS_STRNCPY_FROM_USER if !KASAN
select ARCH_HAS_STRNLEN_USER
+ select ARCH_NEED_CMPXCHG_1_EMU
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index 675a11ea8de7..95e33a913962 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -15,6 +15,7 @@
#include <linux/bits.h>
#include <linux/stringify.h>
+#include <linux/cmpxchg-emu.h>
/*
* cmpxchg
@@ -74,6 +75,7 @@ static __inline__ unsigned long
__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
{
switch (size) {
+ case 1: return cmpxchg_emu_u8(ptr, old, new);
case 4: return __cmpxchg_u32(ptr, old, new);
default: __cmpxchg_called_with_bad_pointer();
return old;