From ad14c19242b5a5f87aa86c4c930e668121de2809 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 28 Aug 2020 11:18:22 +0800 Subject: arm64: fix some spelling mistakes in the comments by codespell arch/arm64/include/asm/cpu_ops.h:24: necesary ==> necessary arch/arm64/include/asm/kvm_arm.h:69: maintainance ==> maintenance arch/arm64/include/asm/cpufeature.h:361: capabilties ==> capabilities arch/arm64/kernel/perf_regs.c:19: compatability ==> compatibility arch/arm64/kernel/smp_spin_table.c:86: endianess ==> endianness arch/arm64/kernel/smp_spin_table.c:88: endianess ==> endianness arch/arm64/kvm/vgic/vgic-mmio-v3.c:1004: targetting ==> targeting arch/arm64/kvm/vgic/vgic-mmio-v3.c:1005: targetting ==> targeting Signed-off-by: Xiaoming Ni Link: https://lore.kernel.org/r/20200828031822.35928-1-nixiaoming@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu_ops.h | 2 +- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/kvm_arm.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index d28e8f37d3b4..e95c4df83911 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -21,7 +21,7 @@ * mechanism for doing so, tests whether it is possible to boot * the given CPU. * @cpu_boot: Boots a cpu into the kernel. - * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary + * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary * synchronisation. Called from the cpu being booted. * @cpu_can_disable: Determines whether a CPU can be disabled based on * mechanism-specific information. diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 89b4f0142c28..3a42dc8e697c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -358,7 +358,7 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) } /* - * Generic helper for handling capabilties with multiple (match,enable) pairs + * Generic helper for handling capabilities with multiple (match,enable) pairs * of call backs, sharing the same capability bit. * Iterate over each entry to see if at least one matches. */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1da8e3dc4455..32acf95d49c7 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -66,7 +66,7 @@ * TWI: Trap WFI * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain - * FB: Force broadcast of all maintainance operations + * FB: Force broadcast of all maintenance operations * AMO: Override CPSR.A and enable signaling with VA * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF -- cgit v1.2.3 From 120dc60d0bdbadcad7460222f74c9ed15cdeb73e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 25 Aug 2020 15:54:40 +0200 Subject: arm64: get rid of TEXT_OFFSET TEXT_OFFSET serves no purpose, and for this reason, it was redefined as 0x0 in the v5.8 timeframe. Since this does not appear to have caused any issues that require us to revisit that decision, let's get rid of the macro entirely, along with any references to it. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200825135440.11288-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 6 ------ arch/arm64/include/asm/boot.h | 3 +-- arch/arm64/include/asm/kernel-pgtable.h | 2 +- arch/arm64/include/asm/memory.h | 2 +- arch/arm64/kernel/Makefile | 2 -- arch/arm64/kernel/head.S | 16 ++++++---------- arch/arm64/kernel/image.h | 1 - arch/arm64/kernel/vmlinux.lds.S | 4 ++-- drivers/firmware/efi/libstub/Makefile | 1 - drivers/firmware/efi/libstub/arm64-stub.c | 6 +++--- 10 files changed, 14 insertions(+), 29 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 130569f90c54..0fd4c1be4f64 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -11,7 +11,6 @@ # Copyright (C) 1995-2001 by Russell King LDFLAGS_vmlinux :=--no-undefined -X -CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -132,9 +131,6 @@ endif # Default value head-y := arch/arm64/kernel/head.o -# The byte offset of the kernel image in RAM from the start of RAM. -TEXT_OFFSET := 0x0 - ifeq ($(CONFIG_KASAN_SW_TAGS), y) KASAN_SHADOW_SCALE_SHIFT := 4 else @@ -145,8 +141,6 @@ KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) -export TEXT_OFFSET - core-y += arch/arm64/ libs-y := arch/arm64/lib/ $(libs-y) libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h index c7f67da13cd9..3e7943fd17a4 100644 --- a/arch/arm64/include/asm/boot.h +++ b/arch/arm64/include/asm/boot.h @@ -13,8 +13,7 @@ #define MAX_FDT_SIZE SZ_2M /* - * arm64 requires the kernel image to placed - * TEXT_OFFSET bytes beyond a 2 MB aligned base + * arm64 requires the kernel image to placed at a 2 MB aligned base address */ #define MIN_KIMG_ALIGN SZ_2M diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 329fb15f6bac..19ca76ea60d9 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -86,7 +86,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index afa722504bfd..20a9b322d342 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -169,7 +169,7 @@ extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) -/* the virtual base of the kernel image (minus TEXT_OFFSET) */ +/* the virtual base of the kernel image */ extern u64 kimage_vaddr; /* the offset between the kernel virtual and physical mappings */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a561cbb91d4d..4197ef2fb22e 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux kernel. # -CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) -AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 037421c66b14..d8d9caf02834 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -36,14 +36,10 @@ #include "efi-header.S" -#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) +#define __PHYS_OFFSET KERNEL_START -#if (TEXT_OFFSET & 0xfff) != 0 -#error TEXT_OFFSET must be at least 4KB aligned -#elif (PAGE_OFFSET & 0x1fffff) != 0 +#if (PAGE_OFFSET & 0x1fffff) != 0 #error PAGE_OFFSET must be at least 2MB aligned -#elif TEXT_OFFSET > 0x1fffff -#error TEXT_OFFSET must be less than 2MB #endif /* @@ -55,7 +51,7 @@ * x0 = physical address to the FDT blob. * * This code is mostly position independent so you call this at - * __pa(PAGE_OFFSET + TEXT_OFFSET). + * __pa(PAGE_OFFSET). * * Note that the callee-saved registers are used for storing variables * that are useful before the MMU is enabled. The allocations are described @@ -77,7 +73,7 @@ _head: b primary_entry // branch to kernel start, magic .long 0 // reserved #endif - le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian + .quad 0 // Image load offset from start of RAM, little-endian le64sym _kernel_size_le // Effective size of kernel image, little-endian le64sym _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved @@ -382,7 +378,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) * Map the kernel image (starting with PHYS_OFFSET). */ adrp x0, init_pg_dir - mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) + mov_q x5, KIMAGE_VADDR // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD adrp x6, _end // runtime __pa(_end) @@ -474,7 +470,7 @@ SYM_FUNC_END(__primary_switched) .pushsection ".rodata", "a" SYM_DATA_START(kimage_vaddr) - .quad _text - TEXT_OFFSET + .quad _text SYM_DATA_END(kimage_vaddr) EXPORT_SYMBOL(kimage_vaddr) .popsection diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index c7d38c660372..7bc3ba897901 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -62,7 +62,6 @@ */ #define HEAD_SYMBOLS \ DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \ - DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); #endif /* __ARM64_KERNEL_IMAGE_H */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7cba7623fcec..82801d98a2b7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -105,7 +105,7 @@ SECTIONS *(.eh_frame) } - . = KIMAGE_VADDR + TEXT_OFFSET; + . = KIMAGE_VADDR; .head.text : { _text = .; @@ -274,4 +274,4 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ -ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") +ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned") diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 296b18fbd7a2..5a80cf6bd606 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -64,7 +64,6 @@ lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o lib-$(CONFIG_X86) += x86-stub.o CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) # # For x86, bootloaders like systemd-boot or grub-efi do not zero-initialize the diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index e5bfac79e5ac..fc178398f1ac 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -77,7 +77,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); - *reserve_size = kernel_memsize + TEXT_OFFSET % min_kimg_align(); + *reserve_size = kernel_memsize; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) { /* @@ -91,7 +91,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } if (status != EFI_SUCCESS) { - if (IS_ALIGNED((u64)_text - TEXT_OFFSET, min_kimg_align())) { + if (IS_ALIGNED((u64)_text, min_kimg_align())) { /* * Just execute from wherever we were loaded by the * UEFI PE/COFF loader if the alignment is suitable. @@ -111,7 +111,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } } - *image_addr = *reserve_addr + TEXT_OFFSET % min_kimg_align(); + *image_addr = *reserve_addr; memcpy((void *)*image_addr, _text, kernel_size); return EFI_SUCCESS; -- cgit v1.2.3 From b65399f6111b03df870d8daa75b8d140c0de93f4 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Sep 2020 10:23:02 +0530 Subject: arm64/mm: Change THP helpers to comply with generic MM semantics pmd_present() and pmd_trans_huge() are expected to behave in the following manner during various phases of a given PMD. It is derived from a previous detailed discussion on this topic [1] and present THP documentation [2]. pmd_present(pmd): - Returns true if pmd refers to system RAM with a valid pmd_page(pmd) - Returns false if pmd refers to a migration or swap entry pmd_trans_huge(pmd): - Returns true if pmd refers to system RAM and is a trans huge mapping ------------------------------------------------------------------------- | PMD states | pmd_present | pmd_trans_huge | ------------------------------------------------------------------------- | Mapped | Yes | Yes | ------------------------------------------------------------------------- | Splitting | Yes | Yes | ------------------------------------------------------------------------- | Migration/Swap | No | No | ------------------------------------------------------------------------- The problem: PMD is first invalidated with pmdp_invalidate() before it's splitting. This invalidation clears PMD_SECT_VALID as below. PMD Split -> pmdp_invalidate() -> pmd_mkinvalid -> Clears PMD_SECT_VALID Once PMD_SECT_VALID gets cleared, it results in pmd_present() return false on the PMD entry. It will need another bit apart from PMD_SECT_VALID to re- affirm pmd_present() as true during the THP split process. To comply with above mentioned semantics, pmd_trans_huge() should also check pmd_present() first before testing presence of an actual transparent huge mapping. The solution: Ideally PMD_TYPE_SECT should have been used here instead. But it shares the bit position with PMD_SECT_VALID which is used for THP invalidation. Hence it will not be there for pmd_present() check after pmdp_invalidate(). A new software defined PMD_PRESENT_INVALID (bit 59) can be set on the PMD entry during invalidation which can help pmd_present() return true and in recognizing the fact that it still points to memory. This bit is transient. During the split process it will be overridden by a page table page representing normal pages in place of erstwhile huge page. Other pmdp_invalidate() callers always write a fresh PMD value on the entry overriding this transient PMD_PRESENT_INVALID bit, which makes it safe. [1]: https://lkml.org/lkml/2018/10/17/231 [2]: https://www.kernel.org/doc/Documentation/vm/transhuge.txt Signed-off-by: Anshuman Khandual Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599627183-14453-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 7 +++++++ arch/arm64/include/asm/pgtable.h | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 4d867c6446c4..2df4b75fce3c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -19,6 +19,13 @@ #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ +/* + * This bit indicates that the entry is present i.e. pmd_page() + * still points to a valid huge page in memory even if the pmd + * has been invalidated. + */ +#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d5d3fbe73953..d8258ae8fce0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -145,6 +145,18 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) return pte; } +static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) &= ~pgprot_val(prot); + return pmd; +} + +static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) |= pgprot_val(prot); + return pmd; +} + static inline pte_t pte_wrprotect(pte_t pte) { pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -363,15 +375,24 @@ static inline int pmd_protnone(pmd_t pmd) } #endif +#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) + +static inline int pmd_present(pmd_t pmd) +{ + return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); +} + /* * THP definitions. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -#define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) @@ -381,7 +402,14 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mkinvalid(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); + pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); + + return pmd; +} #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) -- cgit v1.2.3 From 53fa117bb33c9ebc4c0746b3830e273f5e9b97b7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Sep 2020 10:23:03 +0530 Subject: arm64/mm: Enable THP migration In certain page migration situations, a THP page can be migrated without being split into it's constituent subpages. This saves time required to split a THP and put it back together when required. But it also saves an wider address range translation covered by a single TLB entry, reducing future page fault costs. A previous patch changed platform THP helpers per generic memory semantics, clearing the path for THP migration support. This adds two more THP helpers required to create PMD migration swap entries. Now enable THP migration via ARCH_ENABLE_THP_MIGRATION. Signed-off-by: Anshuman Khandual Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599627183-14453-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++++ arch/arm64/include/asm/pgtable.h | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6d232837cbee..e21b94061780 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1876,6 +1876,10 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on HUGETLB_PAGE && MIGRATION +config ARCH_ENABLE_THP_MIGRATION + def_bool y + depends on TRANSPARENT_HUGEPAGE + menu "Power management options" source "kernel/power/Kconfig" diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d8258ae8fce0..bc68da9f5706 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -875,6 +875,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) +#define __swp_entry_to_pmd(swp) __pmd((swp).val) +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ + /* * Ensure that there are not more swap files than can be encoded in the kernel * PTEs. -- cgit v1.2.3 From 4e56de82d4ec9d98ffdc9e0387d8fdecbf496226 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Sep 2020 11:18:55 +0530 Subject: arm64/cpuinfo: Define HWCAP name arrays per their actual bit definitions HWCAP name arrays (hwcap_str, compat_hwcap_str, compat_hwcap2_str) that are scanned for /proc/cpuinfo are detached from their bit definitions making it vulnerable and difficult to correlate. It is also bit problematic because during /proc/cpuinfo dump these arrays get traversed sequentially assuming they reflect and match actual HWCAP bit sequence, to test various features for a given CPU. This redefines name arrays per their HWCAP bit definitions . It also warns after detecting any feature which is not expected on arm64. Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Brown Cc: Dave Martin Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599630535-29337-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/hwcap.h | 9 +++ arch/arm64/kernel/cpuinfo.c | 177 +++++++++++++++++++++-------------------- 2 files changed, 101 insertions(+), 85 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 22f73fe09030..6493a4c63a2f 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -8,18 +8,27 @@ #include #include +#define COMPAT_HWCAP_SWP (1 << 0) #define COMPAT_HWCAP_HALF (1 << 1) #define COMPAT_HWCAP_THUMB (1 << 2) +#define COMPAT_HWCAP_26BIT (1 << 3) #define COMPAT_HWCAP_FAST_MULT (1 << 4) +#define COMPAT_HWCAP_FPA (1 << 5) #define COMPAT_HWCAP_VFP (1 << 6) #define COMPAT_HWCAP_EDSP (1 << 7) +#define COMPAT_HWCAP_JAVA (1 << 8) +#define COMPAT_HWCAP_IWMMXT (1 << 9) +#define COMPAT_HWCAP_CRUNCH (1 << 10) +#define COMPAT_HWCAP_THUMBEE (1 << 11) #define COMPAT_HWCAP_NEON (1 << 12) #define COMPAT_HWCAP_VFPv3 (1 << 13) +#define COMPAT_HWCAP_VFPV3D16 (1 << 14) #define COMPAT_HWCAP_TLS (1 << 15) #define COMPAT_HWCAP_VFPv4 (1 << 16) #define COMPAT_HWCAP_IDIVA (1 << 17) #define COMPAT_HWCAP_IDIVT (1 << 18) #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) +#define COMPAT_HWCAP_VFPD32 (1 << 19) #define COMPAT_HWCAP_LPAE (1 << 20) #define COMPAT_HWCAP_EVTSTRM (1 << 21) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index d0076c2159e6..25113245825c 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -43,94 +43,92 @@ static const char *icache_policy_str[] = { unsigned long __icache_flags; static const char *const hwcap_str[] = { - "fp", - "asimd", - "evtstrm", - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - "atomics", - "fphp", - "asimdhp", - "cpuid", - "asimdrdm", - "jscvt", - "fcma", - "lrcpc", - "dcpop", - "sha3", - "sm3", - "sm4", - "asimddp", - "sha512", - "sve", - "asimdfhm", - "dit", - "uscat", - "ilrcpc", - "flagm", - "ssbs", - "sb", - "paca", - "pacg", - "dcpodp", - "sve2", - "sveaes", - "svepmull", - "svebitperm", - "svesha3", - "svesm4", - "flagm2", - "frint", - "svei8mm", - "svef32mm", - "svef64mm", - "svebf16", - "i8mm", - "bf16", - "dgh", - "rng", - "bti", - /* reserved for "mte" */ - NULL + [KERNEL_HWCAP_FP] = "fp", + [KERNEL_HWCAP_ASIMD] = "asimd", + [KERNEL_HWCAP_EVTSTRM] = "evtstrm", + [KERNEL_HWCAP_AES] = "aes", + [KERNEL_HWCAP_PMULL] = "pmull", + [KERNEL_HWCAP_SHA1] = "sha1", + [KERNEL_HWCAP_SHA2] = "sha2", + [KERNEL_HWCAP_CRC32] = "crc32", + [KERNEL_HWCAP_ATOMICS] = "atomics", + [KERNEL_HWCAP_FPHP] = "fphp", + [KERNEL_HWCAP_ASIMDHP] = "asimdhp", + [KERNEL_HWCAP_CPUID] = "cpuid", + [KERNEL_HWCAP_ASIMDRDM] = "asimdrdm", + [KERNEL_HWCAP_JSCVT] = "jscvt", + [KERNEL_HWCAP_FCMA] = "fcma", + [KERNEL_HWCAP_LRCPC] = "lrcpc", + [KERNEL_HWCAP_DCPOP] = "dcpop", + [KERNEL_HWCAP_SHA3] = "sha3", + [KERNEL_HWCAP_SM3] = "sm3", + [KERNEL_HWCAP_SM4] = "sm4", + [KERNEL_HWCAP_ASIMDDP] = "asimddp", + [KERNEL_HWCAP_SHA512] = "sha512", + [KERNEL_HWCAP_SVE] = "sve", + [KERNEL_HWCAP_ASIMDFHM] = "asimdfhm", + [KERNEL_HWCAP_DIT] = "dit", + [KERNEL_HWCAP_USCAT] = "uscat", + [KERNEL_HWCAP_ILRCPC] = "ilrcpc", + [KERNEL_HWCAP_FLAGM] = "flagm", + [KERNEL_HWCAP_SSBS] = "ssbs", + [KERNEL_HWCAP_SB] = "sb", + [KERNEL_HWCAP_PACA] = "paca", + [KERNEL_HWCAP_PACG] = "pacg", + [KERNEL_HWCAP_DCPODP] = "dcpodp", + [KERNEL_HWCAP_SVE2] = "sve2", + [KERNEL_HWCAP_SVEAES] = "sveaes", + [KERNEL_HWCAP_SVEPMULL] = "svepmull", + [KERNEL_HWCAP_SVEBITPERM] = "svebitperm", + [KERNEL_HWCAP_SVESHA3] = "svesha3", + [KERNEL_HWCAP_SVESM4] = "svesm4", + [KERNEL_HWCAP_FLAGM2] = "flagm2", + [KERNEL_HWCAP_FRINT] = "frint", + [KERNEL_HWCAP_SVEI8MM] = "svei8mm", + [KERNEL_HWCAP_SVEF32MM] = "svef32mm", + [KERNEL_HWCAP_SVEF64MM] = "svef64mm", + [KERNEL_HWCAP_SVEBF16] = "svebf16", + [KERNEL_HWCAP_I8MM] = "i8mm", + [KERNEL_HWCAP_BF16] = "bf16", + [KERNEL_HWCAP_DGH] = "dgh", + [KERNEL_HWCAP_RNG] = "rng", + [KERNEL_HWCAP_BTI] = "bti", }; #ifdef CONFIG_COMPAT +#define COMPAT_KERNEL_HWCAP(x) const_ilog2(COMPAT_HWCAP_ ## x) static const char *const compat_hwcap_str[] = { - "swp", - "half", - "thumb", - "26bit", - "fastmult", - "fpa", - "vfp", - "edsp", - "java", - "iwmmxt", - "crunch", - "thumbee", - "neon", - "vfpv3", - "vfpv3d16", - "tls", - "vfpv4", - "idiva", - "idivt", - "vfpd32", - "lpae", - "evtstrm", - NULL + [COMPAT_KERNEL_HWCAP(SWP)] = "swp", + [COMPAT_KERNEL_HWCAP(HALF)] = "half", + [COMPAT_KERNEL_HWCAP(THUMB)] = "thumb", + [COMPAT_KERNEL_HWCAP(26BIT)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(FAST_MULT)] = "fastmult", + [COMPAT_KERNEL_HWCAP(FPA)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(VFP)] = "vfp", + [COMPAT_KERNEL_HWCAP(EDSP)] = "edsp", + [COMPAT_KERNEL_HWCAP(JAVA)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(IWMMXT)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(CRUNCH)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(THUMBEE)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(NEON)] = "neon", + [COMPAT_KERNEL_HWCAP(VFPv3)] = "vfpv3", + [COMPAT_KERNEL_HWCAP(VFPV3D16)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(TLS)] = "tls", + [COMPAT_KERNEL_HWCAP(VFPv4)] = "vfpv4", + [COMPAT_KERNEL_HWCAP(IDIVA)] = "idiva", + [COMPAT_KERNEL_HWCAP(IDIVT)] = "idivt", + [COMPAT_KERNEL_HWCAP(VFPD32)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(LPAE)] = "lpae", + [COMPAT_KERNEL_HWCAP(EVTSTRM)] = "evtstrm", }; +#define COMPAT_KERNEL_HWCAP2(x) const_ilog2(COMPAT_HWCAP2_ ## x) static const char *const compat_hwcap2_str[] = { - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - NULL + [COMPAT_KERNEL_HWCAP2(AES)] = "aes", + [COMPAT_KERNEL_HWCAP2(PMULL)] = "pmull", + [COMPAT_KERNEL_HWCAP2(SHA1)] = "sha1", + [COMPAT_KERNEL_HWCAP2(SHA2)] = "sha2", + [COMPAT_KERNEL_HWCAP2(CRC32)] = "crc32", }; #endif /* CONFIG_COMPAT */ @@ -166,16 +164,25 @@ static int c_show(struct seq_file *m, void *v) seq_puts(m, "Features\t:"); if (compat) { #ifdef CONFIG_COMPAT - for (j = 0; compat_hwcap_str[j]; j++) - if (compat_elf_hwcap & (1 << j)) + for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) { + if (compat_elf_hwcap & (1 << j)) { + /* + * Warn once if any feature should not + * have been present on arm64 platform. + */ + if (WARN_ON_ONCE(!compat_hwcap_str[j])) + continue; + seq_printf(m, " %s", compat_hwcap_str[j]); + } + } - for (j = 0; compat_hwcap2_str[j]; j++) + for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++) if (compat_elf_hwcap2 & (1 << j)) seq_printf(m, " %s", compat_hwcap2_str[j]); #endif /* CONFIG_COMPAT */ } else { - for (j = 0; hwcap_str[j]; j++) + for (j = 0; j < ARRAY_SIZE(hwcap_str); j++) if (cpu_have_feature(j)) seq_printf(m, " %s", hwcap_str[j]); } -- cgit v1.2.3 From 11e339d53a739e4cf885c1e2a843a4004204d271 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Sep 2020 19:59:34 +1000 Subject: arm64/mm: Remove CONT_RANGE_OFFSET The macro was introduced by commit ("arm64: PTE/PMD contiguous bit definition") at the beginning. It's only used by commit <348a65cdcbbf> ("arm64: Mark kernel page ranges contiguous"), which was reverted later by commit <667c27597ca8>. This makes the macro unused. This removes the unused macro (CONT_RANGE_OFFSET). Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20200910095936.20307-1-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-hwdef.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d400a4d9aee2..8a399e666837 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -98,8 +98,6 @@ #define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) #define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) -/* the numerical offset of the PTE within a range of CONT_PTES */ -#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) /* * Hardware page table definitions. -- cgit v1.2.3 From c0d6de327f18d39ef759aa883bffc3c32bc69015 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Sep 2020 19:59:35 +1000 Subject: arm64/mm: Unify CONT_PTE_SHIFT CONT_PTE_SHIFT actually depends on CONFIG_ARM64_CONT_SHIFT. It's reasonable to reflect the dependency: * This renames CONFIG_ARM64_CONT_SHIFT to CONFIG_ARM64_CONT_PTE_SHIFT, so that we can introduce CONFIG_ARM64_CONT_PMD_SHIFT later. * CONT_{SHIFT, SIZE, MASK}, defined in page-def.h are removed as they are not used by anyone. * CONT_PTE_SHIFT is determined by CONFIG_ARM64_CONT_PTE_SHIFT. Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20200910095936.20307-2-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- arch/arm64/include/asm/page-def.h | 5 ----- arch/arm64/include/asm/pgtable-hwdef.h | 4 +--- 3 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e21b94061780..4f3b7b24b5e5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -211,7 +211,7 @@ config ARM64_PAGE_SHIFT default 14 if ARM64_16K_PAGES default 12 -config ARM64_CONT_SHIFT +config ARM64_CONT_PTE_SHIFT int default 5 if ARM64_64K_PAGES default 7 if ARM64_16K_PAGES diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index f99d48ecbeef..2403f7b4cdbf 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -11,13 +11,8 @@ #include /* PAGE_SHIFT determines the page size */ -/* CONT_SHIFT determines the number of pages which can be tracked together */ #define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT -#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) -#define CONT_MASK (~(CONT_SIZE-1)) - #endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 8a399e666837..6c9c67f62551 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -81,14 +81,12 @@ /* * Contiguous page definitions. */ +#define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT) #ifdef CONFIG_ARM64_64K_PAGES -#define CONT_PTE_SHIFT (5 + PAGE_SHIFT) #define CONT_PMD_SHIFT (5 + PMD_SHIFT) #elif defined(CONFIG_ARM64_16K_PAGES) -#define CONT_PTE_SHIFT (7 + PAGE_SHIFT) #define CONT_PMD_SHIFT (5 + PMD_SHIFT) #else -#define CONT_PTE_SHIFT (4 + PAGE_SHIFT) #define CONT_PMD_SHIFT (4 + PMD_SHIFT) #endif -- cgit v1.2.3 From e676594115f0bcc12d4791f9ee919e20d8d750ee Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Sep 2020 19:59:36 +1000 Subject: arm64/mm: Unify CONT_PMD_SHIFT Similar to how CONT_PTE_SHIFT is determined, this introduces a new kernel option (CONFIG_CONT_PMD_SHIFT) to determine CONT_PMD_SHIFT. Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20200910095936.20307-3-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 6 ++++++ arch/arm64/include/asm/pgtable-hwdef.h | 10 ++-------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4f3b7b24b5e5..609629e36779 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -217,6 +217,12 @@ config ARM64_CONT_PTE_SHIFT default 7 if ARM64_16K_PAGES default 4 +config ARM64_CONT_PMD_SHIFT + int + default 5 if ARM64_64K_PAGES + default 5 if ARM64_16K_PAGES + default 4 + config ARCH_MMAP_RND_BITS_MIN default 14 if ARM64_64K_PAGES default 16 if ARM64_16K_PAGES diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 6c9c67f62551..94b3f2ac2e9d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -82,17 +82,11 @@ * Contiguous page definitions. */ #define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT) -#ifdef CONFIG_ARM64_64K_PAGES -#define CONT_PMD_SHIFT (5 + PMD_SHIFT) -#elif defined(CONFIG_ARM64_16K_PAGES) -#define CONT_PMD_SHIFT (5 + PMD_SHIFT) -#else -#define CONT_PMD_SHIFT (4 + PMD_SHIFT) -#endif - #define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT)) #define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) #define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) + +#define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT) #define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) #define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) -- cgit v1.2.3 From 93396936ed0ce2c6f44140bd14728611d0bb065e Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 14 Sep 2020 14:06:51 +0530 Subject: arm64: kprobe: add checks for ARMv8.3-PAuth combined instructions Currently the ARMv8.3-PAuth combined branch instructions (braa, retaa etc.) are not simulated for out-of-line execution with a handler. Hence the uprobe of such instructions leads to kernel warnings in a loop as they are not explicitly checked and fall into INSN_GOOD categories. Other combined instructions like LDRAA and LDRBB can be probed. The issue of the combined branch instructions is fixed by adding group definitions of all such instructions and rejecting their probes. The instruction groups added are br_auth(braa, brab, braaz and brabz), blr_auth(blraa, blrab, blraaz and blrabz), ret_auth(retaa and retab) and eret_auth(eretaa and eretab). Warning log: WARNING: CPU: 0 PID: 156 at arch/arm64/kernel/probes/uprobes.c:182 uprobe_single_step_handler+0x34/0x50 Modules linked in: CPU: 0 PID: 156 Comm: func Not tainted 5.9.0-rc3 #188 Hardware name: Foundation-v8A (DT) pstate: 804003c9 (Nzcv DAIF +PAN -UAO BTYPE=--) pc : uprobe_single_step_handler+0x34/0x50 lr : single_step_handler+0x70/0xf8 sp : ffff800012af3e30 x29: ffff800012af3e30 x28: ffff000878723b00 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 x23: 0000000060001000 x22: 00000000cb000022 x21: ffff800012065ce8 x20: ffff800012af3ec0 x19: ffff800012068d50 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : ffff800010085c90 x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffff80001205a9c8 x5 : ffff80001205a000 x4 : ffff80001233db80 x3 : ffff8000100a7a60 x2 : 0020000000000003 x1 : 0000fffffffff008 x0 : ffff800012af3ec0 Call trace: uprobe_single_step_handler+0x34/0x50 single_step_handler+0x70/0xf8 do_debug_exception+0xb8/0x130 el0_sync_handler+0x138/0x1b8 el0_sync+0x158/0x180 Fixes: 74afda4016a7 ("arm64: compile the kernel with ptrauth return address signing") Fixes: 04ca3204fa09 ("arm64: enable pointer authentication") Signed-off-by: Amit Daniel Kachhap Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200914083656.21428-2-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 4 ++++ arch/arm64/kernel/insn.c | 5 ++++- arch/arm64/kernel/probes/decode-insn.c | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 0bc46149e491..4b39293d0f72 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -359,9 +359,13 @@ __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) __AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000) __AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) __AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000) +__AARCH64_INSN_FUNCS(br_auth, 0xFEFFF800, 0xD61F0800) __AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000) +__AARCH64_INSN_FUNCS(blr_auth, 0xFEFFF800, 0xD63F0800) __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) +__AARCH64_INSN_FUNCS(ret_auth, 0xFFFFFBFF, 0xD65F0BFF) __AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0) +__AARCH64_INSN_FUNCS(eret_auth, 0xFFFFFBFF, 0xD69F0BFF) __AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) __AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index a107375005bc..ccc8c9e22b25 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -176,7 +176,7 @@ bool __kprobes aarch64_insn_uses_literal(u32 insn) bool __kprobes aarch64_insn_is_branch(u32 insn) { - /* b, bl, cb*, tb*, b.cond, br, blr */ + /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */ return aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn) || @@ -185,8 +185,11 @@ bool __kprobes aarch64_insn_is_branch(u32 insn) aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn) || aarch64_insn_is_ret(insn) || + aarch64_insn_is_ret_auth(insn) || aarch64_insn_is_br(insn) || + aarch64_insn_is_br_auth(insn) || aarch64_insn_is_blr(insn) || + aarch64_insn_is_blr_auth(insn) || aarch64_insn_is_bcond(insn); } diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 263d5fba4c8a..c541fb48886e 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -29,7 +29,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) aarch64_insn_is_msr_imm(insn) || aarch64_insn_is_msr_reg(insn) || aarch64_insn_is_exception(insn) || - aarch64_insn_is_eret(insn)) + aarch64_insn_is_eret(insn) || + aarch64_insn_is_eret_auth(insn)) return false; /* -- cgit v1.2.3 From 4ef333b2d10680b5d966a733ed7171f72164fcd5 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 14 Sep 2020 14:06:52 +0530 Subject: arm64: traps: Allow force_signal_inject to pass esr error code Some error signal need to pass proper ARM esr error code to userspace to better identify the cause of the signal. So the function force_signal_inject is extended to pass this as a parameter. The existing code is not affected by this change. Signed-off-by: Amit Daniel Kachhap Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200914083656.21428-3-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/traps.h | 2 +- arch/arm64/kernel/fpsimd.c | 4 ++-- arch/arm64/kernel/traps.c | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index cee5928e1b7d..d96dc2c7c09d 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -24,7 +24,7 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -void force_signal_inject(int signal, int code, unsigned long address); +void force_signal_inject(int signal, int code, unsigned long address, unsigned int err); void arm64_notify_segfault(unsigned long addr); void arm64_force_sig_fault(int signo, int code, void __user *addr, const char *str); void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 55c8f3ec6705..77484359d44a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -312,7 +312,7 @@ static void fpsimd_save(void) * re-enter user with corrupt state. * There's no way to recover, so kill it: */ - force_signal_inject(SIGKILL, SI_KERNEL, 0); + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); return; } @@ -936,7 +936,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 13ebd5ca2070..29fd00fe94f2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -412,7 +412,7 @@ exit: return fn ? fn(regs, instr) : 1; } -void force_signal_inject(int signal, int code, unsigned long address) +void force_signal_inject(int signal, int code, unsigned long address, unsigned int err) { const char *desc; struct pt_regs *regs = current_pt_regs(); @@ -438,7 +438,7 @@ void force_signal_inject(int signal, int code, unsigned long address) signal = SIGKILL; } - arm64_notify_die(desc, regs, signal, code, (void __user *)address, 0); + arm64_notify_die(desc, regs, signal, code, (void __user *)address, err); } /* @@ -455,7 +455,7 @@ void arm64_notify_segfault(unsigned long addr) code = SEGV_ACCERR; mmap_read_unlock(current->mm); - force_signal_inject(SIGSEGV, code, addr); + force_signal_inject(SIGSEGV, code, addr, 0); } void do_undefinstr(struct pt_regs *regs) @@ -468,14 +468,14 @@ void do_undefinstr(struct pt_regs *regs) return; BUG_ON(!user_mode(regs)); - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } NOKPROBE_SYMBOL(do_undefinstr); void do_bti(struct pt_regs *regs) { BUG_ON(!user_mode(regs)); - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } NOKPROBE_SYMBOL(do_bti); @@ -528,7 +528,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) __user_cache_maint("ic ivau", address, ret); break; default: - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } @@ -581,7 +581,7 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs) sysreg = esr_sys64_to_sysreg(esr); if (do_emulate_mrs(regs, sysreg, rt) != 0) - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } static void wfi_handler(unsigned int esr, struct pt_regs *regs) -- cgit v1.2.3 From e16aeb072682d3dcdbdad452c974baa0d2b0c6db Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 14 Sep 2020 14:06:53 +0530 Subject: arm64: ptrauth: Introduce Armv8.3 pointer authentication enhancements Some Armv8.3 Pointer Authentication enhancements have been introduced which are mandatory for Armv8.6 and optional for Armv8.3. These features are, * ARMv8.3-PAuth2 - An enhanced PAC generation logic is added which hardens finding the correct PAC value of the authenticated pointer. * ARMv8.3-FPAC - Fault is generated now when the ptrauth authentication instruction fails in authenticating the PAC present in the address. This is different from earlier case when such failures just adds an error code in the top byte and waits for subsequent load/store to abort. The ptrauth instructions which may cause this fault are autiasp, retaa etc. The above features are now represented by additional configurations for the Address Authentication cpufeature and a new ESR exception class. The userspace fault received in the kernel due to ARMv8.3-FPAC is treated as Illegal instruction and hence signal SIGILL is injected with ILL_ILLOPN as the signal code. Note that this is different from earlier ARMv8.3 ptrauth where signal SIGSEGV is issued due to Pointer authentication failures. The in-kernel PAC fault causes kernel to crash. Signed-off-by: Amit Daniel Kachhap Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200914083656.21428-4-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 4 +++- arch/arm64/include/asm/exception.h | 1 + arch/arm64/include/asm/sysreg.h | 24 ++++++++++++++++-------- arch/arm64/kernel/entry-common.c | 21 +++++++++++++++++++++ arch/arm64/kernel/traps.c | 12 ++++++++++++ 5 files changed, 53 insertions(+), 9 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 035003acfa87..22c81f1edda2 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -35,7 +35,9 @@ #define ESR_ELx_EC_SYS64 (0x18) #define ESR_ELx_EC_SVE (0x19) #define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ -/* Unallocated EC: 0x1b - 0x1E */ +/* Unallocated EC: 0x1B */ +#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ +/* Unallocated EC: 0x1D - 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 7577a754d443..99b9383cd036 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -47,4 +47,5 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); void do_cp15instr(unsigned int esr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); +void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 554a7e8ecb07..b738bc793369 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -636,14 +636,22 @@ #define ID_AA64ISAR1_APA_SHIFT 4 #define ID_AA64ISAR1_DPB_SHIFT 0 -#define ID_AA64ISAR1_APA_NI 0x0 -#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_API_NI 0x0 -#define ID_AA64ISAR1_API_IMP_DEF 0x1 -#define ID_AA64ISAR1_GPA_NI 0x0 -#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_GPI_NI 0x0 -#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 +#define ID_AA64ISAR1_APA_NI 0x0 +#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2 +#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_API_NI 0x0 +#define ID_AA64ISAR1_API_IMP_DEF 0x1 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_GPA_NI 0x0 +#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_GPI_NI 0x0 +#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index d3be9dbf5490..43d4c329775f 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -66,6 +66,13 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) } NOKPROBE_SYMBOL(el1_dbg); +static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr) +{ + local_daif_inherit(regs); + do_ptrauth_fault(regs, esr); +} +NOKPROBE_SYMBOL(el1_fpac); + asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -92,6 +99,9 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BRK64: el1_dbg(regs, esr); break; + case ESR_ELx_EC_FPAC: + el1_fpac(regs, esr); + break; default: el1_inv(regs, esr); } @@ -227,6 +237,14 @@ static void notrace el0_svc(struct pt_regs *regs) } NOKPROBE_SYMBOL(el0_svc); +static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_ptrauth_fault(regs, esr); +} +NOKPROBE_SYMBOL(el0_fpac); + asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -272,6 +290,9 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BRK64: el0_dbg(regs, esr); break; + case ESR_ELx_EC_FPAC: + el0_fpac(regs, esr); + break; default: el0_inv(regs, esr); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 29fd00fe94f2..b24f81197a68 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -479,6 +479,17 @@ void do_bti(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_bti); +void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr) +{ + /* + * Unexpected FPAC exception or pointer authentication failure in + * the kernel: kill the task before it does any more harm. + */ + BUG_ON(!user_mode(regs)); + force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr); +} +NOKPROBE_SYMBOL(do_ptrauth_fault); + #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -775,6 +786,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)", [ESR_ELx_EC_SVE] = "SVE", [ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB", + [ESR_ELx_EC_FPAC] = "FPAC", [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", -- cgit v1.2.3 From 2cf660eb81e93f58ae31215af67fee8499901dd9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 14 Sep 2020 09:47:30 +1000 Subject: arm64/mm: Refactor {pgd, pud, pmd, pte}_ERROR() The function __{pgd, pud, pmd, pte}_error() are introduced so that they can be called by {pgd, pud, pmd, pte}_ERROR(). However, some of the functions could never be called when the corresponding page table level isn't enabled. For example, __{pud, pmd}_error() are unused when PUD and PMD are folded to PGD. This removes __{pgd, pud, pmd, pte}_error() and call pr_err() from {pgd, pud, pmd, pte}_ERROR() directly, similar to what x86/powerpc are doing. With this, the code looks a bit simplified either. Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20200913234730.23145-1-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 17 ++++++++--------- arch/arm64/kernel/traps.c | 20 -------------------- 2 files changed, 8 insertions(+), 29 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d5d3fbe73953..e0ab81923c30 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -35,11 +35,6 @@ extern struct page *vmemmap; -extern void __pte_error(const char *file, int line, unsigned long val); -extern void __pmd_error(const char *file, int line, unsigned long val); -extern void __pud_error(const char *file, int line, unsigned long val); -extern void __pgd_error(const char *file, int line, unsigned long val); - #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE @@ -57,7 +52,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val); extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) -#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) /* * Macros to convert between a physical address and its placement in a @@ -541,7 +537,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #if CONFIG_PGTABLE_LEVELS > 2 -#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) @@ -608,7 +605,8 @@ static inline unsigned long pud_page_vaddr(pud_t pud) #if CONFIG_PGTABLE_LEVELS > 3 -#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) #define p4d_none(p4d) (!p4d_val(p4d)) #define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) @@ -667,7 +665,8 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) #endif /* CONFIG_PGTABLE_LEVELS > 3 */ -#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) #define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) #define pgd_clear_fixmap() clear_fixmap(FIX_PGD) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index af25cedf54e9..1e48b869be21 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -935,26 +935,6 @@ asmlinkage void enter_from_user_mode(void) } NOKPROBE_SYMBOL(enter_from_user_mode); -void __pte_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pte %016lx.\n", file, line, val); -} - -void __pmd_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pmd %016lx.\n", file, line, val); -} - -void __pud_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pud %016lx.\n", file, line, val); -} - -void __pgd_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pgd %016lx.\n", file, line, val); -} - /* GENERIC_BUG traps */ int is_valid_bugaddr(unsigned long addr) -- cgit v1.2.3 From 5fd39dc220279108131edbc85832a5fef821a826 Mon Sep 17 00:00:00 2001 From: Clint Sbisa Date: Fri, 18 Sep 2020 03:33:12 +0000 Subject: arm64: Enable PCI write-combine resources under sysfs This change exposes write-combine mappings under sysfs for prefetchable PCI resources on arm64. Originally, the usage of "write combine" here was driven by the x86 definition of write combine. This definition is specific to x86 and does not generalize to other architectures. However, the usage of WC has mutated to "write combine" semantics, which is implemented differently on each arch. Generally, prefetchable BARs are accepted to allow speculative accesses, write combining, and re-ordering-- from the PCI perspective, this means there are no read side effects. (This contradicts the PCI spec which allows prefetchable BARs to have read side effects, but this definition is ill-advised as it is impossible to meet.) On x86, prefetchable BARs are mapped as WC as originally defined (with some conditionals on arch features). On arm64, WC is taken to mean normal non-cacheable memory. In practice, write combine semantics are used to minimize write operations. A common usage of this is minimizing PCI TLPs which can significantly improve performance with PCI devices. In order to provide the same benefits to userspace, we need to allow userspace to map prefetchable BARs with write combine semantics. The resourceX_wc mapping is used today by userspace programs and libraries. While this model is flawed as "write combine" is very ill-defined, it is already used by multiple non-x86 archs to expose write combine semantics to user space. We enable this on arm64 to give userspace on arm64 an equivalent mechanism for utilizing write combining with PCI devices. Signed-off-by: Clint Sbisa Acked-by: Catalin Marinas Acked-by: Lorenzo Pieralisi Acked-by: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Jason Gunthorpe Cc: Lorenzo Pieralisi Cc: Will Deacon Link: https://lore.kernel.org/r/20200918033312.ddfpibgfylfjpex2@amazon.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index 70b323cf8300..b33ca260e3c9 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -17,6 +17,7 @@ #define pcibios_assign_all_busses() \ (pci_has_flag(PCI_REASSIGN_ALL_BUS)) +#define arch_can_pci_mmap_wc() 1 #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 extern int isa_dma_bridge_buggy; -- cgit v1.2.3 From baa2cd417053cb674deb90ee66b88afea0517335 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 14 Sep 2020 16:34:08 +0100 Subject: arm64: stacktrace: Make stack walk callback consistent with generic code As with the generic arch_stack_walk() code the arm64 stack walk code takes a callback that is called per stack frame. Currently the arm64 code always passes a struct stackframe to the callback and the generic code just passes the pc, however none of the users ever reference anything in the struct other than the pc value. The arm64 code also uses a return type of int while the generic code uses a return type of bool though in both cases the return value is a boolean value and the sense is inverted between the two. In order to reduce code duplication when arm64 is converted to use arch_stack_walk() change the signature and return sense of the arm64 specific callback to match that of the generic code. Signed-off-by: Mark Brown Reviewed-by: Miroslav Benes Link: https://lore.kernel.org/r/20200914153409.25097-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 2 +- arch/arm64/kernel/perf_callchain.c | 6 +++--- arch/arm64/kernel/return_address.c | 8 ++++---- arch/arm64/kernel/stacktrace.c | 11 +++++------ 4 files changed, 13 insertions(+), 14 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index fc7613023c19..eb29b1fe8255 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -63,7 +63,7 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data); + bool (*fn)(void *, unsigned long), void *data); extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl); diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index b0e03e052dd1..88ff471b0bce 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -137,11 +137,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, * whist unwinding the stackframe and is like a subroutine return so we use * the PC. */ -static int callchain_trace(struct stackframe *frame, void *data) +static bool callchain_trace(void *data, unsigned long pc) { struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, frame->pc); - return 0; + perf_callchain_store(entry, pc); + return true; } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index a5e8b3b9d798..a6d18755652f 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -18,16 +18,16 @@ struct return_address_data { void *addr; }; -static int save_return_addr(struct stackframe *frame, void *d) +static bool save_return_addr(void *d, unsigned long pc) { struct return_address_data *data = d; if (!data->level) { - data->addr = (void *)frame->pc; - return 1; + data->addr = (void *)pc; + return false; } else { --data->level; - return 0; + return true; } } NOKPROBE_SYMBOL(save_return_addr); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 2dd8e3b8b94b..05eaba21fd46 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -118,12 +118,12 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) NOKPROBE_SYMBOL(unwind_frame); void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data) + bool (*fn)(void *, unsigned long), void *data) { while (1) { int ret; - if (fn(frame, data)) + if (!fn(data, frame->pc)) break; ret = unwind_frame(tsk, frame); if (ret < 0) @@ -139,17 +139,16 @@ struct stack_trace_data { unsigned int skip; }; -static int save_trace(struct stackframe *frame, void *d) +static bool save_trace(void *d, unsigned long addr) { struct stack_trace_data *data = d; struct stack_trace *trace = data->trace; - unsigned long addr = frame->pc; if (data->no_sched_functions && in_sched_functions(addr)) - return 0; + return false; if (data->skip) { data->skip--; - return 0; + return false; } trace->entries[trace->nr_entries++] = addr; -- cgit v1.2.3 From 0fdb64c2a303e4d2562245501920241c0e507951 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Sep 2020 15:48:09 +0100 Subject: arm64: Improve diagnostics when trapping BRK with FAULT_BRK_IMM When generating instructions at runtime, for example due to kernel text patching or the BPF JIT, we can emit a trapping BRK instruction if we are asked to encode an invalid instruction such as an out-of-range] branch. This is indicative of a bug in the caller, and will result in a crash on executing the generated code. Unfortunately, the message from the crash is really unhelpful, and mumbles something about ptrace: | Unexpected kernel BRK exception at EL1 | Internal error: ptrace BRK handler: f2000100 [#1] SMP We can do better than this. Install a break handler for FAULT_BRK_IMM, which is the immediate used to encode the "I've been asked to generate an invalid instruction" error, and triage the faulting PC to determine whether or not the failure occurred in the BPF JIT. Link: https://lore.kernel.org/r/20200915141707.GB26439@willie-the-truck Reported-by: Ilias Apalodimas Signed-off-by: Will Deacon --- arch/arm64/include/asm/extable.h | 9 +++++++++ arch/arm64/kernel/debug-monitors.c | 2 +- arch/arm64/kernel/traps.c | 17 +++++++++++++++++ arch/arm64/mm/extable.c | 4 +--- 4 files changed, 28 insertions(+), 4 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index 840a35ed92ec..b15eb4a3e6b2 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -22,6 +22,15 @@ struct exception_table_entry #define ARCH_HAS_RELATIVE_EXTABLE +static inline bool in_bpf_jit(struct pt_regs *regs) +{ + if (!IS_ENABLED(CONFIG_BPF_JIT)) + return false; + + return regs->pc >= BPF_JIT_REGION_START && + regs->pc < BPF_JIT_REGION_END; +} + #ifdef CONFIG_BPF_JIT int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 7310a4f7f993..fa76151de6ff 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -384,7 +384,7 @@ void __init debug_traps_init(void) hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); + TRAP_BRKPT, "BRK handler"); } /* Re-enable single step for syscall restarting. */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 13ebd5ca2070..00e92c9f338c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -994,6 +995,21 @@ static struct break_hook bug_break_hook = { .imm = BUG_BRK_IMM, }; +static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr) +{ + pr_err("%s generated an invalid instruction at %pS!\n", + in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching", + (void *)instruction_pointer(regs)); + + /* We cannot handle this */ + return DBG_HOOK_ERROR; +} + +static struct break_hook fault_break_hook = { + .fn = reserved_fault_handler, + .imm = FAULT_BRK_IMM, +}; + #ifdef CONFIG_KASAN_SW_TAGS #define KASAN_ESR_RECOVER 0x20 @@ -1059,6 +1075,7 @@ int __init early_brk64(unsigned long addr, unsigned int esr, void __init trap_init(void) { register_kernel_break_hook(&bug_break_hook); + register_kernel_break_hook(&fault_break_hook); #ifdef CONFIG_KASAN_SW_TAGS register_kernel_break_hook(&kasan_break_hook); #endif diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index eee1732ab6cd..aa0060178343 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -14,9 +14,7 @@ int fixup_exception(struct pt_regs *regs) if (!fixup) return 0; - if (IS_ENABLED(CONFIG_BPF_JIT) && - regs->pc >= BPF_JIT_REGION_START && - regs->pc < BPF_JIT_REGION_END) + if (in_bpf_jit(regs)) return arm64_bpf_fixup_exception(fixup, regs); regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; -- cgit v1.2.3 From 315cf047d230a363515bedce177cfbf460cbb2d6 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:51 +0100 Subject: arm64/fpsimdmacros: Introduce a macro to update ZCR_EL1.LEN A follow-up patch will need to update ZCR_EL1.LEN. Add a macro that could be re-used in the current and new places to avoid code duplication. Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimdmacros.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 636e9d9c7929..60e29a3e41c5 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -183,6 +183,17 @@ .purgem _for__body .endm +/* Update ZCR_EL1.LEN with the new VQ */ +.macro sve_load_vq xvqminus1, xtmp, xtmp2 + mrs_s \xtmp, SYS_ZCR_EL1 + bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, \xtmp + b.eq 921f + msr_s SYS_ZCR_EL1, \xtmp2 //self-synchronising +921: +.endm + .macro sve_save nxbase, xpfpsr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 @@ -197,13 +208,7 @@ .endm .macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 - mrs_s x\nxtmp, SYS_ZCR_EL1 - bic \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK - orr \xtmp2, \xtmp2, \xvqminus1 - cmp \xtmp2, x\nxtmp - b.eq 921f - msr_s SYS_ZCR_EL1, \xtmp2 // self-synchronising -921: + sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 _sve_ldr_p 0, \nxbase _sve_wrffr 0 -- cgit v1.2.3 From 6d40f05fad0babfb3e991edb2d0bd28a30a2f10c Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:50 +0100 Subject: arm64/fpsimdmacros: Allow the macro "for" to be used in more cases The current version of the macro "for" is not able to work when the counter is used to generate registers using mnemonics. This is because gas is not able to evaluate the expression generated if used in register's name (i.e x\n). Gas offers a way to evaluate macro arguments by using % in front of them under the alternate macro mode. The implementation of "for" is updated to use the alternate macro mode and %, so we can use the macro in more cases. As the alternate macro mode may have side-effects, this is disabled when expanding the body. While it is enough to prefix the argument of the macro "__for_body" with %, the arguments of "__for" are also prefixed to get a more bearable value in case of compilation error. Suggested-by: Dave Martin Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimdmacros.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 60e29a3e41c5..feef5b371fba 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -166,19 +166,23 @@ .macro __for from:req, to:req .if (\from) == (\to) - _for__body \from + _for__body %\from .else - __for \from, (\from) + ((\to) - (\from)) / 2 - __for (\from) + ((\to) - (\from)) / 2 + 1, \to + __for %\from, %((\from) + ((\to) - (\from)) / 2) + __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to .endif .endm .macro _for var:req, from:req, to:req, insn:vararg .macro _for__body \var:req + .noaltmacro \insn + .altmacro .endm + .altmacro __for \from, \to + .noaltmacro .purgem _for__body .endm -- cgit v1.2.3 From 1e530f1352a2709d7bcacdb8b6d42c8900ba2c80 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:52 +0100 Subject: arm64/sve: Implement a helper to flush SVE registers Introduce a new helper that will zero all SVE registers but the first 128-bits of each vector. This will be used by subsequent patches to avoid costly store/maipulate/reload sequences in places like do_sve_acc(). Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-6-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++ arch/arm64/kernel/entry-fpsimd.S | 8 ++++++++ 3 files changed, 28 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 59f10dd13f12..958f642e930d 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -69,6 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread) extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); +extern void sve_flush_live(void); extern unsigned int sve_get_vl(void); struct arm64_cpu_capabilities; diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index feef5b371fba..af43367534c7 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -164,6 +164,13 @@ | ((\np) << 5) .endm +/* PFALSE P\np.B */ +.macro _sve_pfalse np + _sve_check_preg \np + .inst 0x2518e400 \ + | (\np) +.endm + .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from @@ -198,6 +205,18 @@ 921: .endm +/* Preserve the first 128-bits of Znz and zero the rest. */ +.macro _sve_flush_z nz + _sve_check_zreg \nz + mov v\nz\().16b, v\nz\().16b +.endm + +.macro sve_flush + _for n, 0, 31, _sve_flush_z \n + _for n, 0, 15, _sve_pfalse \n + _sve_wrffr 0 +.endm + .macro sve_save nxbase, xpfpsr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index f880dd63ddc3..fdf7a5a35c63 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -32,6 +32,7 @@ SYM_FUNC_START(fpsimd_load_state) SYM_FUNC_END(fpsimd_load_state) #ifdef CONFIG_ARM64_SVE + SYM_FUNC_START(sve_save_state) sve_save 0, x1, 2 ret @@ -46,4 +47,11 @@ SYM_FUNC_START(sve_get_vl) _sve_rdvl 0, 1 ret SYM_FUNC_END(sve_get_vl) + +/* Zero all SVE registers but the first 128-bits of each vector */ +SYM_FUNC_START(sve_flush_live) + sve_flush + ret +SYM_FUNC_END(sve_flush_live) + #endif /* CONFIG_ARM64_SVE */ -- cgit v1.2.3 From 9c4b4c701e53183df94f6b9802b63762f0c0e1e3 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:53 +0100 Subject: arm64/sve: Implement a helper to load SVE registers from FPSIMD state In a follow-up patch, we may save the FPSIMD rather than the full SVE state when the state has to be zeroed on return to userspace (e.g during a syscall). Introduce an helper to load SVE vectors from FPSIMD state and zero the rest of SVE registers. Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-7-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/kernel/entry-fpsimd.S | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 958f642e930d..bec5f14b622a 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -70,6 +70,8 @@ extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); extern void sve_flush_live(void); +extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, + unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); struct arm64_cpu_capabilities; diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index fdf7a5a35c63..2ca395c25448 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -48,6 +48,23 @@ SYM_FUNC_START(sve_get_vl) ret SYM_FUNC_END(sve_get_vl) +/* + * Load SVE state from FPSIMD state. + * + * x0 = pointer to struct fpsimd_state + * x1 = VQ - 1 + * + * Each SVE vector will be loaded with the first 128-bits taken from FPSIMD + * and the rest zeroed. All the other SVE registers will be zeroed. + */ +SYM_FUNC_START(sve_load_from_fpsimd_state) + sve_load_vq x1, x2, x3 + fpsimd_restore x0, 8 + _for n, 0, 15, _sve_pfalse \n + _sve_wrffr 0 + ret +SYM_FUNC_END(sve_load_from_fpsimd_state) + /* Zero all SVE registers but the first 128-bits of each vector */ SYM_FUNC_START(sve_flush_live) sve_flush -- cgit v1.2.3 From a194c5f2d2b3a05428805146afcabe5140b5d378 Mon Sep 17 00:00:00 2001 From: Zhengyuan Liu Date: Mon, 21 Sep 2020 10:39:36 +0800 Subject: arm64/mm: return cpu_all_mask when node is NUMA_NO_NODE The @node passed to cpumask_of_node() can be NUMA_NO_NODE, in that case it will trigger the following WARN_ON(node >= nr_node_ids) due to mismatched data types of @node and @nr_node_ids. Actually we should return cpu_all_mask just like most other architectures do if passed NUMA_NO_NODE. Also add a similar check to the inline cpumask_of_node() in numa.h. Signed-off-by: Zhengyuan Liu Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20200921023936.21846-1-liuzhengyuan@tj.kylinos.cn Signed-off-by: Will Deacon --- arch/arm64/include/asm/numa.h | 3 +++ arch/arm64/mm/numa.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 626ad01e83bf..dd870390d639 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -25,6 +25,9 @@ const struct cpumask *cpumask_of_node(int node); /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ static inline const struct cpumask *cpumask_of_node(int node) { + if (node == NUMA_NO_NODE) + return cpu_all_mask; + return node_to_cpumask_map[node]; } #endif diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 73f8b49d485c..88e51aade0da 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -46,7 +46,11 @@ EXPORT_SYMBOL(node_to_cpumask_map); */ const struct cpumask *cpumask_of_node(int node) { - if (WARN_ON(node >= nr_node_ids)) + + if (node == NUMA_NO_NODE) + return cpu_all_mask; + + if (WARN_ON(node < 0 || node >= nr_node_ids)) return cpu_none_mask; if (WARN_ON(node_to_cpumask_map[node] == NULL)) -- cgit v1.2.3 From f5be3a61fdb5dd11ef60173e2783ccf62685f892 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 22 Sep 2020 13:53:45 +0800 Subject: arm64: perf: Add support caps under sysfs ARMv8.4-PMU introduces the PMMIR_EL1 registers and some new PMU events, like STALL_SLOT etc, are related to it. Let's add a caps directory to /sys/bus/event_source/devices/armv8_pmuv3_0/ and support slots from PMMIR_EL1 registers in this entry. The user programs can get the slots from sysfs directly. /sys/bus/event_source/devices/armv8_pmuv3_0/caps/slots is exposed under sysfs. Both ARMv8.4-PMU and STALL_SLOT event are implemented, it returns the slots from PMMIR_EL1, otherwise it will return 0. Signed-off-by: Shaokun Zhang Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/1600754025-53535-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/perf_event.h | 3 ++ arch/arm64/include/asm/sysreg.h | 2 + arch/arm64/kernel/perf_event.c | 103 ++++++++++++++++++++++++------------ include/linux/perf/arm_pmu.h | 3 ++ 4 files changed, 78 insertions(+), 33 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2c2d7dbe8a02..60731f602d3e 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -236,6 +236,9 @@ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +/* PMMIR_EL1.SLOTS mask */ +#define ARMV8_PMU_SLOTS_MASK 0xff + #ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 554a7e8ecb07..921773adff5e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -321,6 +321,8 @@ #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) +#define SYS_PMMIR_EL1 sys_reg(3, 0, 9, 14, 6) + #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 04d0ceb72a67..34a7cd3af699 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -305,6 +305,28 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { .attrs = armv8_pmuv3_format_attrs, }; +static ssize_t slots_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK; + + return snprintf(page, PAGE_SIZE, "0x%08x\n", slots); +} + +static DEVICE_ATTR_RO(slots); + +static struct attribute *armv8_pmuv3_caps_attrs[] = { + &dev_attr_slots.attr, + NULL, +}; + +static struct attribute_group armv8_pmuv3_caps_attr_group = { + .name = "caps", + .attrs = armv8_pmuv3_caps_attrs, +}; + /* * Perf Events' indices */ @@ -984,6 +1006,12 @@ static void __armv8pmu_probe_pmu(void *info) bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + + /* store PMMIR_EL1 register for sysfs */ + if (pmuver >= ID_AA64DFR0_PMUVER_8_4 && (pmceid_raw[1] & BIT(31))) + cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); + else + cpu_pmu->reg_pmmir = 0; } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) @@ -1006,7 +1034,8 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, int (*map_event)(struct perf_event *event), const struct attribute_group *events, - const struct attribute_group *format) + const struct attribute_group *format, + const struct attribute_group *caps) { int ret = armv8pmu_probe_pmu(cpu_pmu); if (ret) @@ -1031,104 +1060,112 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, events : &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? format : &armv8_pmuv3_format_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ? + caps : &armv8_pmuv3_caps_attr_group; return 0; } +static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event)) +{ + return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); +} + static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_pmuv3", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_pmuv3", + armv8_pmuv3_map_event); } static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a34", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a34", + armv8_pmuv3_map_event); } static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", - armv8_a53_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35", + armv8_a53_map_event); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", - armv8_a53_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53", + armv8_a53_map_event); } static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a55", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a55", + armv8_pmuv3_map_event); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", - armv8_a57_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", + armv8_a57_map_event); } static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a65", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a65", + armv8_pmuv3_map_event); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", - armv8_a57_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", + armv8_a57_map_event); } static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", - armv8_a73_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73", + armv8_a73_map_event); } static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a75", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a75", + armv8_pmuv3_map_event); } static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a76", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a76", + armv8_pmuv3_map_event); } static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a77", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a77", + armv8_pmuv3_map_event); } static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_neoverse_e1", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1", + armv8_pmuv3_map_event); } static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_neoverse_n1", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_n1", + armv8_pmuv3_map_event); } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", - armv8_thunder_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", + armv8_thunder_map_event); } static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", - armv8_vulcan_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan", + armv8_vulcan_map_event); } static const struct of_device_id armv8_pmu_of_device_ids[] = { diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 5b616dde9a4c..505480217cf1 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -73,6 +73,7 @@ enum armpmu_attr_groups { ARMPMU_ATTR_GROUP_COMMON, ARMPMU_ATTR_GROUP_EVENTS, ARMPMU_ATTR_GROUP_FORMATS, + ARMPMU_ATTR_GROUP_CAPS, ARMPMU_NR_ATTR_GROUPS }; @@ -109,6 +110,8 @@ struct arm_pmu { struct notifier_block cpu_pm_nb; /* the attr_groups array must be NULL-terminated */ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; + /* store the PMMIR_EL1 to expose slots */ + u64 reg_pmmir; /* Only to be used by ACPI probing code */ unsigned long acpi_cpuid; -- cgit v1.2.3 From 48118151d8cc7a457f61d91df0c053473d4b31b1 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Sep 2020 12:18:44 +0200 Subject: arm64: mm: Pin down ASIDs for sharing mm with devices To enable address space sharing with the IOMMU, introduce arm64_mm_context_get() and arm64_mm_context_put(), that pin down a context and ensure that it will keep its ASID after a rollover. Export the symbols to let the modular SMMUv3 driver use them. Pinning is necessary because a device constantly needs a valid ASID, unlike tasks that only require one when running. Without pinning, we would need to notify the IOMMU when we're about to use a new ASID for a task, and it would get complicated when a new task is assigned a shared ASID. Consider the following scenario with no ASID pinned: 1. Task t1 is running on CPUx with shared ASID (gen=1, asid=1) 2. Task t2 is scheduled on CPUx, gets ASID (1, 2) 3. Task tn is scheduled on CPUy, a rollover occurs, tn gets ASID (2, 1) We would now have to immediately generate a new ASID for t1, notify the IOMMU, and finally enable task tn. We are holding the lock during all that time, since we can't afford having another CPU trigger a rollover. The IOMMU issues invalidation commands that can take tens of milliseconds. It gets needlessly complicated. All we wanted to do was schedule task tn, that has no business with the IOMMU. By letting the IOMMU pin tasks when needed, we avoid stalling the slow path, and let the pinning fail when we're out of shareable ASIDs. After a rollover, the allocator expects at least one ASID to be available in addition to the reserved ones (one per CPU). So (NR_ASIDS - NR_CPUS - 1) is the maximum number of ASIDs that can be shared with the IOMMU. Signed-off-by: Jean-Philippe Brucker Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20200918101852.582559-5-jean-philippe@linaro.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 3 + arch/arm64/include/asm/mmu_context.h | 11 +++- arch/arm64/mm/context.c | 105 +++++++++++++++++++++++++++++++++-- 3 files changed, 112 insertions(+), 7 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index a7a5ecaa2e83..0fda85b2cc1b 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,11 +17,14 @@ #ifndef __ASSEMBLY__ +#include + typedef struct { atomic64_t id; #ifdef CONFIG_COMPAT void *sigpage; #endif + refcount_t pinned; void *vdso; unsigned long flags; } mm_context_t; diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f2d7537d6f83..0672236e1aea 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -177,7 +177,13 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) #define destroy_context(mm) do { } while(0) void check_and_switch_context(struct mm_struct *mm); -#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + atomic64_set(&mm->context.id, 0); + refcount_set(&mm->context.pinned, 0); + return 0; +} #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -248,6 +254,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, void verify_cpu_asid_bits(void); void post_ttbr_update_workaround(void); +unsigned long arm64_mm_context_get(struct mm_struct *mm); +void arm64_mm_context_put(struct mm_struct *mm); + #endif /* !__ASSEMBLY__ */ #endif /* !__ASM_MMU_CONTEXT_H */ diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 9b11c096a042..001737a8f309 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -27,6 +27,10 @@ static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; +static unsigned long max_pinned_asids; +static unsigned long nr_pinned_asids; +static unsigned long *pinned_asid_map; + #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) @@ -72,7 +76,7 @@ void verify_cpu_asid_bits(void) } } -static void set_kpti_asid_bits(void) +static void set_kpti_asid_bits(unsigned long *map) { unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); /* @@ -81,13 +85,15 @@ static void set_kpti_asid_bits(void) * is set, then the ASID will map only userspace. Thus * mark even as reserved for kernel. */ - memset(asid_map, 0xaa, len); + memset(map, 0xaa, len); } static void set_reserved_asid_bits(void) { - if (arm64_kernel_unmapped_at_el0()) - set_kpti_asid_bits(); + if (pinned_asid_map) + bitmap_copy(asid_map, pinned_asid_map, NUM_USER_ASIDS); + else if (arm64_kernel_unmapped_at_el0()) + set_kpti_asid_bits(asid_map); else bitmap_clear(asid_map, 0, NUM_USER_ASIDS); } @@ -165,6 +171,14 @@ static u64 new_context(struct mm_struct *mm) if (check_update_reserved_asid(asid, newasid)) return newasid; + /* + * If it is pinned, we can keep using it. Note that reserved + * takes priority, because even if it is also pinned, we need to + * update the generation into the reserved_asids. + */ + if (refcount_read(&mm->context.pinned)) + return newasid; + /* * We had a valid ASID in a previous life, so try to re-use * it if possible. @@ -256,6 +270,71 @@ switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); } +unsigned long arm64_mm_context_get(struct mm_struct *mm) +{ + unsigned long flags; + u64 asid; + + if (!pinned_asid_map) + return 0; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + + asid = atomic64_read(&mm->context.id); + + if (refcount_inc_not_zero(&mm->context.pinned)) + goto out_unlock; + + if (nr_pinned_asids >= max_pinned_asids) { + asid = 0; + goto out_unlock; + } + + if (!asid_gen_match(asid)) { + /* + * We went through one or more rollover since that ASID was + * used. Ensure that it is still valid, or generate a new one. + */ + asid = new_context(mm); + atomic64_set(&mm->context.id, asid); + } + + nr_pinned_asids++; + __set_bit(asid2idx(asid), pinned_asid_map); + refcount_set(&mm->context.pinned, 1); + +out_unlock: + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + + asid &= ~ASID_MASK; + + /* Set the equivalent of USER_ASID_BIT */ + if (asid && arm64_kernel_unmapped_at_el0()) + asid |= 1; + + return asid; +} +EXPORT_SYMBOL_GPL(arm64_mm_context_get); + +void arm64_mm_context_put(struct mm_struct *mm) +{ + unsigned long flags; + u64 asid = atomic64_read(&mm->context.id); + + if (!pinned_asid_map) + return; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + + if (refcount_dec_and_test(&mm->context.pinned)) { + __clear_bit(asid2idx(asid), pinned_asid_map); + nr_pinned_asids--; + } + + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +EXPORT_SYMBOL_GPL(arm64_mm_context_put); + /* Errata workaround post TTBRx_EL1 update. */ asmlinkage void post_ttbr_update_workaround(void) { @@ -296,8 +375,11 @@ static int asids_update_limit(void) { unsigned long num_available_asids = NUM_USER_ASIDS; - if (arm64_kernel_unmapped_at_el0()) + if (arm64_kernel_unmapped_at_el0()) { num_available_asids /= 2; + if (pinned_asid_map) + set_kpti_asid_bits(pinned_asid_map); + } /* * Expect allocation after rollover to fail if we don't have at least * one more ASID than CPUs. ASID #0 is reserved for init_mm. @@ -305,6 +387,13 @@ static int asids_update_limit(void) WARN_ON(num_available_asids - 1 <= num_possible_cpus()); pr_info("ASID allocator initialised with %lu entries\n", num_available_asids); + + /* + * There must always be an ASID available after rollover. Ensure that, + * even if all CPUs have a reserved ASID and the maximum number of ASIDs + * are pinned, there still is at least one empty slot in the ASID map. + */ + max_pinned_asids = num_available_asids - num_possible_cpus() - 2; return 0; } arch_initcall(asids_update_limit); @@ -319,13 +408,17 @@ static int asids_init(void) panic("Failed to allocate bitmap for %lu ASIDs\n", NUM_USER_ASIDS); + pinned_asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), + sizeof(*pinned_asid_map), GFP_KERNEL); + nr_pinned_asids = 0; + /* * We cannot call set_reserved_asid_bits() here because CPU * caps are not finalized yet, so it is safer to assume KPTI * and reserve kernel ASID's from beginning. */ if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) - set_kpti_asid_bits(); + set_kpti_asid_bits(asid_map); return 0; } early_initcall(asids_init); -- cgit v1.2.3 From 6a1bdb173f9967b2329aab0f25bcba963f54e06b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 30 Sep 2020 13:20:40 +0100 Subject: arm64: mm: Make flush_tlb_fix_spurious_fault() a no-op Our use of broadcast TLB maintenance means that spurious page-faults that have been handled already by another CPU do not require additional TLB maintenance. Make flush_tlb_fix_spurious_fault() a no-op and rely on the existing TLB invalidation instead. Add an explicit flush_tlb_page() when making a page dirty, as the TLB is permitted to cache the old read-only entry. Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200728092220.GA21800@willie-the-truck Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 8 ++++++++ arch/arm64/mm/fault.c | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index bc68da9f5706..02ad3105c14c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -50,6 +50,14 @@ extern void __pgd_error(const char *file, int line, unsigned long val); __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +/* + * Outside of a few very special situations (e.g. hibernation), we always + * use broadcast TLB invalidation instructions, therefore a spurious page + * fault on one CPU which has been handled concurrently by another CPU + * does not need to perform additional invalidation. + */ +#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f07333e86c2f..a696a7921da4 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -218,7 +218,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma, pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); } while (pteval != old_pteval); - flush_tlb_fix_spurious_fault(vma, address); + /* Invalidate a stale read-only entry */ + if (dirty) + flush_tlb_page(vma, address); return 1; } -- cgit v1.2.3