From db5157df149709c02e6a08c0b3498553bdd2a76c Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 11 Dec 2024 22:57:41 -0800 Subject: x86/cpu: Remove get_this_hybrid_cpu_*() Because calls to get_this_hybrid_cpu_type() and get_this_hybrid_cpu_native_id() are not required now. cpu-type and native-model-id are cached at boot in per-cpu struct cpuinfo_topology. Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20241211-add-cpu-type-v5-4-2ae010f50370@linux.intel.com --- arch/x86/kernel/cpu/intel.c | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3dce22f00dc3..045b439c653a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -873,34 +873,3 @@ static const struct cpu_dev intel_cpu_dev = { }; cpu_dev_register(intel_cpu_dev); - -#define X86_HYBRID_CPU_TYPE_ID_SHIFT 24 - -/** - * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU - * - * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in - * a hybrid processor. If the processor is not hybrid, returns 0. - */ -u8 get_this_hybrid_cpu_type(void) -{ - if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) - return 0; - - return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT; -} - -/** - * get_this_hybrid_cpu_native_id() - Get the native id of this hybrid CPU - * - * Returns the uarch native ID [23:0] of a CPU in a hybrid processor. - * If the processor is not hybrid, returns 0. - */ -u32 get_this_hybrid_cpu_native_id(void) -{ - if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) - return 0; - - return cpuid_eax(0x0000001a) & - (BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1); -} -- cgit v1.2.3 From dec7fdc0b79c2ae0a537343b17f5ba1c6c47e1ca Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:16 +0100 Subject: x86/cpu: Remove unnecessary headers and reorder the rest Remove the headers at intel.c that are no longer required. Alphabetically reorder what remains since more headers will be included in further commits. Suggested-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250304085152.51092-6-darwi@linutronix.de --- arch/x86/kernel/cpu/intel.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c5d833f5bffb..60b58b1a0c69 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1,40 +1,30 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include -#include #include -#include -#include -#include -#include #include -#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +#include +#endif -#include -#include #include +#include +#include #include +#include #include #include -#include -#include -#include -#include +#include #include +#include #include - -#ifdef CONFIG_X86_64 -#include -#endif +#include #include "cpu.h" -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#endif - /* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. However, there exists -- cgit v1.2.3 From cb5f4c76b2a9314c35e00c67c98ccd03542c2634 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:17 +0100 Subject: x86/cpu: Use max() for CPUID leaf 0x2 TLB descriptors parsing The conditional statement "if (x < y) { x = y; }" appears 22 times at the Intel leaf 0x2 descriptors parsing logic. Replace each of such instances with a max() expression to simplify the code. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250304085152.51092-7-darwi@linutronix.de --- arch/x86/kernel/cpu/intel.c | 76 +++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 60b58b1a0c69..42a57b85f93b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -700,7 +701,9 @@ static const struct _tlb_table intel_tlb_table[] = { static void intel_tlb_lookup(const unsigned char desc) { + unsigned int entries; unsigned char k; + if (desc == 0) return; @@ -712,81 +715,58 @@ static void intel_tlb_lookup(const unsigned char desc) if (intel_tlb_table[k].tlb_type == 0) return; + entries = intel_tlb_table[k].entries; switch (intel_tlb_table[k].tlb_type) { case STLB_4K: - if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4k[ENTRIES] = max(tlb_lli_4k[ENTRIES], entries); + tlb_lld_4k[ENTRIES] = max(tlb_lld_4k[ENTRIES], entries); break; case STLB_4K_2M: - if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4k[ENTRIES] = max(tlb_lli_4k[ENTRIES], entries); + tlb_lld_4k[ENTRIES] = max(tlb_lld_4k[ENTRIES], entries); + tlb_lli_2m[ENTRIES] = max(tlb_lli_2m[ENTRIES], entries); + tlb_lld_2m[ENTRIES] = max(tlb_lld_2m[ENTRIES], entries); + tlb_lli_4m[ENTRIES] = max(tlb_lli_4m[ENTRIES], entries); + tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], entries); break; case TLB_INST_ALL: - if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4k[ENTRIES] = max(tlb_lli_4k[ENTRIES], entries); + tlb_lli_2m[ENTRIES] = max(tlb_lli_2m[ENTRIES], entries); + tlb_lli_4m[ENTRIES] = max(tlb_lli_4m[ENTRIES], entries); break; case TLB_INST_4K: - if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4k[ENTRIES] = max(tlb_lli_4k[ENTRIES], entries); break; case TLB_INST_4M: - if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_4m[ENTRIES] = max(tlb_lli_4m[ENTRIES], entries); break; case TLB_INST_2M_4M: - if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lli_2m[ENTRIES] = max(tlb_lli_2m[ENTRIES], entries); + tlb_lli_4m[ENTRIES] = max(tlb_lli_4m[ENTRIES], entries); break; case TLB_DATA_4K: case TLB_DATA0_4K: - if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_4k[ENTRIES] = max(tlb_lld_4k[ENTRIES], entries); break; case TLB_DATA_4M: case TLB_DATA0_4M: - if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], entries); break; case TLB_DATA_2M_4M: case TLB_DATA0_2M_4M: - if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_2m[ENTRIES] = max(tlb_lld_2m[ENTRIES], entries); + tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], entries); break; case TLB_DATA_4K_4M: - if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; - if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_4k[ENTRIES] = max(tlb_lld_4k[ENTRIES], entries); + tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], entries); break; case TLB_DATA_1G_2M_4M: - if (tlb_lld_2m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) - tlb_lld_2m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; - if (tlb_lld_4m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) - tlb_lld_4m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + tlb_lld_2m[ENTRIES] = max(tlb_lld_2m[ENTRIES], TLB_0x63_2M_4M_ENTRIES); + tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], TLB_0x63_2M_4M_ENTRIES); fallthrough; case TLB_DATA_1G: - if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) - tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; + tlb_lld_1g[ENTRIES] = max(tlb_lld_1g[ENTRIES], entries); break; } } -- cgit v1.2.3 From 8b7e54b542103753619a37cbb3216849a934872f Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:18 +0100 Subject: x86/cpu: Simplify TLB entry count storage Commit: e0ba94f14f74 ("x86/tlb_info: get last level TLB entry number of CPU") introduced u16 "info" arrays for each TLB type. Since 2012 and each array stores just one type of information: the number of TLB entries for its respective TLB type. Replace such arrays with simple variables. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250304085152.51092-8-darwi@linutronix.de --- arch/x86/include/asm/processor.h | 19 ++++++---------- arch/x86/kernel/cpu/amd.c | 18 +++++++-------- arch/x86/kernel/cpu/common.c | 20 ++++++++--------- arch/x86/kernel/cpu/hygon.c | 16 +++++++------- arch/x86/kernel/cpu/intel.c | 48 ++++++++++++++++++++-------------------- 5 files changed, 57 insertions(+), 64 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c0cd10182e90..0ea227fa027c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -60,18 +60,13 @@ struct vm86; # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -enum tlb_infos { - ENTRIES, - NR_INFO -}; - -extern u16 __read_mostly tlb_lli_4k[NR_INFO]; -extern u16 __read_mostly tlb_lli_2m[NR_INFO]; -extern u16 __read_mostly tlb_lli_4m[NR_INFO]; -extern u16 __read_mostly tlb_lld_4k[NR_INFO]; -extern u16 __read_mostly tlb_lld_2m[NR_INFO]; -extern u16 __read_mostly tlb_lld_4m[NR_INFO]; -extern u16 __read_mostly tlb_lld_1g[NR_INFO]; +extern u16 __read_mostly tlb_lli_4k; +extern u16 __read_mostly tlb_lli_2m; +extern u16 __read_mostly tlb_lli_4m; +extern u16 __read_mostly tlb_lld_4k; +extern u16 __read_mostly tlb_lld_2m; +extern u16 __read_mostly tlb_lld_4m; +extern u16 __read_mostly tlb_lld_1g; /* * CPU type and hardware bug flags. Kept separately for each CPU. diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d747515ad013..315766440201 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1105,8 +1105,8 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) cpuid(0x80000006, &eax, &ebx, &ecx, &edx); - tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; - tlb_lli_4k[ENTRIES] = ebx & mask; + tlb_lld_4k = (ebx >> 16) & mask; + tlb_lli_4k = ebx & mask; /* * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB @@ -1119,26 +1119,26 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ if (!((eax >> 16) & mask)) - tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff; + tlb_lld_2m = (cpuid_eax(0x80000005) >> 16) & 0xff; else - tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; + tlb_lld_2m = (eax >> 16) & mask; /* a 4M entry uses two 2M entries */ - tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; + tlb_lld_4m = tlb_lld_2m >> 1; /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ if (!(eax & mask)) { /* Erratum 658 */ if (c->x86 == 0x15 && c->x86_model <= 0x1f) { - tlb_lli_2m[ENTRIES] = 1024; + tlb_lli_2m = 1024; } else { cpuid(0x80000005, &eax, &ebx, &ecx, &edx); - tlb_lli_2m[ENTRIES] = eax & 0xff; + tlb_lli_2m = eax & 0xff; } } else - tlb_lli_2m[ENTRIES] = eax & mask; + tlb_lli_2m = eax & mask; - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + tlb_lli_4m = tlb_lli_2m >> 1; } static const struct cpu_dev amd_cpu_dev = { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8eba9ca9c216..3a1a957e0c60 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -846,13 +846,13 @@ void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) c->x86_cache_size = l2size; } -u16 __read_mostly tlb_lli_4k[NR_INFO]; -u16 __read_mostly tlb_lli_2m[NR_INFO]; -u16 __read_mostly tlb_lli_4m[NR_INFO]; -u16 __read_mostly tlb_lld_4k[NR_INFO]; -u16 __read_mostly tlb_lld_2m[NR_INFO]; -u16 __read_mostly tlb_lld_4m[NR_INFO]; -u16 __read_mostly tlb_lld_1g[NR_INFO]; +u16 __read_mostly tlb_lli_4k; +u16 __read_mostly tlb_lli_2m; +u16 __read_mostly tlb_lli_4m; +u16 __read_mostly tlb_lld_4k; +u16 __read_mostly tlb_lld_2m; +u16 __read_mostly tlb_lld_4m; +u16 __read_mostly tlb_lld_1g; static void cpu_detect_tlb(struct cpuinfo_x86 *c) { @@ -860,12 +860,10 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) this_cpu->c_detect_tlb(c); pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n", - tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], - tlb_lli_4m[ENTRIES]); + tlb_lli_4k, tlb_lli_2m, tlb_lli_4m); pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", - tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], - tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); + tlb_lld_4k, tlb_lld_2m, tlb_lld_4m, tlb_lld_1g); } void get_cpu_vendor(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index c5191b06f9f2..6af4a4a90a52 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -240,26 +240,26 @@ static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c) cpuid(0x80000006, &eax, &ebx, &ecx, &edx); - tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; - tlb_lli_4k[ENTRIES] = ebx & mask; + tlb_lld_4k = (ebx >> 16) & mask; + tlb_lli_4k = ebx & mask; /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ if (!((eax >> 16) & mask)) - tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff; + tlb_lld_2m = (cpuid_eax(0x80000005) >> 16) & 0xff; else - tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; + tlb_lld_2m = (eax >> 16) & mask; /* a 4M entry uses two 2M entries */ - tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; + tlb_lld_4m = tlb_lld_2m >> 1; /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ if (!(eax & mask)) { cpuid(0x80000005, &eax, &ebx, &ecx, &edx); - tlb_lli_2m[ENTRIES] = eax & 0xff; + tlb_lli_2m = eax & 0xff; } else - tlb_lli_2m[ENTRIES] = eax & mask; + tlb_lli_2m = eax & mask; - tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + tlb_lli_4m = tlb_lli_2m >> 1; } static const struct cpu_dev hygon_cpu_dev = { diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 42a57b85f93b..61d3fd31baee 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -718,55 +718,55 @@ static void intel_tlb_lookup(const unsigned char desc) entries = intel_tlb_table[k].entries; switch (intel_tlb_table[k].tlb_type) { case STLB_4K: - tlb_lli_4k[ENTRIES] = max(tlb_lli_4k[ENTRIES], entries); - tlb_lld_4k[ENTRIES] = max(tlb_lld_4k[ENTRIES], entries); + tlb_lli_4k = max(tlb_lli_4k, entries); + tlb_lld_4k = max(tlb_lld_4k, entries); break; case STLB_4K_2M: - tlb_lli_4k[ENTRIES] = max(tlb_lli_4k[ENTRIES], entries); - tlb_lld_4k[ENTRIES] = max(tlb_lld_4k[ENTRIES], entries); - tlb_lli_2m[ENTRIES] = max(tlb_lli_2m[ENTRIES], entries); - tlb_lld_2m[ENTRIES] = max(tlb_lld_2m[ENTRIES], entries); - tlb_lli_4m[ENTRIES] = max(tlb_lli_4m[ENTRIES], entries); - tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], entries); + tlb_lli_4k = max(tlb_lli_4k, entries); + tlb_lld_4k = max(tlb_lld_4k, entries); + tlb_lli_2m = max(tlb_lli_2m, entries); + tlb_lld_2m = max(tlb_lld_2m, entries); + tlb_lli_4m = max(tlb_lli_4m, entries); + tlb_lld_4m = max(tlb_lld_4m, entries); break; case TLB_INST_ALL: - tlb_lli_4k[ENTRIES] = max(tlb_lli_4k[ENTRIES], entries); - tlb_lli_2m[ENTRIES] = max(tlb_lli_2m[ENTRIES], entries); - tlb_lli_4m[ENTRIES] = max(tlb_lli_4m[ENTRIES], entries); + tlb_lli_4k = max(tlb_lli_4k, entries); + tlb_lli_2m = max(tlb_lli_2m, entries); + tlb_lli_4m = max(tlb_lli_4m, entries); break; case TLB_INST_4K: - tlb_lli_4k[ENTRIES] = max(tlb_lli_4k[ENTRIES], entries); + tlb_lli_4k = max(tlb_lli_4k, entries); break; case TLB_INST_4M: - tlb_lli_4m[ENTRIES] = max(tlb_lli_4m[ENTRIES], entries); + tlb_lli_4m = max(tlb_lli_4m, entries); break; case TLB_INST_2M_4M: - tlb_lli_2m[ENTRIES] = max(tlb_lli_2m[ENTRIES], entries); - tlb_lli_4m[ENTRIES] = max(tlb_lli_4m[ENTRIES], entries); + tlb_lli_2m = max(tlb_lli_2m, entries); + tlb_lli_4m = max(tlb_lli_4m, entries); break; case TLB_DATA_4K: case TLB_DATA0_4K: - tlb_lld_4k[ENTRIES] = max(tlb_lld_4k[ENTRIES], entries); + tlb_lld_4k = max(tlb_lld_4k, entries); break; case TLB_DATA_4M: case TLB_DATA0_4M: - tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], entries); + tlb_lld_4m = max(tlb_lld_4m, entries); break; case TLB_DATA_2M_4M: case TLB_DATA0_2M_4M: - tlb_lld_2m[ENTRIES] = max(tlb_lld_2m[ENTRIES], entries); - tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], entries); + tlb_lld_2m = max(tlb_lld_2m, entries); + tlb_lld_4m = max(tlb_lld_4m, entries); break; case TLB_DATA_4K_4M: - tlb_lld_4k[ENTRIES] = max(tlb_lld_4k[ENTRIES], entries); - tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], entries); + tlb_lld_4k = max(tlb_lld_4k, entries); + tlb_lld_4m = max(tlb_lld_4m, entries); break; case TLB_DATA_1G_2M_4M: - tlb_lld_2m[ENTRIES] = max(tlb_lld_2m[ENTRIES], TLB_0x63_2M_4M_ENTRIES); - tlb_lld_4m[ENTRIES] = max(tlb_lld_4m[ENTRIES], TLB_0x63_2M_4M_ENTRIES); + tlb_lld_2m = max(tlb_lld_2m, TLB_0x63_2M_4M_ENTRIES); + tlb_lld_4m = max(tlb_lld_4m, TLB_0x63_2M_4M_ENTRIES); fallthrough; case TLB_DATA_1G: - tlb_lld_1g[ENTRIES] = max(tlb_lld_1g[ENTRIES], entries); + tlb_lld_1g = max(tlb_lld_1g, entries); break; } } -- cgit v1.2.3 From 1f61dfdf16cd3bab383741c2eb43e7f69e9f592f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Mar 2025 09:51:20 +0100 Subject: x86/cpu: Remove unused TLB strings Commit: e0ba94f14f74 ("x86/tlb_info: get last level TLB entry number of CPU") added the TLB table for parsing CPUID(0x4), including strings describing them. The string entry in the table was never used. Convert them to comments. Signed-off-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250304085152.51092-10-darwi@linutronix.de --- arch/x86/kernel/cpu/cpu.h | 8 ----- arch/x86/kernel/cpu/intel.c | 80 ++++++++++++++++++++++++--------------------- 2 files changed, 43 insertions(+), 45 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 1beccefbaff9..51deb60a9d26 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -33,14 +33,6 @@ struct cpu_dev { #endif }; -struct _tlb_table { - unsigned char descriptor; - char tlb_type; - unsigned int entries; - /* unsigned int ways; */ - char info[128]; -}; - #define cpu_dev_register(cpu_devX) \ static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ __section(".x86_cpu_dev.init") = \ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 61d3fd31baee..291c82816797 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -658,44 +658,50 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) */ #define TLB_0x63_2M_4M_ENTRIES 32 +struct _tlb_table { + unsigned char descriptor; + char tlb_type; + unsigned int entries; +}; + static const struct _tlb_table intel_tlb_table[] = { - { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, - { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" }, - { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" }, - { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, - { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, - { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, - { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" }, - { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, - { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, - { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, - { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, - { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" }, - { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" }, - { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" }, - { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" }, - { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, - { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, - { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, - { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, - { 0x63, TLB_DATA_1G_2M_4M, 4, " TLB_DATA 1 GByte pages, 4-way set associative" - " (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here)" }, - { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" }, - { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" }, - { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" }, - { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, - { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, - { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, - { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, - { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, - { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, - { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" }, - { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" }, - { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, - { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, - { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, - { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" }, - { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, + { 0x01, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages, 4-way set associative */ + { 0x02, TLB_INST_4M, 2}, /* TLB_INST 4 MByte pages, full associative */ + { 0x03, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way set associative */ + { 0x04, TLB_DATA_4M, 8}, /* TLB_DATA 4 MByte pages, 4-way set associative */ + { 0x05, TLB_DATA_4M, 32}, /* TLB_DATA 4 MByte pages, 4-way set associative */ + { 0x0b, TLB_INST_4M, 4}, /* TLB_INST 4 MByte pages, 4-way set associative */ + { 0x4f, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages */ + { 0x50, TLB_INST_ALL, 64}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + { 0x51, TLB_INST_ALL, 128}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + { 0x52, TLB_INST_ALL, 256}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + { 0x55, TLB_INST_2M_4M, 7}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ + { 0x56, TLB_DATA0_4M, 16}, /* TLB_DATA0 4 MByte pages, 4-way set associative */ + { 0x57, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, 4-way associative */ + { 0x59, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, fully associative */ + { 0x5a, TLB_DATA0_2M_4M, 32}, /* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */ + { 0x5b, TLB_DATA_4K_4M, 64}, /* TLB_DATA 4 KByte and 4 MByte pages */ + { 0x5c, TLB_DATA_4K_4M, 128}, /* TLB_DATA 4 KByte and 4 MByte pages */ + { 0x5d, TLB_DATA_4K_4M, 256}, /* TLB_DATA 4 KByte and 4 MByte pages */ + { 0x61, TLB_INST_4K, 48}, /* TLB_INST 4 KByte pages, full associative */ + { 0x63, TLB_DATA_1G_2M_4M, 4}, /* TLB_DATA 1 GByte pages, 4-way set associative + * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */ + { 0x6b, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 8-way associative */ + { 0x6c, TLB_DATA_2M_4M, 128}, /* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */ + { 0x6d, TLB_DATA_1G, 16}, /* TLB_DATA 1 GByte pages, fully associative */ + { 0x76, TLB_INST_2M_4M, 8}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ + { 0xb0, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 4-way set associative */ + { 0xb1, TLB_INST_2M_4M, 4}, /* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */ + { 0xb2, TLB_INST_4K, 64}, /* TLB_INST 4KByte pages, 4-way set associative */ + { 0xb3, TLB_DATA_4K, 128}, /* TLB_DATA 4 KByte pages, 4-way set associative */ + { 0xb4, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 4-way associative */ + { 0xb5, TLB_INST_4K, 64}, /* TLB_INST 4 KByte pages, 8-way set associative */ + { 0xb6, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 8-way set associative */ + { 0xba, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way associative */ + { 0xc0, TLB_DATA_4K_4M, 8}, /* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */ + { 0xc1, STLB_4K_2M, 1024}, /* STLB 4 KByte and 2 MByte pages, 8-way associative */ + { 0xc2, TLB_DATA_2M_4M, 16}, /* TLB_DATA 2 MByte/4MByte pages, 4-way associative */ + { 0xca, STLB_4K, 512}, /* STLB 4 KByte pages, 4-way associative */ { 0x00, 0, 0 } }; -- cgit v1.2.3 From 7e67f3617228318da655dc87bc705f9c5f7bb101 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 19 Feb 2025 18:41:20 +0000 Subject: x86/cpu/intel: Fix the MOVSL alignment preference for extended Families The alignment preference for 32-bit MOVSL based bulk memory move has been 8-byte for a long time. However this preference is only set for Family 6 and 15 processors. Use the same preference for upcoming Family numbers 18 and 19. Also, use a simpler VFM based check instead of switching based on Family numbers. Refresh the comment to reflect the new check. Signed-off-by: Sohil Mehta Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250219184133.816753-3-sohil.mehta@intel.com --- arch/x86/kernel/cpu/intel.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 291c82816797..99b4c4074827 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -440,23 +440,16 @@ static void intel_workarounds(struct cpuinfo_x86 *c) (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) set_cpu_bug(c, X86_BUG_11AP); - #ifdef CONFIG_X86_INTEL_USERCOPY /* - * Set up the preferred alignment for movsl bulk memory moves + * MOVSL bulk memory moves can be slow when source and dest are not + * both 8-byte aligned. PII/PIII only like MOVSL with 8-byte alignment. + * + * Set the preferred alignment for Pentium Pro and newer processors, as + * it has only been tested on these. */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ + if (c->x86_vfm >= INTEL_PENTIUM_PRO) movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } #endif intel_smp_check(c); -- cgit v1.2.3 From eaa472f76d1cc356c0f9d4406d2358ca924a797e Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 19 Feb 2025 18:41:23 +0000 Subject: x86/cpu/intel: Replace early Family 6 checks with VFM ones Introduce names for some old pentium models and replace the x86_model checks with VFM ones. Signed-off-by: Sohil Mehta Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20250219184133.816753-6-sohil.mehta@intel.com --- arch/x86/include/asm/intel-family.h | 4 ++++ arch/x86/kernel/cpu/intel.c | 11 +++++------ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index f0e7ed012cfa..58735bc3298a 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -45,8 +45,12 @@ /* Wildcard match so X86_MATCH_VFM(ANY) works */ #define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY) +/* Family 6 */ #define INTEL_PENTIUM_PRO IFM(6, 0x01) +#define INTEL_PENTIUM_II_KLAMATH IFM(6, 0x03) #define INTEL_PENTIUM_III_DESCHUTES IFM(6, 0x05) +#define INTEL_PENTIUM_III_TUALATIN IFM(6, 0x0B) +#define INTEL_PENTIUM_M_DOTHAN IFM(6, 0x0D) #define INTEL_CORE_YONAH IFM(6, 0x0E) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 99b4c4074827..a49615ffcaca 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -186,7 +186,7 @@ void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; - if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd)) + if (c->x86_vfm < INTEL_PENTIUM_M_DOTHAN) return; /* @@ -341,9 +341,7 @@ static void bsp_init_intel(struct cpuinfo_x86 *c) int ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && + if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO && boot_cpu_data.x86_stepping < 8) { pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; @@ -404,7 +402,8 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until * model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) + if ((c->x86_vfm == INTEL_PENTIUM_II_KLAMATH && c->x86_stepping < 3) || + c->x86_vfm < INTEL_PENTIUM_II_KLAMATH) clear_cpu_cap(c, X86_FEATURE_SEP); /* @@ -606,7 +605,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) * to determine which, so we use a boottime override * for the 512kb model, and assume 256 otherwise. */ - if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + if (c->x86_vfm == INTEL_PENTIUM_III_TUALATIN && size == 0) size = 256; /* -- cgit v1.2.3 From fc866f247277894bf887cda01c010e1d98abcb86 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 19 Feb 2025 18:41:24 +0000 Subject: x86/cpu/intel: Replace Family 15 checks with VFM ones Introduce names for some old pentium 4 models and replace the x86_model checks with VFM ones. Signed-off-by: Sohil Mehta Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20250219184133.816753-7-sohil.mehta@intel.com --- arch/x86/include/asm/intel-family.h | 4 ++++ arch/x86/kernel/cpu/intel.c | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 58735bc3298a..0108695a7f9a 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -184,6 +184,10 @@ /* Family 5 */ #define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ +/* Family 15 - NetBurst */ +#define INTEL_P4_WILLAMETTE IFM(15, 0x01) /* Also Xeon Foster */ +#define INTEL_P4_PRESCOTT IFM(15, 0x03) + /* Family 19 */ #define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a49615ffcaca..42cebcac8a77 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -247,8 +247,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) #endif /* CPUID workaround for 0F33/0F34 CPU */ - if (c->x86 == 0xF && c->x86_model == 0x3 - && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) + if (c->x86_vfm == INTEL_P4_PRESCOTT && + (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) c->x86_phys_bits = 36; /* @@ -421,7 +421,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { + if (c->x86_vfm == INTEL_P4_WILLAMETTE && c->x86_stepping == 1) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); -- cgit v1.2.3 From eb1ac3330573e0d6cb0f8dccde34112929fc1344 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 19 Feb 2025 18:41:25 +0000 Subject: x86/cpu/intel: Replace Family 5 model checks with VFM ones Introduce names for some Family 5 models and convert some of the checks to be VFM based. Also, to keep the file sorted by family, move Family 5 to the top of the header file. Signed-off-by: Sohil Mehta Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20250219184133.816753-8-sohil.mehta@intel.com --- arch/x86/include/asm/intel-family.h | 9 ++++++--- arch/x86/kernel/cpu/intel.c | 11 +++++------ 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 0108695a7f9a..4296c8e132b9 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -45,6 +45,12 @@ /* Wildcard match so X86_MATCH_VFM(ANY) works */ #define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY) +/* Family 5 */ +#define INTEL_FAM5_START IFM(5, 0x00) /* Notational marker, also P5 A-step */ +#define INTEL_PENTIUM_75 IFM(5, 0x02) /* P54C */ +#define INTEL_PENTIUM_MMX IFM(5, 0x04) /* P55C */ +#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ + /* Family 6 */ #define INTEL_PENTIUM_PRO IFM(6, 0x01) #define INTEL_PENTIUM_II_KLAMATH IFM(6, 0x03) @@ -181,9 +187,6 @@ #define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */ #define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */ -/* Family 5 */ -#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ - /* Family 15 - NetBurst */ #define INTEL_P4_WILLAMETTE IFM(15, 0x01) /* Also Xeon Foster */ #define INTEL_P4_PRESCOTT IFM(15, 0x03) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 42cebcac8a77..ae20f450f6e0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -358,9 +358,8 @@ static void intel_smp_check(struct cpuinfo_x86 *c) /* * Mask B, Pentium, but not Pentium MMX */ - if (c->x86 == 5 && - c->x86_stepping >= 1 && c->x86_stepping <= 4 && - c->x86_model <= 3) { + if (c->x86_vfm >= INTEL_FAM5_START && c->x86_vfm < INTEL_PENTIUM_MMX && + c->x86_stepping >= 1 && c->x86_stepping <= 4) { /* * Remember we have B step Pentia with bugs */ @@ -387,7 +386,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * The Quark is also family 5, but does not have the same bug. */ clear_cpu_bug(c, X86_BUG_F00F); - if (c->x86 == 5 && c->x86_model < 9) { + if (c->x86_vfm >= INTEL_FAM5_START && c->x86_vfm < INTEL_QUARK_X1000) { static int f00f_workaround_enabled; set_cpu_bug(c, X86_BUG_F00F); @@ -435,7 +434,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * integrated APIC (see 11AP erratum in "Pentium Processor * Specification Update"). */ - if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && + if (boot_cpu_has(X86_FEATURE_APIC) && c->x86_vfm == INTEL_PENTIUM_75 && (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) set_cpu_bug(c, X86_BUG_11AP); @@ -612,7 +611,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) * Intel Quark SoC X1000 contains a 4-way set associative * 16K cache with a 16 byte cache line and 256 lines per tag */ - if ((c->x86 == 5) && (c->x86_model == 9)) + if (c->x86_vfm == INTEL_QUARK_X1000) size = 16; return size; } -- cgit v1.2.3 From 15b7ddcf66fb7ac371279be23b3b6008dad3e36c Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 19 Feb 2025 18:41:29 +0000 Subject: x86/cpu/intel: Fix fast string initialization for extended Families X86_FEATURE_REP_GOOD is a linux defined feature flag to track whether fast string operations should be used for copy_page(). It is also used as a second alternative for clear_page() if enhanced fast string operations (ERMS) are not available. X86_FEATURE_ERMS is an Intel-specific hardware-defined feature flag that tracks hardware support for Enhanced Fast strings. It is used to track whether Fast strings should be used for similar memory copy and memory clearing operations. On top of these, there is a FAST_STRING enable bit in the IA32_MISC_ENABLE MSR. It is typically controlled by the BIOS to provide a hint to the hardware and the OS on whether fast string operations are preferred. Commit: 161ec53c702c ("x86, mem, intel: Initialize Enhanced REP MOVSB/STOSB") introduced a mechanism to honor the BIOS preference for fast string operations and clear the above feature flags if needed. Unfortunately, the current initialization code for Intel to set and clear these bits is confusing at best and likely incorrect. X86_FEATURE_REP_GOOD is cleared in early_init_intel() if MISC_ENABLE.FAST_STRING is 0. But it gets set later on unconditionally for all Family 6 processors in init_intel(). This not only overrides the BIOS preference but also contradicts the earlier check. Fix this by combining the related checks and always relying on the BIOS provided preference for fast string operations. This simplification makes sure the upcoming Intel Family 18 and 19 models are covered as well. Signed-off-by: Sohil Mehta Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250219184133.816753-12-sohil.mehta@intel.com --- arch/x86/kernel/cpu/intel.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ae20f450f6e0..2181304dc7d3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -289,12 +289,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_PAT); /* - * If fast string is not enabled in IA32_MISC_ENABLE for any reason, - * clear the fast string and enhanced fast string CPU capabilities. + * Modern CPUs are generally expected to have a sane fast string + * implementation. However, BIOSes typically have a knob to tweak + * the architectural MISC_ENABLE.FAST_STRING enable bit. + * + * Adhere to the preference and program the Linux-defined fast + * string flag and enhanced fast string capabilities accordingly. */ - if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { + if (c->x86_vfm >= INTEL_PENTIUM_M_DOTHAN) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { + if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { + /* X86_FEATURE_ERMS is set based on CPUID */ + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } else { pr_info("Disabled fast string operations\n"); setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); setup_clear_cpu_cap(X86_FEATURE_ERMS); @@ -545,8 +552,6 @@ static void init_intel(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); #else /* * Names for the Pentium II/Celeron processors -- cgit v1.2.3 From fadb6f569b10bf668677add876ed50586931b8f3 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 19 Feb 2025 18:41:31 +0000 Subject: x86/cpu/intel: Limit the non-architectural constant_tsc model checks X86_FEATURE_CONSTANT_TSC is a Linux-defined, synthesized feature flag. It is used across several vendors. Intel CPUs will set the feature when the architectural CPUID.80000007.EDX[1] bit is set. There are also some Intel CPUs that have the X86_FEATURE_CONSTANT_TSC behavior but don't enumerate it with the architectural bit. Those currently have a model range check. Today, virtually all of the CPUs that have the CPUID bit *also* match the "model >= 0x0e" check. This is confusing. Instead of an open-ended check, pick some models (INTEL_IVYBRIDGE and P4_WILLAMETTE) as the end of goofy CPUs that should enumerate the bit but don't. These models are relatively arbitrary but conservative pick for this. This makes it obvious that later CPUs (like Family 18+) no longer need to synthesize X86_FEATURE_CONSTANT_TSC. Signed-off-by: Sohil Mehta Signed-off-by: Ingo Molnar Cc: Thomas Gleixner Link: https://lore.kernel.org/r/20250219184133.816753-14-sohil.mehta@intel.com --- arch/x86/kernel/cpu/intel.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu/intel.c') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 2181304dc7d3..4cbb2e69bea1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -201,10 +201,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); @@ -257,10 +253,16 @@ static void early_init_intel(struct cpuinfo_x86 *c) * * It is also reliable across cores and sockets. (but not across * cabinets - we turn it off in that case explicitly.) + * + * Use a model-specific check for some older CPUs that have invariant + * TSC but may not report it architecturally via 8000_0007. */ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + } else if ((c->x86_vfm >= INTEL_P4_PRESCOTT && c->x86_vfm <= INTEL_P4_WILLAMETTE) || + (c->x86_vfm >= INTEL_CORE_YONAH && c->x86_vfm <= INTEL_IVYBRIDGE)) { + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ -- cgit v1.2.3