From 7a7685acd2129e2e5d433636120b4c5038c03e51 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 14 Mar 2021 20:33:00 +1100 Subject: powerpc/eeh: Fix build failure with CONFIG_PROC_FS=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The build fails with CONFIG_PROC_FS=n: arch/powerpc/kernel/eeh.c:1571:12: error: ‘proc_eeh_show’ defined but not used 1571 | static int proc_eeh_show(struct seq_file *m, void *v) Wrap proc_eeh_show() in an ifdef to avoid it. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210314093300.131998-1-mpe@ellerman.id.au --- arch/powerpc/kernel/eeh.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel/eeh.c') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index cd60bc1c8701..01dbb44a0fe3 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1568,6 +1568,7 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, } EXPORT_SYMBOL_GPL(eeh_pe_inject_err); +#ifdef CONFIG_PROC_FS static int proc_eeh_show(struct seq_file *m, void *v) { if (!eeh_enabled()) { @@ -1594,6 +1595,7 @@ static int proc_eeh_show(struct seq_file *m, void *v) return 0; } +#endif /* CONFIG_PROC_FS */ #ifdef CONFIG_DEBUG_FS -- cgit v1.2.3 From 5ae5bc12d0728db60a0aa9b62160ffc038875f1a Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 12 Apr 2021 13:22:50 +0530 Subject: powerpc/eeh: Fix EEH handling for hugepages in ioremap space. During the EEH MMIO error checking, the current implementation fails to map the (virtual) MMIO address back to the pci device on radix with hugepage mappings for I/O. This results into failure to dispatch EEH event with no recovery even when EEH capability has been enabled on the device. eeh_check_failure(token) # token = virtual MMIO address addr = eeh_token_to_phys(token); edev = eeh_addr_cache_get_dev(addr); if (!edev) return 0; eeh_dev_check_failure(edev); <= Dispatch the EEH event In case of hugepage mappings, eeh_token_to_phys() has a bug in virt -> phys translation that results in wrong physical address, which is then passed to eeh_addr_cache_get_dev() to match it against cached pci I/O address ranges to get to a PCI device. Hence, it fails to find a match and the EEH event never gets dispatched leaving the device in failed state. The commit 33439620680be ("powerpc/eeh: Handle hugepages in ioremap space") introduced following logic to translate virt to phys for hugepage mappings: eeh_token_to_phys(): + pa = pte_pfn(*ptep); + + /* On radix we can do hugepage mappings for io, so handle that */ + if (hugepage_shift) { + pa <<= hugepage_shift; <= This is wrong + pa |= token & ((1ul << hugepage_shift) - 1); + } This patch fixes the virt -> phys translation in eeh_token_to_phys() function. $ cat /sys/kernel/debug/powerpc/eeh_address_cache mem addr range [0x0000040080000000-0x00000400807fffff]: 0030:01:00.1 mem addr range [0x0000040080800000-0x0000040080ffffff]: 0030:01:00.1 mem addr range [0x0000040081000000-0x00000400817fffff]: 0030:01:00.0 mem addr range [0x0000040081800000-0x0000040081ffffff]: 0030:01:00.0 mem addr range [0x0000040082000000-0x000004008207ffff]: 0030:01:00.1 mem addr range [0x0000040082080000-0x00000400820fffff]: 0030:01:00.0 mem addr range [0x0000040082100000-0x000004008210ffff]: 0030:01:00.1 mem addr range [0x0000040082110000-0x000004008211ffff]: 0030:01:00.0 Above is the list of cached io address ranges of pci 0030:01:00.. Before this patch: Tracing 'arg1' of function eeh_addr_cache_get_dev() during error injection clearly shows that 'addr=' contains wrong physical address: kworker/u16:0-7 [001] .... 108.883775: eeh_addr_cache_get_dev: (eeh_addr_cache_get_dev+0xc/0xf0) addr=0x80103000a510 dmesg shows no EEH recovery messages: [ 108.563768] bnx2x: [bnx2x_timer:5801(eth2)]MFW seems hanged: drv_pulse (0x9ae) != mcp_pulse (0x7fff) [ 108.563788] bnx2x: [bnx2x_hw_stats_update:870(eth2)]NIG timer max (4294967295) [ 108.883788] bnx2x: [bnx2x_acquire_hw_lock:2013(eth1)]lock_status 0xffffffff resource_bit 0x1 [ 108.884407] bnx2x 0030:01:00.0 eth1: MDC/MDIO access timeout [ 108.884976] bnx2x 0030:01:00.0 eth1: MDC/MDIO access timeout <..> After this patch: eeh_addr_cache_get_dev() trace shows correct physical address: -0 [001] ..s. 1043.123828: eeh_addr_cache_get_dev: (eeh_addr_cache_get_dev+0xc/0xf0) addr=0x40080bc7cd8 dmesg logs shows EEH recovery getting triggerred: [ 964.323980] bnx2x: [bnx2x_timer:5801(eth2)]MFW seems hanged: drv_pulse (0x746f) != mcp_pulse (0x7fff) [ 964.323991] EEH: Recovering PHB#30-PE#10000 [ 964.324002] EEH: PE location: N/A, PHB location: N/A [ 964.324006] EEH: Frozen PHB#30-PE#10000 detected <..> Fixes: 33439620680b ("powerpc/eeh: Handle hugepages in ioremap space") Cc: stable@vger.kernel.org # v5.3+ Reported-by: Dominic DeMarco Signed-off-by: Mahesh Salgaonkar Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/161821396263.48361.2796709239866588652.stgit@jupiter --- arch/powerpc/kernel/eeh.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel/eeh.c') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 01dbb44a0fe3..9058a26df29c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -362,14 +362,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) pa = pte_pfn(*ptep); /* On radix we can do hugepage mappings for io, so handle that */ - if (hugepage_shift) { - pa <<= hugepage_shift; - pa |= token & ((1ul << hugepage_shift) - 1); - } else { - pa <<= PAGE_SHIFT; - pa |= token & (PAGE_SIZE - 1); - } + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + pa <<= PAGE_SHIFT; + pa |= token & ((1ul << hugepage_shift) - 1); return pa; } -- cgit v1.2.3 From f3d03fc748d4e48f4cd8dea1bfeb173cb3b0c19f Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 2 Feb 2021 11:21:36 +0800 Subject: powerpc/eeh: remove unneeded semicolon Eliminate the following coccicheck warning: ./arch/powerpc/kernel/eeh.c:782:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Reviewed-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612236096-91154-1-git-send-email-yang.lee@linux.alibaba.com --- arch/powerpc/kernel/eeh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel/eeh.c') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9058a26df29c..f24cd53ff26e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -776,7 +776,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat default: eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true); return -EINVAL; - }; + } return 0; } -- cgit v1.2.3